diff --git a/README b/README index 6390957..36bb28c 100644 --- a/README +++ b/README @@ -18,9 +18,9 @@ Based on: Installation:: - easy_install readability-xml + easy_install readability-lxml or - pip install readability-xml + pip install readability-lxml Usage:: diff --git a/readability/debug.py b/readability/debug.py index df67500..a5e644d 100644 --- a/readability/debug.py +++ b/readability/debug.py @@ -23,4 +23,3 @@ def describe(node, depth=2): if depth and node.getparent() is not None: return name+' - '+describe(node.getparent(), depth-1) return name - diff --git a/readability/encoding.py b/readability/encoding.py index b83dc28..d05b7f4 100644 --- a/readability/encoding.py +++ b/readability/encoding.py @@ -19,4 +19,3 @@ def get_encoding(page): if enc == 'MacCyrillic': enc = 'cp1251' return enc - diff --git a/readability/readability.py b/readability/readability.py index 4ef86cb..e4991f3 100644 --- a/readability/readability.py +++ b/readability/readability.py @@ -121,8 +121,8 @@ class Document: else: logging.debug("Ruthless and lenient parsing did not work. Returning raw html") article = self.html.find('body') - if article is None: - article = self.html + if article is None: + article = self.html cleaned_article = self.sanitize(article, candidates) of_acceptable_length = len(cleaned_article or '') >= (self.options['retry_length'] or self.RETRY_LENGTH) @@ -497,8 +497,8 @@ def main(): import urllib file = urllib.urlopen(options.url) else: - file = open(args[0]) - enc = sys.stdout.encoding or 'utf-8' + file = open(args[0], 'rt') + enc = sys.__stdout__.encoding or 'utf-8' try: print Document(file.read(), debug=options.verbose).summary().encode(enc, 'replace') finally: diff --git a/setup.py b/setup.py index 9520437..cd86aa0 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup, find_packages setup( name="readability-lxml", - version="0.2.2", + version="0.2.3", author="Yuri Baburov", author_email="burchik@gmail.com", description="fast python port of arc90's readability tool",