From 3ac56329e2a9918497b52bfc9c8c3dd3f6f060ad Mon Sep 17 00:00:00 2001 From: Martin Thurau Date: Wed, 29 Apr 2015 19:33:43 +0200 Subject: [PATCH] Corrects some things where 2to3 did too much. --- readability/htmls.py | 2 +- readability/readability.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/readability/htmls.py b/readability/htmls.py index 526fbce..292b4bb 100644 --- a/readability/htmls.py +++ b/readability/htmls.py @@ -36,7 +36,7 @@ def normalize_entities(cur_title): u'\u00BB': '"', u'"': '"', } - for c, r in list(entities.items()): + for c, r in entities.items(): if c in cur_title: cur_title = cur_title.replace(c, r) diff --git a/readability/readability.py b/readability/readability.py index c6391d7..18ae4b2 100755 --- a/readability/readability.py +++ b/readability/readability.py @@ -89,7 +89,7 @@ def compile_pattern(elements): else: # assume string or string like object elements = elements.split(',') - return re.compile('|'.join([re.escape(x.lower()) for x in elements]), re.U) + return re.compile(u'|'.join([re.escape(x.lower()) for x in elements]), re.U) class Document: """Class to build a etree document out of html.""" @@ -207,7 +207,7 @@ class Document: # code never runs) which would require write this line as: # write this line as # Unparseable(str(e)) - # but then we loose the traceback information. 3.4 on the + # but then we lose the traceback information. 3.4 on the # other hand accepts the old syntax and would only complain # at runtime. raise Unparseable(str(e)), None, sys.exc_info()[2] @@ -262,7 +262,7 @@ class Document: return output def select_best_candidate(self, candidates): - sorted_candidates = sorted(list(candidates.values()), key=lambda x: x['content_score'], reverse=True) + sorted_candidates = sorted(candidates.values(), key=lambda x: x['content_score'], reverse=True) for candidate in sorted_candidates[:5]: elem = candidate['elem'] self.debug("Top 5 : %6.3f %s" % (