Corrects some things where 2to3 did too much.

pull/64/head
Martin Thurau 9 years ago
parent aa4132f57a
commit 3ac56329e2

@ -36,7 +36,7 @@ def normalize_entities(cur_title):
u'\u00BB': '"', u'\u00BB': '"',
u'"': '"', u'"': '"',
} }
for c, r in list(entities.items()): for c, r in entities.items():
if c in cur_title: if c in cur_title:
cur_title = cur_title.replace(c, r) cur_title = cur_title.replace(c, r)

@ -89,7 +89,7 @@ def compile_pattern(elements):
else: else:
# assume string or string like object # assume string or string like object
elements = elements.split(',') elements = elements.split(',')
return re.compile('|'.join([re.escape(x.lower()) for x in elements]), re.U) return re.compile(u'|'.join([re.escape(x.lower()) for x in elements]), re.U)
class Document: class Document:
"""Class to build a etree document out of html.""" """Class to build a etree document out of html."""
@ -207,7 +207,7 @@ class Document:
# code never runs) which would require write this line as: # code never runs) which would require write this line as:
# write this line as # write this line as
# Unparseable(str(e)) # Unparseable(str(e))
# but then we loose the traceback information. 3.4 on the # but then we lose the traceback information. 3.4 on the
# other hand accepts the old syntax and would only complain # other hand accepts the old syntax and would only complain
# at runtime. # at runtime.
raise Unparseable(str(e)), None, sys.exc_info()[2] raise Unparseable(str(e)), None, sys.exc_info()[2]
@ -262,7 +262,7 @@ class Document:
return output return output
def select_best_candidate(self, candidates): def select_best_candidate(self, candidates):
sorted_candidates = sorted(list(candidates.values()), key=lambda x: x['content_score'], reverse=True) sorted_candidates = sorted(candidates.values(), key=lambda x: x['content_score'], reverse=True)
for candidate in sorted_candidates[:5]: for candidate in sorted_candidates[:5]:
elem = candidate['elem'] elem = candidate['elem']
self.debug("Top 5 : %6.3f %s" % ( self.debug("Top 5 : %6.3f %s" % (

Loading…
Cancel
Save