0.3.0.dev
Richard Harding 12 years ago
parent aa51283dff
commit 35792e7a59

@@ -159,7 +159,6 @@ class Document:
self.remove_unlikely_candidates()
self.transform_misused_divs_into_paragraphs()
candidates = self.score_paragraphs()
best_candidate = self.select_best_candidate(candidates)
if best_candidate:
@@ -249,6 +248,7 @@ class Document:
sorted_candidates = sorted(candidates.values(),
key=lambda x: x['content_score'],
reverse=True)
for candidate in sorted_candidates[:5]:
elem = candidate['elem']
self.debug("Top 5 : %6.3f %s" % (

@@ -36,4 +36,3 @@ class TestArticleOnly(unittest.TestCase):
doc = Document(sample, url='http://sportsillustrated.cnn.com/baseball/mlb/gameflash/2012/04/16/40630_preview.html')
res = doc.summary(enclose_with_html_tag=True)
self.assertEqual('<div><div class="', res[0:17])

Loading…
Cancel
Save