Return a div fragment instead of a whole HTML page

0.3.0.dev
Jerry Charumilind 13 years ago
parent ac517834e6
commit 7aac0f0855

@ -140,8 +140,8 @@ class Document:
# Things like preambles, content split by ads that we removed, etc.
sibling_score_threshold = max([10, best_candidate['content_score'] * 0.2])
body = B.BODY()
html = B.HTML(body)
article = B.DIV()
article.attrib['id'] = 'article'
best_elem = best_candidate['elem']
for sibling in best_elem.getparent().getchildren():
#if isinstance(sibling, NavigableString): continue  # in lxml there is no concept of simple text
@ -163,11 +163,11 @@ class Document:
append = True
if append:
body.append(sibling)
article.append(sibling)
#if body is not None:
# body.append(best_elem)
return html
#if article is not None:
# article.append(best_elem)
return article
def select_best_candidate(self, candidates):
sorted_candidates = sorted(candidates.values(), key=lambda x: x['content_score'], reverse=True)

Loading…
Cancel
Save