Merge pull request #65 from avalanchy/best_elem_is_root

Failure if best_elem is root (fix #58)
Thanks a lot @avalanchy and @jnothman !
pull/63/merge
Yuri Baburov 9 years ago
commit 1cb17d919b

@ -212,7 +212,9 @@ class Document:
else:
output = document_fromstring('<div/>')
best_elem = best_candidate['elem']
for sibling in best_elem.getparent().getchildren():
parent = best_elem.getparent()
siblings = parent.getchildren() if parent else [best_elem]
for sibling in siblings:
# in lxml there is no concept of simple text
# if isinstance(sibling, NavigableString): continue
append = False

@ -50,3 +50,14 @@ class TestArticleOnly(unittest.TestCase):
doc = Document(sample)
res = doc.summary(html_partial=True)
self.assertEqual('<div><div class="content__article-body ', res[0:39])
def test_best_elem_is_root_and_passing(self):
    """Smoke test: summarising a minimal document must not raise.

    The markup is a bare <html> element wrapping a single short
    paragraph. As the test name indicates, it targets the case where
    the best candidate element is the document root. No assertion is
    made on the output; the test passes as long as ``summary()``
    returns without raising.
    """
    markup = (
        '<html class="article" id="body">'
        ' <body>'
        ' <p>1234567890123456789012345</p>'
        ' </body>'
        '</html>'
    )
    # Only the absence of an exception matters; the result is discarded.
    Document(markup).summary()

Loading…
Cancel
Save