Return a div fragment instead of a whole HTML page

13 years ago · 7aac0f0855
parent ac517834e6
commit 7aac0f0855
1 changed file with 6 additions and 6 deletions
--- a/readability/readability.py
+++ b/readability/readability.py
@@ -140,8 +140,8 @@ class Document:
        # Things like preambles, content split by ads that we removed, etc.

        sibling_score_threshold = max([10, best_candidate['content_score'] * 0.2])
-        body = B.BODY()
-        html = B.HTML(body)
+        article = B.DIV()
+        article.attrib['id'] = 'article'
        best_elem = best_candidate['elem']
        for sibling in best_elem.getparent().getchildren():
            #if isinstance(sibling, NavigableString): continue#in lxml there no concept of simple text 
@@ -163,11 +163,11 @@ class Document:
                    append = True

            if append:
-                body.append(sibling)
+                article.append(sibling)

-        #if body is not None: 
-        #    body.append(best_elem)
-        return html
+        #if article is not None: 
+        #    article.append(best_elem)
+        return article

    def select_best_candidate(self, candidates):
        sorted_candidates = sorted(candidates.values(), key=lambda x: x['content_score'], reverse=True)