Convert tabs to spaces; put article in body

13 years ago · ac517834e6
parent 01247903b8
commit ac517834e6
1 changed file with 475 additions and 472 deletions
--- a/readability/readability.py
+++ b/readability/readability.py
@@ -4,6 +4,7 @@ from collections import defaultdict
 from htmls import build_doc, get_body, get_title, shorten_title
 from lxml.etree import tostring, tounicode
 from lxml.html import fragment_fromstring, document_fromstring
+from lxml.html import builder as B
 import logging
 import re
 import sys
@@ -139,7 +140,8 @@ class Document:
        # Things like preambles, content split by ads that we removed, etc.

        sibling_score_threshold = max([10, best_candidate['content_score'] * 0.2])
-		output = document_fromstring('<div/>')
+        body = B.BODY()
+        html = B.HTML(body)
        best_elem = best_candidate['elem']
        for sibling in best_elem.getparent().getchildren():
            #if isinstance(sibling, NavigableString): continue#in lxml there no concept of simple text 
@@ -161,10 +163,11 @@ class Document:
                    append = True

            if append:
-				output.append(sibling)
-		#if output is not None: 
-		#	output.append(best_elem)
-		return output
+                body.append(sibling)
+
+        #if body is not None: 
+        #    body.append(best_elem)
+        return html

    def select_best_candidate(self, candidates):
        sorted_candidates = sorted(candidates.values(), key=lambda x: x['content_score'], reverse=True)