Fix the flipped nature of the <html> wrapping setting

12 years ago · 3347f16d93
parent 93ac1111a1
commit 3347f16d93
2 changed files with 7 additions and 7 deletions
--- a/src/readability_lxml/readability.py
+++ b/src/readability_lxml/readability.py
@@ -139,7 +139,7 @@ class Document:
    def short_title(self):
        return shorten_title(self.html)

-    def summary(self, enclose_with_html_tag=False):
+    def summary(self, enclose_with_html_tag=True):
        """Generate the summary of the html docuemnt

        :param enclose_with_html_tag: return only the div of the document,
@@ -197,7 +197,7 @@ class Document:
            raise Unparseable(str(e)), None, sys.exc_info()[2]

    def get_article(self, candidates, best_candidate,
-        enclose_with_html_tag=False):
+        enclose_with_html_tag=True):
        # Now that we have the top candidate, look through its siblings for
        # content that might also be related.
        # Things like preambles, content split by ads that we removed, etc.
@@ -206,9 +206,9 @@ class Document:
            best_candidate['content_score'] * 0.2])
        # create a new html document with a html->body->div
        if enclose_with_html_tag:
-            output = fragment_fromstring('<div/>')
-        else:
            output = document_fromstring('<div/>')
+        else:
+            output = fragment_fromstring('<div/>')
        best_elem = best_candidate['elem']
        for sibling in best_elem.getparent().getchildren():
            # in lxml there no concept of simple text
@@ -238,9 +238,9 @@ class Document:
                # We don't want to append directly to output, but the div
                # in html->body->div
                if enclose_with_html_tag:
-                    output.append(sibling)
-                else:
                    output.getchildren()[0].getchildren()[0].append(sibling)
+                else:
+                    output.append(sibling)
        #if output is not None:
        #    output.append(best_elem)
        return output
--- a/src/tests/test_article_only.py
+++ b/src/tests/test_article_only.py
@@ -34,5 +34,5 @@ class TestArticleOnly(unittest.TestCase):
        """Using the si sample, make sure we can get the article alone."""
        sample = load_sample('si-game.sample.html')
        doc = Document(sample, url='http://sportsillustrated.cnn.com/baseball/mlb/gameflash/2012/04/16/40630_preview.html')
-        res = doc.summary(enclose_with_html_tag=True)
+        res = doc.summary(enclose_with_html_tag=False)
        self.assertEqual('<div><div class="', res[0:17])