fix: Decodes bytes if needed in get_body

4 years ago · 15f3692e68
parent 5800210e99
commit 15f3692e68
1 changed file with 3 additions and 1 deletion
--- a/readability/htmls.py
+++ b/readability/htmls.py
@@ -134,7 +134,9 @@ def get_body(doc):
        elem.drop_tree()
    # tostring() always return utf-8 encoded string
    # FIXME: isn't better to use tounicode?
-    raw_html = str_(tostring(doc.body or doc))
+    raw_html = tostring(doc.body or doc)
+    if isinstance(raw_html, bytes):
+        raw_html = raw_html.decode()
    cleaned = clean_attributes(raw_html)
    try:
        # BeautifulSoup(cleaned) #FIXME do we really need to try loading it?