From 15f3692e68f8bd66affc898f8ce31aca86c3e886 Mon Sep 17 00:00:00 2001
From: Raphael Cohen <raphael.cohen@sekoia.fr>
Date: Tue, 4 Feb 2020 16:15:50 +0100
Subject: [PATCH] fix: Decodes bytes if needed in get_body

---
 readability/htmls.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/readability/htmls.py b/readability/htmls.py
index 17a75c7..b2eb3ce 100644
--- a/readability/htmls.py
+++ b/readability/htmls.py
@@ -134,7 +134,9 @@ def get_body(doc):
         elem.drop_tree()
     # tostring() always return utf-8 encoded string
     # FIXME: isn't better to use tounicode?
-    raw_html = str_(tostring(doc.body or doc))
+    raw_html = tostring(doc.body or doc)
+    if isinstance(raw_html, bytes):
+        raw_html = raw_html.decode()
     cleaned = clean_attributes(raw_html)
     try:
         # BeautifulSoup(cleaned) #FIXME do we really need to try loading it?