diff --git a/readability/compat/__init__.py b/readability/compat/__init__.py
index 900f819..c648633 100644
--- a/readability/compat/__init__.py
+++ b/readability/compat/__init__.py
@@ -5,11 +5,16 @@ It mainly exists because there are certain incompatibilities in the Python
syntax that can only be solved by conditionally importing different functions.
"""
import sys
+from lxml.etree import tostring
if sys.version_info[0] == 2:
bytes_ = str
str_ = unicode
+ def tostring_(s):
+ return tostring(s, encoding='utf-8').decode('utf-8')
elif sys.version_info[0] == 3:
bytes_ = bytes
str_ = str
+ def tostring_(s):
+ return tostring(s, encoding='utf-8')
diff --git a/readability/readability.py b/readability/readability.py
index eca389d..6676d57 100755
--- a/readability/readability.py
+++ b/readability/readability.py
@@ -4,7 +4,6 @@ import logging
import re
import sys
-from lxml.etree import tostring
from lxml.etree import tounicode
from lxml.html import document_fromstring
from lxml.html import fragment_fromstring
@@ -15,7 +14,7 @@ from .htmls import build_doc
from .htmls import get_body
from .htmls import get_title
from .htmls import shorten_title
-from .compat import str_, bytes_
+from .compat import str_, bytes_, tostring_
from .debug import describe, text_content
@@ -464,7 +463,7 @@ class Document:
# This results in incorrect results in case there is an <img>
# buried within an <a> for example
if not REGEXES["divToPElementsRe"].search(
- str_(b"".join(map(lambda it: tostring(it, encoding="utf-8"), list(elem))))
+ str_(b"".join(map(tostring_, list(elem))))
):
# log.debug("Altering %s to p" % (describe(elem)))
elem.tag = "p"