encoding regex fix

11 years ago · d58d563299
parent 0104d4ba87
commit d58d563299
1 changed file with 1 addition and 1 deletion
--- a/readability/encoding.py
+++ b/readability/encoding.py
@@ -2,7 +2,7 @@ import re
 import chardet

 def get_encoding(page):
-    text = re.sub('</?[^>]*>\s*', ' ', page)
+    text = re.sub(b'</?[^>]*>\s*', b' ', page)
     enc = 'utf-8'
     if not text.strip() or len(text) < 10:
         return enc # can't guess