encoding regex fix

pull/38/head
Francis Tseng 11 years ago
parent 0104d4ba87
commit d58d563299

@@ -2,7 +2,7 @@ import re
import chardet
def get_encoding(page):
text = re.sub('</?[^>]*>\s*', ' ', page)
text = re.sub(b'</?[^>]*>\s*', b' ', page)
enc = 'utf-8'
if not text.strip() or len(text) < 10:
return enc # can't guess

Loading…
Cancel
Save