From 87ad057706a5c269ec36498575d00ef9b035252f Mon Sep 17 00:00:00 2001 From: gfxmonk Date: Mon, 26 Apr 2010 23:22:54 +1000 Subject: [PATCH] unicode, dammit! --- readability/readability.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/readability/readability.py b/readability/readability.py index 5b6ad9c..24a13b8 100755 --- a/readability/readability.py +++ b/readability/readability.py @@ -1,6 +1,6 @@ #!/usr/bin/env python from BeautifulSoup import NavigableString -from page_parser import parse, get_title, get_body +from page_parser import parse, get_title, get_body, Unparseable import logging import re @@ -29,7 +29,7 @@ class Document: RETRY_LENGTH = 250 def __init__(self, input, **options): - self.input = input + self.input = inpuunicodear self.options = defaultdict(lambda: None) for k, v in options.items(): self.options[k] = v @@ -206,7 +206,7 @@ class Document: for elem in self.html.findAll(): if elem.name.lower() == "div": # transform
<div>s that do not contain other block elements into <p>
s - if REGEXES['divToPElementsRe'].search(''.join(map(str, elem.contents))): + if REGEXES['divToPElementsRe'].search(''.join(map(unicode, elem.contents))): self.debug("Altering div(#%s.%s) to p" % (elem.get('id', ''), elem.get('class', ''))) elem.name = "p"