python 3 update

11 years ago · 0e33b26432
parent 4e3192f5ab
commit 0e33b26432
1 changed file with 14 additions and 14 deletions
--- a/readability/readability.py
+++ b/readability/readability.py
@@ -9,12 +9,12 @@ from lxml.etree import tounicode
 from lxml.html import document_fromstring
 from lxml.html import fragment_fromstring

-from cleaners import clean_attributes
-from cleaners import html_cleaner
-from htmls import build_doc
-from htmls import get_body
-from htmls import get_title
-from htmls import shorten_title
+from .cleaners import clean_attributes
+from .cleaners import html_cleaner
+from .htmls import build_doc
+from .htmls import get_body
+from .htmls import get_title
+from .htmls import shorten_title


 logging.basicConfig(level=logging.INFO)
@@ -179,9 +179,9 @@ class Document:
                    continue
                else:
                    return cleaned_article
-        except StandardError, e:
+        except Exception as e:
            log.exception('error getting summary: ')
-            raise Unparseable(str(e)), None, sys.exc_info()[2]
+            raise Unparseable(str(e)).with_traceback(sys.exc_info()[2])

    def get_article(self, candidates, best_candidate, html_partial=False):
        # Now that we have the top candidate, look through its siblings for
@@ -231,7 +231,7 @@ class Document:
        return output

    def select_best_candidate(self, candidates):
-        sorted_candidates = sorted(candidates.values(), key=lambda x: x['content_score'], reverse=True)
+        sorted_candidates = sorted(list(candidates.values()), key=lambda x: x['content_score'], reverse=True)
        for candidate in sorted_candidates[:5]:
            elem = candidate['elem']
            self.debug("Top 5 : %6.3f %s" % (
@@ -366,7 +366,7 @@ class Document:
            # This results in incorrect results in case there is an <img>
            # buried within an <a> for example
            if not REGEXES['divToPElementsRe'].search(
-                    unicode(''.join(map(tostring, list(elem))))):
+                    str(''.join(map(tostring, list(elem))))):
                #self.debug("Altering %s to p" % (describe(elem)))
                elem.tag = "p"
                #print "Fixed element "+describe(elem)
@@ -577,15 +577,15 @@ def main():

    file = None
    if options.url:
-        import urllib
-        file = urllib.urlopen(options.url)
+        import urllib.request, urllib.parse, urllib.error
+        file = urllib.request.urlopen(options.url)
    else:
        file = open(args[0], 'rt')
    enc = sys.__stdout__.encoding or 'utf-8'
    try:
-        print Document(file.read(),
+        print(Document(file.read(),
            debug=options.verbose,
-            url=options.url).summary().encode(enc, 'replace')
+            url=options.url).summary().encode(enc, 'replace'))
    finally:
        file.close()