From 8ff11e68a66f5e65ee147db9dc88c49fa2c878d5 Mon Sep 17 00:00:00 2001 From: Yuri Baburov Date: Mon, 27 Jul 2015 11:59:17 +0600 Subject: [PATCH] Debugging improvements. Bump to 0.6.0.5 --- readability/debug.py | 10 +++++----- readability/readability.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/readability/debug.py b/readability/debug.py index dc149e5..f14f682 100644 --- a/readability/debug.py +++ b/readability/debug.py @@ -15,8 +15,8 @@ def describe_node(node): name = node.tag if node.get('id', ''): name += '#' + node.get('id') - if node.get('class', ''): - name += '.' + node.get('class').replace(' ', '.') + if node.get('class', '').strip(): + name += '.' + '.'.join(node.get('class').split()) if name[:4] in ['div#', 'div.']: name = name[3:] if name in ['tr', 'td', 'div', 'p']: @@ -27,7 +27,7 @@ def describe_node(node): return name -def describe(node, depth=2): +def describe(node, depth=1): global uids, uids_document doc = node.getroottree().getroot() if doc != uids_document: @@ -37,8 +37,8 @@ def describe(node, depth=2): #return repr(NodeRepr(node)) parent = '' if depth and node.getparent() is not None: - parent = describe(node.getparent(), depth=depth - 1) - return parent + '/' + describe_node(node) + parent = describe(node.getparent(), depth=depth - 1) + '>' + return parent + describe_node(node) RE_COLLAPSE_WHITESPACES = re.compile('\s+', re.U) diff --git a/readability/readability.py b/readability/readability.py index 9b3c9c9..e359777 100755 --- a/readability/readability.py +++ b/readability/readability.py @@ -33,7 +33,7 @@ REGEXES = { #'trimRe': re.compile('^\s+|\s+$/'), #'normalizeRe': re.compile('\s{2,}/'), #'killBreaksRe': re.compile('((\s| ?)*){1,}/'), - 'videoRe': re.compile('http:\/\/(www\.)?(youtube|vimeo)\.com', re.I), + 'videoRe': re.compile('https?:\/\/(www\.)?(youtube|vimeo)\.com', re.I), #skipFootnoteLink: /^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i, }