From 960f885edfaa4c54f371cab8eb4d51808c570875 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Wei=C3=9F?=
Date: Sat, 24 Mar 2012 09:56:08 +0100
Subject: [PATCH] Continue early in remove_unlikely_candidates() in case there is neither a class nor an id attribute.

---
 readability/readability.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/readability/readability.py b/readability/readability.py
index 11c0e24..d9431e8 100755
--- a/readability/readability.py
+++ b/readability/readability.py
@@ -280,6 +280,8 @@ class Document:
     def remove_unlikely_candidates(self):
         for elem in self.html.iter():
             s = "%s %s" % (elem.get('class', ''), elem.get('id', ''))
+            if len(s) < 2:
+                continue
             #self.debug(s)
             if REGEXES['unlikelyCandidatesRe'].search(s) and (not REGEXES['okMaybeItsACandidateRe'].search(s)) and elem.tag != 'body':
                 self.debug("Removing unlikely candidate - %s" % describe(elem))