Fix issue with trying to drop root node

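remove_unlikely_candidates would call drop_tree() on the root node if it deemed
the root an unlikely candidate.  The root has no parent, so it cannot be dropped;
this change prevents that by skipping any element whose getparent() is None.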
0.3.0.dev
Jerry Charumilind 13 years ago
parent 18fa6b5146
commit cba19f209b

@ -295,7 +295,11 @@ class Document:
for elem in self.html.iter():
s = "%s %s" % (elem.get('class', ''), elem.get('id', ''))
#self.debug(s)
if REGEXES['unlikelyCandidatesRe'].search(s) and (not REGEXES['okMaybeItsACandidateRe'].search(s)) and elem.tag != 'body':
if (REGEXES['unlikelyCandidatesRe'].search(s) and
(not REGEXES['okMaybeItsACandidateRe'].search(s)) and
elem.tag != 'body' and
elem.getparent() is not None
):
self.debug("Removing unlikely candidate - %s" % describe(elem))
elem.drop_tree()

Loading…
Cancel
Save