readability.htmls: some docs do not have title elem

pull/20/head
Andrey Popp 12 years ago
parent 274b60cdb1
commit 95852d5c18

@ -43,11 +43,11 @@ def norm_title(title):
return normalize_entities(normalize_spaces(title))
def get_title(doc):
    """Return the normalized text of the document's ``<title>`` element.

    Looks up the first ``title`` element anywhere below ``doc`` and passes
    its text through ``norm_title``. Returns the placeholder string
    ``'[no-title]'`` when the document has no ``title`` element or the
    element carries no text.
    """
    title = doc.find('.//title')
    # Compare against None explicitly: in the ElementTree/lxml APIs an
    # element's truth value reflects whether it has child elements, so a
    # plain text-only <title> would be falsy under ``not title``.
    if title is None or not title.text:
        return '[no-title]'
    return norm_title(title.text)
def add_match(collection, text, orig):
text = norm_title(text)
@ -56,11 +56,11 @@ def add_match(collection, text, orig):
collection.add(text)
def shorten_title(doc):
title = doc.find('.//title').text
if not title:
title = doc.find('.//title')
if not title or not title.text:
return ''
title = orig = norm_title(title)
title = orig = norm_title(title.text)
candidates = set()

Loading…
Cancel
Save