diff --git a/src/readability_lxml/htmls.py b/src/readability_lxml/htmls.py index a401625..a2d1a8a 100644 --- a/src/readability_lxml/htmls.py +++ b/src/readability_lxml/htmls.py @@ -59,10 +59,11 @@ def norm_title(title): def get_title(doc): - title = doc.find('.//title').text - if not title: - return '[no-title]' + title_node = doc.find('.//title') + if title_node is None or not title_node.text: + return '[no-title]' + title = title_node.text return norm_title(title) @@ -74,10 +75,11 @@ def add_match(collection, text, orig): def shorten_title(doc): - title = doc.find('.//title').text - if not title: + title_node = doc.find('.//title') + if title_node is None or not title_node.text: return '' + title = title_node.text title = orig = norm_title(title) candidates = set() diff --git a/src/tests/test_article_only.py b/src/tests/test_article_only.py index d6808e7..ce286ef 100644 --- a/src/tests/test_article_only.py +++ b/src/tests/test_article_only.py @@ -28,14 +28,14 @@ class TestArticleOnly(unittest.TestCase): sample, url='http://sportsillustrated.cnn.com/baseball/mlb/gameflash/2012/04/16/40630_preview.html') res = doc.summary() - self.assertEqual('