From d04d41b7494b9a9a8797dccda3cf9fc20f2db6bd Mon Sep 17 00:00:00 2001 From: Miguel Galves Date: Fri, 7 Feb 2014 18:27:15 -0200 Subject: [PATCH] Insert text inside iframe for correct output --- readability/readability.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/readability/readability.py b/readability/readability.py index e1de04a..fcbab77 100755 --- a/readability/readability.py +++ b/readability/readability.py @@ -183,6 +183,7 @@ class Document: if article is None: article = self.html cleaned_article = self.sanitize(article, candidates) + article_length = len(cleaned_article or '') retry_length = self.options.get( 'retry_length', @@ -432,7 +433,9 @@ class Document: elem.drop_tree() for elem in self.tags(node, "iframe"): - if not ("src" in elem.attrib and REGEXES["videoRe"].search(elem.attrib["src"])): + if "src" in elem.attrib and REGEXES["videoRe"].search(elem.attrib["src"]): + elem.text = "VIDEO" # ADD content to iframe text node to force proper output + else: elem.drop_tree() allowed = {}