Add summary_with_score.

pull/120/head
maomao905 5 years ago
parent b21235d0a0
commit fdb2ebe3d7

@ -33,11 +33,18 @@ Usage
'Example Domain'
>>> doc.summary()
u'<html><body><div><body id="readabilityBody">\n<div>\n <h1>Example Domain</h1>\n
<p>This domain is established to be used for illustrative examples in documents. You may
use this\n domain in examples without prior coordination or asking for permission.</p>
\n <p><a href="http://www.iana.org/domains/example">More information...</a></p>\n</div>
\n</body>\n</div></body></html>'
>>> doc.summary_with_score()
(u'<html><body><div><body id="readabilityBody">\n<div>\n <h1>Example Domain</h1>\n
<p>This domain is established to be used for illustrative examples in documents. You may
use this\n domain in examples without prior coordination or asking for permission.</p>
\n <p><a href="http://www.iana.org/domains/example">More information...</a></p>\n</div>
\n</body>\n</div></body></html>', 31.43)
\n</body>\n</div></body></html>', 24.293830845771144)
Change Log
----------

@ -180,6 +180,19 @@ class Document:
:param html_partial: return only the div of the document, don't wrap
in html and body tags.
Warning: It mutates the internal DOM representation of the HTML document,
so it is better to call other API methods before this one.
"""
article, _ = self.summary_with_score(html_partial)
return article
def summary_with_score(self, html_partial=False):
"""
Given an HTML file, extracts the text of the article along with its score.
:param html_partial: return only the div of the document, don't wrap
in html and body tags.
Warning: It mutates the internal DOM representation of the HTML document,
so it is better to call other API methods before this one.
"""

Loading…
Cancel
Save