Update readability.py

pull/87/head
Yuri Baburov 8 years ago committed by GitHub
parent b20d5c15ef
commit e4efc87a20

@ -97,13 +97,13 @@ class Document:
negative_keywords=["mysidebar", "related", "ads"] negative_keywords=["mysidebar", "related", "ads"]
The Document class is not re-enterable. The Document class is not re-enterable.
You need to create a new Document() for each HTML file to process. It is designed to create a new Document() for each HTML file to process it.
Provides four API methods: API methods:
.get_title() .title() -- full title
.short_title() .short_title() -- cleaned up title
.get_content() .content() -- full content
.summary() .summary() -- cleaned up content
""" """
self.input = input self.input = input
self.html = None self.html = None
@ -143,7 +143,7 @@ class Document:
return doc return doc
def content(self): def content(self):
"""Returns full document body""" """Returns document body"""
return get_body(self._html(True)) return get_body(self._html(True))
def title(self): def title(self):
@ -168,8 +168,8 @@ class Document:
:param html_partial: return only the div of the document, don't wrap :param html_partial: return only the div of the document, don't wrap
in html and body tags. in html and body tags.
Warning: It mangles internal DOM representation of the HTML document, Warning: It mutates internal DOM representation of the HTML document,
so always use other API methods before this one. so it is better to call other API methods before this one.
""" """
try: try:
ruthless = True ruthless = True
@ -395,7 +395,6 @@ class Document:
} }
def remove_unlikely_candidates(self): def remove_unlikely_candidates(self):
"""Utility method"""
for elem in self.html.iter(): for elem in self.html.iter():
s = "%s %s" % (elem.get('class', ''), elem.get('id', '')) s = "%s %s" % (elem.get('class', ''), elem.get('id', ''))
if len(s) < 2: if len(s) < 2:
@ -405,7 +404,6 @@ class Document:
elem.drop_tree() elem.drop_tree()
def transform_misused_divs_into_paragraphs(self): def transform_misused_divs_into_paragraphs(self):
"""Utility method"""
for elem in self.tags(self.html, 'div'): for elem in self.tags(self.html, 'div'):
# transform <div>s that do not contain other block elements into # transform <div>s that do not contain other block elements into
# <p>s # <p>s

Loading…
Cancel
Save