Update html to be a property with a getter

0.3.0.dev
Richard Harding 12 years ago
parent b0063ffb3c
commit a4b6957be2

@ -100,7 +100,15 @@ class Document:
self.input_doc = input_doc
self.options = options
self.html = None
self._html = None
@property
def html(self):
    """Return the parsed html document for the input, parsing on first access.

    The value produced by ``self._parse(self.input_doc)`` is stored in
    ``self._html`` and handed back unchanged on every later access.
    """
    # Test against None instead of truthiness: ``not self._html`` treats a
    # falsy parsed document as "not parsed yet" and re-runs _parse on every
    # access. NOTE(review): lxml elements without children are falsy, if
    # that is what _parse returns -- confirm against build_doc.
    if self._html is None:
        self._html = self._parse(self.input_doc)
    return self._html
def _parse(self, input_doc):
doc = build_doc(input_doc)
@ -113,13 +121,13 @@ class Document:
return doc
def content(self):
return get_body(self._html(True))
return get_body(self.html)
def title(self):
return get_title(self._html(True))
return get_title(self.html)
def short_title(self):
return shorten_title(self._html(True))
return shorten_title(self.html)
def summary(self, enclose_with_html_tag=False):
"""Generate the summary of the html document
@ -255,7 +263,7 @@ class Document:
self.TEXT_LENGTH_THRESHOLD)
candidates = {}
ordered = []
for elem in self.tags(self._html(), "p", "pre", "td"):
for elem in self.tags(self.html, "p", "pre", "td"):
parent_node = elem.getparent()
if parent_node is None:
continue

Loading…
Cancel
Save