From a4b6957be264ff60a04033f5e3c7616f168984a3 Mon Sep 17 00:00:00 2001 From: Richard Harding Date: Wed, 18 Apr 2012 22:50:07 -0400 Subject: [PATCH] Update html to be a property with a getter --- src/readability_lxml/readability.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/readability_lxml/readability.py b/src/readability_lxml/readability.py index c83b46f..5a19374 100755 --- a/src/readability_lxml/readability.py +++ b/src/readability_lxml/readability.py @@ -100,7 +100,15 @@ class Document: self.input_doc = input_doc self.options = options - self.html = None + self._html = None + + @property + def html(self): + """The parsed html document from the input""" + if self._html is None: + self._html = self._parse(self.input_doc) + + return self._html def _parse(self, input_doc): doc = build_doc(input_doc) @@ -113,13 +121,13 @@ class Document: return doc def content(self): - return get_body(self._html(True)) + return get_body(self.html) def title(self): - return get_title(self._html(True)) + return get_title(self.html) def short_title(self): - return shorten_title(self._html(True)) + return shorten_title(self.html) def summary(self, enclose_with_html_tag=False): """Generate the summary of the html docuemnt @@ -255,7 +263,7 @@ class Document: self.TEXT_LENGTH_THRESHOLD) candidates = {} ordered = [] - for elem in self.tags(self._html(), "p", "pre", "td"): + for elem in self.tags(self.html, "p", "pre", "td"): parent_node = elem.getparent() if parent_node is None: continue