Bump to version 0.2

pull/9/head 0.2
Yuri Baburov 13 years ago
parent 21906f1c44
commit 61715dca0a

@ -13,9 +13,12 @@ Based on:
- Ruby port by starrhorne and iterationlabs - Ruby port by starrhorne and iterationlabs
- Python port by gfxmonk ( https://github.com/gfxmonk/python-readability , based on BeautifulSoup ) - Python port by gfxmonk ( https://github.com/gfxmonk/python-readability , based on BeautifulSoup )
- Decruft effort to move to lxml ( http://www.minvolai.com/blog/decruft-arc90s-readability-in-python/ ) - Decruft effort to move to lxml ( http://www.minvolai.com/blog/decruft-arc90s-readability-in-python/ )
- "BR to P" fix from readability.js which improves quality for smaller texts.
- GitHub users' contributions.
Usage: Usage:
from readability.readability import Document
import urllib import urllib
html = urllib.urlopen(url).read() html = urllib.urlopen(url).read()
readable_article = Document(html).summary() readable_article = Document(html).summary()
@ -23,4 +26,4 @@ readable_title = Document(html).short_title()
Command-line usage: Command-line usage:
python -m readability.readability -u http://yoursite.com/yourpage python -m readability.readability -u http://pypi.python.org/pypi/readability-lxml

@ -120,7 +120,9 @@ class Document:
continue continue
else: else:
logging.debug("Ruthless and lenient parsing did not work. Returning raw html") logging.debug("Ruthless and lenient parsing did not work. Returning raw html")
article = self.html.find('body') or self.html article = self.html.find('body')
if article is None:
article = self.html
cleaned_article = self.sanitize(article, candidates) cleaned_article = self.sanitize(article, candidates)
of_acceptable_length = len(cleaned_article or '') >= (self.options['retry_length'] or self.RETRY_LENGTH) of_acceptable_length = len(cleaned_article or '') >= (self.options['retry_length'] or self.RETRY_LENGTH)

@ -3,7 +3,7 @@ from setuptools import setup, find_packages
setup( setup(
name="readability-lxml", name="readability-lxml",
version="0.1dev", version="0.2",
author="Yuri Baburov", author="Yuri Baburov",
author_email="burchik@gmail.com", author_email="burchik@gmail.com",
description="python port of arc90's readability bookmarklet", description="python port of arc90's readability bookmarklet",

Loading…
Cancel
Save