diff --git a/src/readability_lxml/client.py b/src/readability_lxml/client.py
new file mode 100644
index 0000000..5a1e371
--- /dev/null
+++ b/src/readability_lxml/client.py
@@ -0,0 +1,63 @@
+import argparse
+import sys
+
+from readability_lxml import VERSION
+from readability_lxml.readability import Document
+
+
+def parse_args():
+    """Parse the command line arguments of the readability client.
+
+    Returns the argparse namespace with the ``verbose`` and ``url`` flags and
+    ``path``, a one-element list holding the file path or url to process.
+    """
+    desc = "fast python port of arc90's readability tool"
+    parser = argparse.ArgumentParser(description=desc)
+    parser.add_argument('--version',
+        action='version', version=VERSION)
+
+    parser.add_argument('-v', '--verbose',
+        action='store_true',
+        default=False,
+        help="Increase logging verbosity to DEBUG.")
+
+    # A plain flag: the location itself always comes from ``path``.
+    parser.add_argument('-u', '--url',
+        action='store_true',
+        default=False,
+        help="Indicate that this is a url path.")
+
+    parser.add_argument('path', metavar='P', type=str, nargs=1,
+        help="The url or file path to process in readable form.")
+
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    """Print the readable summary of the given file or url to stdout."""
+    args = parse_args()
+    location = args.path[0]
+
+    target = None
+    if args.url:
+        import urllib
+        target = urllib.urlopen(location)
+    else:
+        target = open(location, 'rt')
+
+    enc = sys.__stdout__.encoding or 'utf-8'
+
+    try:
+        # Hand Document the url itself (not the boolean flag) when fetching one.
+        doc = Document(target.read(),
+            debug=args.verbose,
+            url=location if args.url else None)
+        print doc.summary().encode(enc, 'replace')
+
+    finally:
+        target.close()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/readability_lxml/readability.py b/src/readability_lxml/readability.py
index 168bc95..aaa8dab 100755
--- a/src/readability_lxml/readability.py
+++ b/src/readability_lxml/readability.py
@@ -531,59 +531,3 @@ class Document:
                 pass
 
         return clean_attributes(tounicode(node))
-
-
-class HashableElement():
-    def __init__(self, node):
-        self.node = node
-        self._path = None
-
-    def _get_path(self):
-        if self._path is None:
-            reverse_path = []
-            node = self.node
-            while node is not None:
-                node_id = (node.tag, tuple(node.attrib.items()), node.text)
-                reverse_path.append(node_id)
-                node = node.getparent()
-            self._path = tuple(reverse_path)
-        return self._path
-    path = property(_get_path)
-
-    def __hash__(self):
-        return hash(self.path)
-
-    def __eq__(self, other):
-        return self.path == other.path
-
-    def __getattr__(self, tag):
-        return getattr(self.node, tag)
-
-
-def main():
-    from optparse import OptionParser
-    parser = OptionParser(usage="%prog: [options] [file]")
-    parser.add_option('-v', '--verbose', action='store_true')
-    parser.add_option('-u', '--url', default=None, help="use URL instead of a local file")
-    (options, args) = parser.parse_args()
-
-    if not (len(args) == 1 or options.url):
-        parser.print_help()
-        sys.exit(1)
-
-    file = None
-    if options.url:
-        import urllib
-        file = urllib.urlopen(options.url)
-    else:
-        file = open(args[0], 'rt')
-    enc = sys.__stdout__.encoding or 'utf-8'
-    try:
-        print Document(file.read(),
-            debug=options.verbose,
-            url=options.url).summary().encode(enc, 'replace')
-    finally:
-        file.close()
-
-if __name__ == '__main__':
-    main()