Update CLI client commands and add debugging to the test server

0.3.0.dev
Richard Harding 12 years ago
parent f5451356ee
commit ac5ef73e71

@ -16,11 +16,6 @@ def parse_args():
default=False,
help="Increase logging verbosity to DEBUG.")
parser.add_argument('-u', '--url',
action='store',
default=None,
help="Indicate that this is a url path.")
parser.add_argument('path', metavar='P', type=str, nargs=1,
help="The url or file path to process in readable form.")
@ -31,19 +26,27 @@ def parse_args():
def main():
args = parse_args()
target = None
if args.url:
target = args.path[0]
if target.startswith('http') or target.startswith('www'):
is_url = True
url = target
else:
is_url = False
url = None
if is_url:
import urllib
target = urllib.urlopen(args.path[0])
target = urllib.urlopen(target)
else:
target = open(args.path[0], 'rt')
target = open(target, 'rt')
enc = sys.__stdout__.encoding or 'utf-8'
try:
doc = Document(target.read(),
debug=args.verbose,
url=args.url)
url=url)
print doc.summary().encode(enc, 'replace')
finally:

@ -1,8 +1,14 @@
import logging
import re
import chardet
LOG = logging.getLogger()
def get_encoding(page):
LOG.info('GET ENCODING')
LOG.info(type(page))
text = re.sub('</?[^>]*>\s*', ' ', page)
enc = 'utf-8'
if not text.strip() or len(text) < 10:

@ -13,7 +13,12 @@ logging.getLogger().setLevel(logging.DEBUG)
utf8_parser = HTMLParser(encoding='utf-8')
LOG = logging.getLogger()
def build_doc(page):
LOG.info('BUILD DOC')
LOG.info(type(page))
if isinstance(page, unicode):
page_unicode = page
else:

Loading…
Cancel
Save