You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
python-readability/src/readability_lxml/debug.py

33 lines
897 B
Python

# Module-level registry used by describe(): maps a node to the sequential
# number (1, 2, 3, ...) it was given the first time it was described, so that
# repeated descriptions of the same bare tr/td/div/p node show the same number.
uids = {}
def save_to_file(text, filename):
    """Write *text* to *filename* as UTF-8, preceded by a charset <meta> tag.

    The <meta> header declares the content as UTF-8 HTML so a browser opening
    the dump decodes it correctly.

    Args:
        text: The markup to dump. A text string is encoded as UTF-8; a bytes
            object is written unchanged (it is taken to be encoded already).
        filename: Path of the file to create or overwrite.
    """
    header = (
        b'\n<meta http-equiv="Content-Type"\n'
        b'content="text/html; charset=UTF-8"\n'
        b'/>'
    )
    payload = text if isinstance(text, bytes) else text.encode('utf-8')
    # Binary mode: the payload is encoded bytes, and writing bytes to a
    # text-mode file raises TypeError on Python 3. The `with` block also
    # guarantees the handle is closed if a write fails.
    with open(filename, 'wb') as f:
        f.write(header)
        f.write(payload)
def describe(node, depth=2):
    """Return a short, CSS-selector-like label for *node* and its ancestors.

    The label is the tag name followed by ``#id`` and ``.class`` parts when
    those attributes are set (e.g. ``span#x.a.b``). A leading ``div`` is
    dropped when an id or class follows it (``div#main`` -> ``#main``). A
    bare ``tr``, ``td``, ``div`` or ``p`` gets a two-digit number appended,
    taken from the module-level ``uids`` registry, so the same node gets the
    same number each time it is described.

    Args:
        node: Object to describe. It is expected to provide ``tag``,
            ``get(name, default)`` and ``getparent()``; anything without a
            ``tag`` attribute is rendered as ``[<its type>]``.
        depth: How many ancestor levels to append, joined with ``' - '``.

    Returns:
        The label string, e.g. ``"a - span.c - #main"``.
    """
    if not hasattr(node, 'tag'):
        return "[%s]" % type(node)

    name = node.tag
    node_id = node.get('id', '')
    if node_id:
        name += '#' + node_id

    # split() collapses runs of whitespace and drops leading/trailing blanks,
    # so a class value such as "a  b " becomes ".a.b" rather than ".a..b.".
    class_names = (node.get('class', '') or '').split()
    if class_names:
        name += '.' + '.'.join(class_names)

    # An id or class is specific enough; drop the redundant "div" prefix.
    if name[:4] in ('div#', 'div.'):
        name = name[3:]

    # Bare structural tags are indistinguishable by name alone, so number
    # them, reusing the number already given to this node if there is one.
    if name in ('tr', 'td', 'div', 'p'):
        uid = uids.get(node)
        if uid is None:
            uid = uids[node] = len(uids) + 1
        name += "%02d" % uid

    if depth:
        parent = node.getparent()
        if parent is not None:
            return name + ' - ' + describe(parent, depth - 1)
    return name