You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
python-readability/readability/debug.py

52 lines
1.3 KiB
Python

import re
# Debug-only registry mapping nodes to small sequential numbers, so that
# otherwise indistinguishable tags (tr/td/div/p) can be told apart in output.
# FIXME: use with caution, can leak memory -- the dict keeps a reference to
# every node it has numbered until the registry is replaced.
uids = {}
# Root element of the document that `uids` currently belongs to; `describe`
# starts a fresh registry when it is called on a node from another document.
uids_document = None
def describe_node(node):
    """Return a short CSS-selector-like label for *node*, for debug output.

    The label is the tag name followed by ``#id`` and ``.class`` parts when
    those attributes are set; a leading ``div`` is dropped when an id or a
    class follows it.  A bare ``tr``, ``td``, ``div`` or ``p`` gets a
    ``{NN}`` suffix taken from the module-level ``uids`` registry so that
    repeated anonymous elements can be told apart.

    Returns ``""`` for ``None`` and ``"[<type>]"`` for anything that is not a
    regular element: objects without a ``tag`` attribute, and nodes whose
    ``tag`` is callable (as ElementTree-style comment and
    processing-instruction nodes have), which used to raise instead of being
    labelled.
    """
    if node is None:
        return ""
    if not hasattr(node, "tag") or callable(node.tag):
        return "[%s]" % type(node)
    name = node.tag
    # `or ""` also covers node types whose get() ignores the default value.
    node_id = node.get("id") or ""
    if node_id:
        name += "#" + node_id
    classes = (node.get("class") or "").split()
    if classes:
        name += "." + ".".join(classes)
    if name.startswith(("div#", "div.")):
        # "div#main" -> "#main": the div is implied, keep the label short.
        name = name[3:]
    if name in ("tr", "td", "div", "p"):
        # Number the node on first sight; later calls reuse the same number.
        name += "{%02d}" % uids.setdefault(node, len(uids) + 1)
    return name
def describe(node, depth=1):
    """Return a debug label for *node*, prefixed by labels of its ancestors.

    Up to *depth* ancestors are included, outermost first, joined with
    ``">"``; with ``depth=0`` only the node itself is described.  The
    module-level ``uids`` registry is replaced by an empty one whenever
    *node* belongs to a different document root than the previous call.
    """
    global uids, uids_document
    root = node.getroottree().getroot()
    if root != uids_document:
        # New document: start numbering anonymous elements from scratch.
        uids, uids_document = {}, root
    ancestor = node.getparent() if depth else None
    if ancestor is None:
        return describe_node(node)
    # The ancestor chain is described first so it is numbered before the node.
    return describe(ancestor, depth=depth - 1) + ">" + describe_node(node)
# Matches any run of (Unicode) whitespace so it can be collapsed to one space.
RE_COLLAPSE_WHITESPACES = re.compile(r"\s+", re.U)


def text_content(elem, length=40):
    """Return the text of *elem* on a single line, cut to *length* characters.

    Carriage returns are removed and every run of whitespace is collapsed to
    one space.  Text longer than *length* is cut and marked with a trailing
    ``"..."``; text of exactly *length* characters is returned unchanged,
    since nothing was cut from it.

    *elem* must provide a ``text_content()`` method returning a string.
    """
    raw = elem.text_content().replace("\r", "")
    content = RE_COLLAPSE_WHITESPACES.sub(" ", raw)
    # `<=` rather than `<`: only add the ellipsis when text is really dropped.
    if len(content) <= length:
        return content
    return content[:length] + "..."