You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
52 lines
1.3 KiB
Python
52 lines
1.3 KiB
Python
import re
|
|
|
|
|
|
# FIXME: use with caution, can leak memory
# Module-level debugging state. `uids` maps a node object to the small
# sequential number used to tell otherwise identical labels apart; because
# the nodes themselves are the dict keys, they stay referenced for as long
# as their entry exists.
uids = {}
# Document root that the entries in `uids` were allocated for (None until
# the first description is requested).
uids_document = None
|
|
|
|
|
|
def describe_node(node):
|
|
global uids
|
|
if node is None:
|
|
return ""
|
|
if not hasattr(node, "tag"):
|
|
return "[%s]" % type(node)
|
|
name = node.tag
|
|
if node.get("id", ""):
|
|
name += "#" + node.get("id")
|
|
if node.get("class", "").strip():
|
|
name += "." + ".".join(node.get("class").split())
|
|
if name[:4] in ["div#", "div."]:
|
|
name = name[3:]
|
|
if name in ["tr", "td", "div", "p"]:
|
|
uid = uids.get(node)
|
|
if uid is None:
|
|
uid = uids[node] = len(uids) + 1
|
|
name += "{%02d}" % uid
|
|
return name
|
|
|
|
|
|
def describe(node, depth=1):
    """Return a ``parent>child`` label for *node* with up to *depth* ancestors.

    Each path component is produced by ``describe_node``. When *node*
    belongs to a different document root than the one seen on the previous
    call, the module-level uid table is discarded and numbering restarts.
    """
    global uids, uids_document

    root = node.getroottree().getroot()
    if root != uids_document:
        # New document: start numbering afresh and drop the old nodes.
        uids = {}
        uids_document = root

    # Walk upwards, collecting the node and then its ancestors.
    chain = [node]
    remaining = depth
    while remaining and chain[-1].getparent() is not None:
        chain.append(chain[-1].getparent())
        remaining -= 1

    # Describe outermost ancestor first so uids are handed out in the same
    # order as a top-down traversal.
    return ">".join(describe_node(item) for item in reversed(chain))
|
|
|
|
|
|
# Matches any run of whitespace so it can be squeezed into a single space.
RE_COLLAPSE_WHITESPACES = re.compile(r"\s+", re.U)


def text_content(elem, length=40):
    """Return a single-line preview of *elem*'s text.

    Carriage returns are removed and every run of whitespace is collapsed
    to one space. The result is cut to *length* characters, with ``"..."``
    appended only when something was actually cut off.

    :param elem: object exposing a ``text_content()`` method that returns
        a string.
    :param length: maximum number of characters kept before the ellipsis.
    :returns: the normalised text, truncated if longer than *length*.
    """
    raw = elem.text_content().replace("\r", "")
    content = RE_COLLAPSE_WHITESPACES.sub(" ", raw)
    # `<=`, not `<`: text that is exactly `length` characters long fits in
    # full and must not be given a misleading truncation marker.
    if len(content) <= length:
        return content
    return content[:length] + "..."
|