import re # FIXME: use with caution, can leak memory uids = {} uids_document = None def describe_node(node): global uids if node is None: return "" if not hasattr(node, "tag"): return "[%s]" % type(node) name = node.tag if node.get("id", ""): name += "#" + node.get("id") if node.get("class", "").strip(): name += "." + ".".join(node.get("class").split()) if name[:4] in ["div#", "div."]: name = name[3:] if name in ["tr", "td", "div", "p"]: uid = uids.get(node) if uid is None: uid = uids[node] = len(uids) + 1 name += "{%02d}" % uid return name def describe(node, depth=1): global uids, uids_document doc = node.getroottree().getroot() if doc != uids_document: uids = {} uids_document = doc # return repr(NodeRepr(node)) parent = "" if depth and node.getparent() is not None: parent = describe(node.getparent(), depth=depth - 1) + ">" return parent + describe_node(node) RE_COLLAPSE_WHITESPACES = re.compile(r"\s+", re.U) def text_content(elem, length=40): content = RE_COLLAPSE_WHITESPACES.sub(" ", elem.text_content().replace("\r", "")) if len(content) < length: return content return content[:length] + "..."