bypass doc building if input is already a parsed object

pull/138/head
Adrien Barbaresi 4 years ago
parent c2916a1b4f
commit 6429899aa2

@@ -6,8 +6,10 @@ import sys
 from lxml.etree import tostring
 from lxml.etree import tounicode
+from lxml.etree import _ElementTree
 from lxml.html import document_fromstring
 from lxml.html import fragment_fromstring
+from lxml.html import HtmlElement
 from .cleaners import clean_attributes
 from .cleaners import html_cleaner
@@ -136,7 +138,10 @@ class Document:
         return self.html
     def _parse(self, input):
-        doc, self.encoding = build_doc(input)
+        if isinstance(input, (_ElementTree, HtmlElement)):
+            doc = input
+        else:
+            doc, self.encoding = build_doc(input)
         doc = html_cleaner.clean_html(doc)
         base_href = self.url
         if base_href:

Loading…
Cancel
Save