avoid excessive parsing of HTML into DOM

pull/263/head
Sunshine 3 years ago
parent b29b9a6a7c
commit 7686b2ea64
No known key found for this signature in database
GPG Key ID: B80CA68703CD8AB1

@ -210,15 +210,23 @@ fn main() {
process::exit(1); process::exit(1);
} }
// Initial parse to read document's charset from META tag // Initial parse
dom = html_to_dom(&data, document_encoding.clone()); dom = html_to_dom(&data, document_encoding.clone());
// TODO: investigate whether the charset from filesystem/data URL/HTTP headers
// takes precedence over what's specified in HTML
// Attempt to determine document's charset // Attempt to determine document's charset
if let Some(charset) = get_charset(&dom.document) { if let Some(charset) = get_charset(&dom.document) {
if !charset.is_empty() { if !charset.is_empty() {
// TODO && label(charset) != UTF_8 // Check if the charset specified inside HTML is valid
document_encoding = charset; if let Some(encoding) = Encoding::for_label(charset.as_bytes()) {
dom = html_to_dom(&data, document_encoding.clone()); // No point in parsing HTML again with the same encoding as before
if encoding.name() != "UTF-8" {
document_encoding = charset;
dom = html_to_dom(&data, document_encoding.clone());
}
}
} }
} }

Loading…
Cancel
Save