From 1d6392cb288765d17c39f75d49586e18b0fa3ece Mon Sep 17 00:00:00 2001 From: Sunshine Date: Sat, 1 Aug 2020 01:44:09 -0400 Subject: [PATCH] implement support for BASE tag --- src/html.rs | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++- src/main.rs | 16 +++++++--- 2 files changed, 102 insertions(+), 6 deletions(-) diff --git a/src/html.rs b/src/html.rs index 3739d3d..5a1ac4c 100644 --- a/src/html.rs +++ b/src/html.rs @@ -29,6 +29,31 @@ struct SrcSetItem<'a> { const ICON_VALUES: &[&str] = &["icon", "shortcut icon"]; +pub fn add_base_tag(document: &Handle, url: String) -> RcDom { + let mut buf: Vec = Vec::new(); + serialize(&mut buf, document, SerializeOpts::default()) + .expect("unable to serialize DOM into buffer"); + let result = String::from_utf8(buf).unwrap(); + + let mut dom = html_to_dom(&result); + let doc = dom.get_document(); + let html = get_child_node_by_name(&doc, "html"); + let head = get_child_node_by_name(&html, "head"); + let favicon_node = dom.create_element( + QualName::new(None, ns!(), local_name!("base")), + vec![Attribute { + name: QualName::new(None, ns!(), local_name!("href")), + value: format_tendril!("{}", url), + }], + Default::default(), + ); + + // Insert BASE tag into HEAD + head.children.borrow_mut().push(favicon_node.clone()); + + dom +} + pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom { let mut buf: Vec = Vec::new(); serialize(&mut buf, document, SerializeOpts::default()) @@ -54,7 +79,7 @@ pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom { Default::default(), ); - // Append favicon node to HEAD + // Insert favicon LINK tag into HEAD head.children.borrow_mut().push(favicon_node.clone()); dom @@ -205,6 +230,56 @@ pub fn has_proper_integrity(data: &[u8], integrity: &str) -> bool { } } +pub fn has_base_tag(handle: &Handle) -> bool { + let mut found_base_tag: bool = false; + + match handle.data { + NodeData::Document => { + // Dig deeper + for child in handle.children.borrow().iter() { + if has_base_tag(child) { + found_base_tag = true; + break; + } + } + } + NodeData::Element { + ref name, + ref attrs, + .. + } => { + match name.local.as_ref() { + "base" => { + let attrs_mut = &mut attrs.borrow_mut(); + + for attr in attrs_mut.iter_mut() { + if &attr.name.local == "href" { + if !attr.value.trim().is_empty() { + found_base_tag = true; + break; + } + } + } + } + _ => {} + } + + if !found_base_tag { + // Dig deeper + for child in handle.children.borrow().iter() { + if has_base_tag(child) { + found_base_tag = true; + break; + } + } + } + } + _ => {} + } + + found_base_tag +} + pub fn has_favicon(handle: &Handle) -> bool { let mut found_favicon: bool = false; @@ -600,6 +675,7 @@ pub fn walk_and_embed_assets( } } LinkType::Unknown => { + // Make sure that all other LINKs' href attributes are full URLs for attr in attrs_mut.iter_mut() { let attr_name: &str = &attr.name.local; if attr_name.eq_ignore_ascii_case("href") { @@ -612,6 +688,20 @@ pub fn walk_and_embed_assets( } } } + "base" => { + if is_http_url(url) { + // Ensure BASE href is a full URL, not a relative one + for attr in attrs_mut.iter_mut() { + let attr_name: &str = &attr.name.local; + if attr_name.eq_ignore_ascii_case("href") { + let href_full_url = + resolve_url(&url, attr.value.trim()).unwrap_or_default(); + attr.value.clear(); + attr.value.push_slice(&href_full_url.as_str()); + } + } + } + } "body" => { // Find and remove background attribute(s), keep value of the last found one let mut background: String = str!(); diff --git a/src/main.rs b/src/main.rs index aa54333..e8bba9e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,7 +9,8 @@ use std::process; use std::time::Duration; use monolith::html::{ - add_favicon, has_favicon, html_to_dom, metadata_tag, stringify_document, walk_and_embed_assets, + add_base_tag, add_favicon, has_base_tag, has_favicon, html_to_dom, metadata_tag, + stringify_document, walk_and_embed_assets, }; use monolith::opts::Options; use monolith::url::{ @@ -141,6 +142,14 @@ fn main() { process::exit(1); } + // Embed remote assets + walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0); + + // Take care of BASE tag + if is_http_url(base_url.clone()) && !has_base_tag(&dom.document) { + dom = add_base_tag(&dom.document, base_url.clone()); + } + // Request and embed /favicon.ico (unless it's already linked in the document) if !options.no_images && is_http_url(target_url) && !has_favicon(&dom.document) { let favicon_ico_url: String = resolve_url(&base_url, "/favicon.ico").unwrap(); @@ -163,15 +172,12 @@ fn main() { } } - // Embed remote assets - walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0); - // Serialize DOM tree let mut result: String = stringify_document(&dom.document, &options); // Add metadata tag if !options.no_metadata { - let metadata_comment = metadata_tag(&base_url); + let metadata_comment: String = metadata_tag(&base_url); result.insert_str(0, &metadata_comment); if metadata_comment.len() > 0 { result.insert_str(metadata_comment.len(), "\n");