From 09d41d2cf151644a445c62b341cbb39895559f3b Mon Sep 17 00:00:00 2001 From: Sunshine Date: Tue, 14 Jul 2020 02:58:29 -0400 Subject: [PATCH] automatically obtain favicon.ico --- src/html.rs | 99 ++++++++++++++++++++++++++++++----- src/main.rs | 32 +++++++++-- src/tests/html/add_favicon.rs | 29 ++++++++++ src/tests/html/has_favicon.rs | 52 ++++++++++++++++++ src/tests/html/is_icon.rs | 20 +++---- src/tests/html/mod.rs | 2 + 6 files changed, 209 insertions(+), 25 deletions(-) create mode 100644 src/tests/html/add_favicon.rs create mode 100644 src/tests/html/has_favicon.rs diff --git a/src/html.rs b/src/html.rs index 50285ae..7857e59 100644 --- a/src/html.rs +++ b/src/html.rs @@ -27,13 +27,38 @@ struct SrcSetItem<'a> { descriptor: &'a str, } -const ICON_VALUES: &[&str] = &[ - "icon", - "shortcut icon", - "mask-icon", - "apple-touch-icon", - "fluid-icon", -]; +const ICON_VALUES: &[&str] = &["icon", "shortcut icon"]; + +pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom { + let mut buf: Vec = Vec::new(); + serialize(&mut buf, document, SerializeOpts::default()) + .expect("unable to serialize DOM into buffer"); + let result = String::from_utf8(buf).unwrap(); + + let mut dom = html_to_dom(&result); + let doc = dom.get_document(); + let html = get_child_node_by_name(&doc, "html"); + let head = get_child_node_by_name(&html, "head"); + let favicon_node = dom.create_element( + QualName::new(None, ns!(), local_name!("link")), + vec![ + Attribute { + name: QualName::new(None, ns!(), local_name!("rel")), + value: format_tendril!("icon"), + }, + Attribute { + name: QualName::new(None, ns!(), local_name!("href")), + value: format_tendril!("{}", favicon_data_url), + }, + ], + Default::default(), + ); + + // Append favicon node to HEAD + head.children.borrow_mut().push(favicon_node.clone()); + + dom +} pub fn get_parent_node(node: &Handle) -> Handle { let parent = node.parent.take().clone(); @@ -140,6 +165,56 @@ pub fn embed_srcset( result } +pub fn has_favicon(handle: &Handle) -> bool { + let mut found_favicon: bool = false; + + match handle.data { + NodeData::Document => { + // Dig deeper + for child in handle.children.borrow().iter() { + if has_favicon(child) { + found_favicon = true; + break; + } + } + } + NodeData::Element { + ref name, + ref attrs, + .. + } => { + match name.local.as_ref() { + "link" => { + let attrs_mut = &mut attrs.borrow_mut(); + + for attr in attrs_mut.iter_mut() { + if &attr.name.local == "rel" { + if is_icon(attr.value.trim()) { + found_favicon = true; + break; + } + } + } + } + _ => {} + } + + if !found_favicon { + // Dig deeper + for child in handle.children.borrow().iter() { + if has_favicon(child) { + found_favicon = true; + break; + } + } + } + } + _ => {} + } + + found_favicon +} + pub fn walk_and_embed_assets( cache: &mut HashMap>, client: &Client, @@ -1061,7 +1136,7 @@ pub fn walk_and_embed_assets( } } -pub fn html_to_dom(data: &str) -> html5ever::rcdom::RcDom { +pub fn html_to_dom(data: &str) -> RcDom { parse_document(RcDom::default(), Default::default()) .from_utf8() .read_from(&mut data.as_bytes()) @@ -1087,7 +1162,9 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String { let mut result = String::from_utf8(buf).unwrap(); - // Take care of CSP + // We can't make it isolate the page right away since it may have no HEAD element, + // ergo we have to serialize, parse the DOM again, insert the CSP meta tag, and then + // finally serialize and return the resulting string if options.isolate || options.no_css || options.no_fonts @@ -1095,6 +1172,7 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String { || options.no_js || options.no_images { + // Take care of CSP let mut buf: Vec = Vec::new(); let mut dom = html_to_dom(&result); let doc = dom.get_document(); @@ -1123,9 +1201,6 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String { head.children.borrow_mut().push(meta.clone()); head.children.borrow_mut().reverse(); - // Note: we can't make it isolate the page right away since it may have no HEAD element, - // ergo we have to serialize, parse the DOM again, insert the CSP meta tag, and then - // finally serialize the result serialize(&mut buf, &doc, SerializeOpts::default()) .expect("unable to serialize DOM into buffer"); result = String::from_utf8(buf).unwrap(); diff --git a/src/main.rs b/src/main.rs index bbb1034..5cfab69 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,9 +8,13 @@ use std::path::Path; use std::process; use std::time::Duration; -use monolith::html::{html_to_dom, metadata_tag, stringify_document, walk_and_embed_assets}; +use monolith::html::{ + add_favicon, has_favicon, html_to_dom, metadata_tag, stringify_document, walk_and_embed_assets, +}; use monolith::opts::Options; -use monolith::url::{data_url_to_data, is_data_url, is_file_url, is_http_url}; +use monolith::url::{ + data_to_data_url, data_url_to_data, is_data_url, is_file_url, is_http_url, resolve_url, +}; use monolith::utils::retrieve_asset; mod macros; @@ -48,7 +52,7 @@ fn main() { let original_target: &str = &options.target; let target_url: &str; let base_url; - let dom; + let mut dom; // Pre-process the input let cwd_normalized: String = @@ -137,6 +141,28 @@ fn main() { process::exit(1); } + // Request and embed /favicon.ico (unless it's already linked in the document) + if !options.no_images && is_http_url(target_url) && !has_favicon(&dom.document) { + let favicon_ico_url: String = resolve_url(&base_url, "/favicon.ico").unwrap(); + + match retrieve_asset( + &mut cache, + &client, + &base_url, + &favicon_ico_url, + options.silent, + 0, + ) { + Ok((data, final_url, media_type)) => { + let favicon_data_url: String = data_to_data_url(&media_type, &data, &final_url); + dom = add_favicon(&dom.document, favicon_data_url); + } + Err(_) => { + // Failed to retrieve favicon.ico + } + } + } + // Embed remote assets walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0); diff --git a/src/tests/html/add_favicon.rs b/src/tests/html/add_favicon.rs new file mode 100644 index 0000000..80bee70 --- /dev/null +++ b/src/tests/html/add_favicon.rs @@ -0,0 +1,29 @@ +// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ +// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ +// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ +// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ +// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ +// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ + +#[cfg(test)] +mod passing { + use html5ever::serialize::{serialize, SerializeOpts}; + + use crate::html; + + #[test] + fn basic() { + let html = "
text
"; + let mut dom = html::html_to_dom(&html); + + dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string()); + + let mut buf: Vec = Vec::new(); + serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); + + assert_eq!( + buf.iter().map(|&c| c as char).collect::(), + "
text
" + ); + } +} diff --git a/src/tests/html/has_favicon.rs b/src/tests/html/has_favicon.rs new file mode 100644 index 0000000..67b71ff --- /dev/null +++ b/src/tests/html/has_favicon.rs @@ -0,0 +1,52 @@ +// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ +// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ +// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ +// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ +// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ +// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ + +#[cfg(test)] +mod passing { + use crate::html; + use crate::opts::Options; + + #[test] + fn icon() { + let html = "
text
"; + let dom = html::html_to_dom(&html); + let res: bool = html::has_favicon(&dom.document); + + assert!(res); + } + + #[test] + fn shortcut_icon() { + let html = "
text
"; + let dom = html::html_to_dom(&html); + let res: bool = html::has_favicon(&dom.document); + + assert!(res); + } +} + +// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ +// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ +// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ +// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ +// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ +// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ + +#[cfg(test)] +mod failing { + use crate::html; + use crate::opts::Options; + + #[test] + fn absent() { + let html = "
text
"; + let dom = html::html_to_dom(&html); + let res: bool = html::has_favicon(&dom.document); + + assert!(!res); + } +} diff --git a/src/tests/html/is_icon.rs b/src/tests/html/is_icon.rs index a187356..cc531bf 100644 --- a/src/tests/html/is_icon.rs +++ b/src/tests/html/is_icon.rs @@ -23,16 +23,6 @@ mod passing { fn icon_uppercase() { assert!(html::is_icon("ICON")); } - - #[test] - fn mask_icon() { - assert!(html::is_icon("mask-icon")); - } - - #[test] - fn fluid_icon() { - assert!(html::is_icon("fluid-icon")); - } } // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ @@ -46,6 +36,16 @@ mod passing { mod failing { use crate::html; + #[test] + fn mask_icon() { + assert!(!html::is_icon("mask-icon")); + } + + #[test] + fn fluid_icon() { + assert!(!html::is_icon("fluid-icon")); + } + #[test] fn stylesheet() { assert!(!html::is_icon("stylesheet")); diff --git a/src/tests/html/mod.rs b/src/tests/html/mod.rs index 09e65f7..a912338 100644 --- a/src/tests/html/mod.rs +++ b/src/tests/html/mod.rs @@ -1,6 +1,8 @@ +mod add_favicon; mod csp; mod embed_srcset; mod get_node_name; +mod has_favicon; mod has_proper_integrity; mod is_icon; mod metadata_tag;