diff --git a/Cargo.toml b/Cargo.toml index 4bf94f2..f841eca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,3 +13,5 @@ mime-sniffer = "0.1.2" regex = "1.2.1" reqwest = "0.9.20" url = "2.1.0" +lazy_static = "1.3.0" + diff --git a/README.md b/README.md index b0b02dc..8e4b6e0 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ If compared to saving websites with `wget -mpk`, `monolith` embeds all assets as ### Installation $ git clone https://github.com/Y2Z/monolith.git $ cd monolith - $ cargo install + $ cargo install --path . ### Usage $ monolith https://lyrics.github.io/db/p/portishead/dummy/roads/ > portishead-roads-lyrics.html diff --git a/src/html.rs b/src/html.rs index 0ef386b..840afb6 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1,13 +1,11 @@ -extern crate html5ever; - +use http::{is_valid_url, resolve_url, retrieve_asset}; use std::default::Default; use std::io; -use http::{is_url, retrieve_asset, resolve_url}; -use self::html5ever::parse_document; -use self::html5ever::rcdom::{Handle, NodeData, RcDom}; -use self::html5ever::tendril::TendrilSink; -use self::html5ever::serialize::{SerializeOpts, serialize}; +use html5ever::parse_document; +use html5ever::rcdom::{Handle, NodeData, RcDom}; +use html5ever::serialize::{serialize, SerializeOpts}; +use html5ever::tendril::TendrilSink; enum NodeMatch { Icon, @@ -19,19 +17,37 @@ enum NodeMatch { Other, } -static PNG_PIXEL: &str = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII="; +const PNG_PIXEL: &str = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII="; -static JS_DOM_EVENT_ATTRS: [&str; 21] = [ +const JS_DOM_EVENT_ATTRS: [&str; 21] = [ // Input - "onfocus", "onblur", "onselect", "onchange", "onsubmit", "onreset", "onkeydown", "onkeypress", "onkeyup", + "onfocus", + "onblur", + "onselect", + "onchange", + "onsubmit", + "onreset", + "onkeydown", + "onkeypress", + "onkeyup", // Mouse - "onmouseover", "onmouseout", "onmousedown", "onmouseup", "onmousemove", + "onmouseover", + "onmouseout", + "onmousedown", + "onmouseup", + "onmousemove", // Click - "onclick", "ondblclick", + "onclick", + "ondblclick", // Load - "onload", "onunload", "onabort", "onerror", "onresize", + "onload", + "onunload", + "onabort", + "onerror", + "onresize", ]; +#[allow(clippy::cognitive_complexity)] pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool, opt_no_images: bool) { match node.data { NodeData::Document => { @@ -39,28 +55,24 @@ pub fn walk_and_embed_assets(url: &str, node: &Handle, opt_no_js: bool, opt_no_i for child in node.children.borrow().iter() { walk_and_embed_assets(&url, child, opt_no_js, opt_no_images); } - }, + } - NodeData::Doctype { - name: _, - public_id: _, - system_id: _, - } => {}, + NodeData::Doctype { .. } => {} - NodeData::Text { contents: _, } => {}, + NodeData::Text { .. } => {} - NodeData::Comment { contents: _, } => { + NodeData::Comment { .. } => { // Note: in case of opt_no_js being set to true, there's no need to worry about // getting rid of comments that may contain scripts, e.g.