diff --git a/Cargo.lock b/Cargo.lock index bf82f34..85cbd00 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -27,9 +27,9 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "doc-comment 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "predicates 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", - "predicates-core 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "predicates-tree 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "predicates 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", + "predicates-core 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "predicates-tree 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "wait-timeout 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -608,6 +608,7 @@ name = "monolith" version = "2.4.0" dependencies = [ "assert_cmd 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)", "base64 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.4.19 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.3 (registry+https://github.com/rust-lang/crates.io-index)", @@ -858,24 +859,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "predicates" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "predicates-core 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "predicates-core 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "predicates-core" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "predicates-tree" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "predicates-core 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "predicates-core 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -1755,9 +1756,9 @@ dependencies = [ "checksum pkg-config 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" "checksum ppv-lite86 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" "checksum precomputed-hash 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" -"checksum predicates 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "73dd9b7b200044694dfede9edf907c1ca19630908443e9447e624993700c6932" -"checksum predicates-core 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fb3dbeaaf793584e29c58c7e3a82bbb3c7c06b63cea68d13b0e3cddc124104dc" -"checksum predicates-tree 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aee95d988ee893cb35c06b148c80ed2cd52c8eea927f50ba7a0be1a786aeab73" +"checksum predicates 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eeb433456c1a57cc93554dea3ce40b4c19c4057e41c55d4a0f3d84ea71c325aa" +"checksum predicates-core 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "57e35a3326b75e49aa85f5dc6ec15b41108cf5aee58eabb1f274dd18b73c2451" +"checksum predicates-tree 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "15f553275e5721409451eb85e15fd9a860a6e5ab4496eb215987502b5f5391f2" "checksum proc-macro-hack 0.5.19 (registry+https://github.com/rust-lang/crates.io-index)" = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" "checksum proc-macro2 1.0.24 (registry+https://github.com/rust-lang/crates.io-index)" = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" "checksum quote 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)" = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df" diff --git a/Cargo.toml b/Cargo.toml index eb5062d..f7a80c2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ include = [ license = "Unlicense" [dependencies] +atty = "0.2" # Used for highlighting network errors base64 = "0.13.0" chrono = "0.4.19" # Used for formatting creation timestamp clap = "2.33.3" diff --git a/README.md b/README.md index f6103ec..829b44d 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ or ## Options - `-a`: Exclude audio sources - - `-b`: Use custom base URL + - `-b`: Use custom `base URL` - `-c`: Exclude CSS - `-e`: Ignore network errors - `-f`: Omit frames @@ -71,10 +71,10 @@ or - `-j`: Exclude JavaScript - `-k`: Accept invalid X.509 (TLS) certificates - `-M`: Don't add timestamp and URL information - - `-o`: Write output to file + - `-o`: Write output to `file` - `-s`: Be quiet - - `-t`: Adjust network request timeout - - `-u`: Provide custom User-Agent + - `-t`: Adjust `network request timeout` + - `-u`: Provide `custom User-Agent` - `-v`: Exclude videos --------------------------------------------------- diff --git a/src/html.rs b/src/html.rs index 55b40ae..b8a7b04 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1063,45 +1063,19 @@ pub fn walk_and_embed_assets( if let Some(source_attr_srcset_value) = get_node_attr(node, "srcset") { if parent_node_name == "picture" { - if options.no_images { - set_node_attr(node, "srcset", Some(str!(empty_image!()))); - } else { - let srcset_full_url = - resolve_url(&url, source_attr_srcset_value).unwrap_or_default(); - let srcset_url_fragment = get_url_fragment(srcset_full_url.clone()); - match retrieve_asset( - cache, - client, - &url, - &srcset_full_url, - options, - depth + 1, - ) { - Ok((srcset_data, srcset_final_url, srcset_media_type)) => { - let srcset_data_url = data_to_data_url( - &srcset_media_type, - &srcset_data, - &srcset_final_url, - ); - let assembled_url: String = url_with_fragment( - srcset_data_url.as_str(), - srcset_url_fragment.as_str(), - ); - set_node_attr(node, "srcset", Some(assembled_url)); - } - Err(_) => { - if is_http_url(srcset_full_url.clone()) { - // Keep remote reference if unable to retrieve the asset - let assembled_url: String = url_with_fragment( - srcset_full_url.as_str(), - srcset_url_fragment.as_str(), - ); - set_node_attr(node, "srcset", Some(assembled_url)); - } else { - // Exclude non-remote URLs - set_node_attr(node, "srcset", None); - } - } + if !source_attr_srcset_value.is_empty() { + if options.no_images { + set_node_attr(node, "srcset", Some(str!(empty_image!()))); + } else { + let resolved_srcset: String = embed_srcset( + cache, + client, + &url, + &source_attr_srcset_value, + options, + depth, + ); + set_node_attr(node, "srcset", Some(resolved_srcset)); } } } @@ -1199,8 +1173,8 @@ pub fn walk_and_embed_assets( // Empty inner content of STYLE tags node.children.borrow_mut().clear(); } else { - for node in node.children.borrow_mut().iter_mut() { - if let NodeData::Text { ref contents } = node.data { + for child_node in node.children.borrow_mut().iter_mut() { + if let NodeData::Text { ref contents } = child_node.data { let mut tendril = contents.borrow_mut(); let replacement = embed_css( cache, @@ -1436,6 +1410,42 @@ pub fn walk_and_embed_assets( } } } + "noscript" => { + for child_node in node.children.borrow_mut().iter_mut() { + match child_node.data { + NodeData::Text { ref contents } => { + // Get contents of the NOSCRIPT node + let mut noscript_contents = contents.borrow_mut(); + // Parse contents of the NOSCRIPT node + let noscript_contents_dom: RcDom = html_to_dom(&noscript_contents); + // Embed assets within the NOSCRIPT node + walk_and_embed_assets( + cache, + client, + &url, + &noscript_contents_dom.document, + &options, + depth, + ); + // Get rid of original contents + noscript_contents.clear(); + // Insert HTML containing embedded assets into the NOSCRIPT node + if let Some(html) = + get_child_node_by_name(&noscript_contents_dom.document, "html") + { + if let Some(body) = get_child_node_by_name(&html, "body") { + let mut buf: Vec = Vec::new(); + serialize(&mut buf, &body, SerializeOpts::default()) + .expect("Unable to serialize DOM into buffer"); + let result = String::from_utf8(buf).unwrap(); + noscript_contents.push_slice(&result); + } + } + } + _ => {} + } + } + } _ => {} } diff --git a/src/opts.rs b/src/opts.rs index 1137c6d..c1a2a2a 100644 --- a/src/opts.rs +++ b/src/opts.rs @@ -1,4 +1,5 @@ use clap::{App, Arg}; +use std::env; #[derive(Default)] pub struct Options { @@ -19,6 +20,7 @@ pub struct Options { pub user_agent: Option, pub no_video: bool, pub target: String, + pub no_color: bool, } const ASCII: &'static str = " \ @@ -33,6 +35,8 @@ const ASCII: &'static str = " \ const DEFAULT_NETWORK_TIMEOUT: u64 = 120; const DEFAULT_USER_AGENT: &'static str = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0"; +const ENV_VAR_NO_COLOR: &str = "NO_COLOR"; +const ENV_VAR_TERM: &str = "TERM"; impl Options { pub fn from_args() -> Options { @@ -98,6 +102,14 @@ impl Options { } options.no_video = app.is_present("no-video"); + options.no_color = + env::var_os(ENV_VAR_NO_COLOR).is_some() || atty::isnt(atty::Stream::Stderr); + if let Some(term) = env::var_os(ENV_VAR_TERM) { + if term == "dumb" { + options.no_color = true; + } + } + options } } diff --git a/src/tests/html/embed_srcset.rs b/src/tests/html/embed_srcset.rs index a2c2f59..704bdfa 100644 --- a/src/tests/html/embed_srcset.rs +++ b/src/tests/html/embed_srcset.rs @@ -14,10 +14,10 @@ mod passing { use crate::opts::Options; #[test] - fn replace_with_empty_images() { + fn small_medium_large() { let cache = &mut HashMap::new(); let client = Client::new(); - let srcset_value = "small.png 1x, large.png 2x"; + let srcset_value = "small.png 1x, medium.png 1.5x, large.png 2x"; let mut options = Options::default(); options.no_images = true; options.silent = true; @@ -25,7 +25,28 @@ mod passing { assert_eq!( embedded_css, - format!("{} 1x, {} 2x", empty_image!(), empty_image!()), + format!( + "{} 1x, {} 1.5x, {} 2x", + empty_image!(), + empty_image!(), + empty_image!(), + ), + ); + } + + #[test] + fn small_medium_only_medium_has_scale() { + let cache = &mut HashMap::new(); + let client = Client::new(); + let srcset_value = "small.png, medium.png 1.5x"; + let mut options = Options::default(); + options.no_images = true; + options.silent = true; + let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0); + + assert_eq!( + embedded_css, + format!("{}, {} 1.5x", empty_image!(), empty_image!()), ); } diff --git a/src/tests/html/walk_and_embed_assets.rs b/src/tests/html/walk_and_embed_assets.rs index 901574a..93026d1 100644 --- a/src/tests/html/walk_and_embed_assets.rs +++ b/src/tests/html/walk_and_embed_assets.rs @@ -326,4 +326,45 @@ mod passing { " ); } + + #[test] + fn processes_noscript_tags() { + let html = "\ + \ + \ + \ + "; + let dom = html::html_to_dom(&html); + let url = "http://localhost"; + let cache = &mut HashMap::new(); + + let mut options = Options::default(); + options.no_images = true; + options.silent = true; + + let client = Client::new(); + + html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0); + + let mut buf: Vec = Vec::new(); + serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); + + assert_eq!( + buf.iter().map(|&c| c as char).collect::(), + format!( + "\ + \ + \ + \ + \ + \ + ", + empty_image!(), + ) + ); + } } diff --git a/src/utils.rs b/src/utils.rs index e9cf90d..90030d7 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -7,6 +7,8 @@ use std::path::Path; use crate::opts::Options; use crate::url::{clean_url, file_url_to_fs_path, is_data_url, is_file_url, parse_data_url}; +const ANSI_COLOR_RED: &'static str = "\x1b[31m"; +const ANSI_COLOR_RESET: &'static str = "\x1b[0m"; const INDENT: &'static str = " "; const MAGIC: [[&[u8]; 2]; 18] = [ @@ -32,7 +34,6 @@ const MAGIC: [[&[u8]; 2]; 18] = [ [b"....moov", b"video/quicktime"], [b"\x1A\x45\xDF\xA3", b"video/webm"], ]; - const PLAINTEXT_MEDIA_TYPES: &[&str] = &[ "application/javascript", "image/svg+xml", @@ -128,7 +129,18 @@ pub fn retrieve_asset( Ok(mut response) => { if !options.ignore_errors && response.status() != 200 { if !options.silent { - eprintln!("Unable to retrieve {} (error: {})", &url, response.status()); + eprintln!( + "{}{}{} ({}){}", + indent(depth).as_str(), + if options.no_color { "" } else { ANSI_COLOR_RED }, + &url, + response.status(), + if options.no_color { + "" + } else { + ANSI_COLOR_RESET + }, + ); } // Provoke error return Err(client.get("").send().unwrap_err());