diff --git a/.appveyor.yml b/.appveyor.yml index 12c88c3..5a17430 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -52,11 +52,11 @@ environment: # Nightly 64-bit MSVC - channel: nightly target: x86_64-pc-windows-msvc - #cargoflags: --features "unstable" + cargoflags: --features "unstable" # Nightly 32-bit MSVC - channel: nightly target: i686-pc-windows-msvc - #cargoflags: --features "unstable" + cargoflags: --features "unstable" ### GNU Toolchains ### @@ -80,12 +80,12 @@ environment: - channel: nightly target: x86_64-pc-windows-gnu MINGW_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin' - #cargoflags: --features "unstable" + cargoflags: --features "unstable" # Nightly 32-bit GNU - channel: nightly target: i686-pc-windows-gnu MINGW_PATH: 'C:\MinGW\bin' - #cargoflags: --features "unstable" + cargoflags: --features "unstable" ### Allowed failures ### @@ -124,7 +124,8 @@ install: build: false # Uses 'cargo test' to run tests and build. Alternatively, the project may call compiled programs -#directly or perform other testing commands. Rust will automatically be placed in the PATH +# directly or perform other testing commands. Rust will automatically be placed in the PATH # environment variable. test_script: - - cargo test --verbose %cargoflags% + - cargo build --all --locked --verbose %cargoflags% + - cargo test --all --locked --verbose %cargoflags% diff --git a/Cargo.lock b/Cargo.lock index ec34cb7..92686ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,18 @@ name = "anyhow" version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "assert_cmd" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "escargot 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "predicates 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "predicates-core 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "predicates-tree 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "async-compression" version = "0.2.0" @@ -134,6 +146,16 @@ dependencies = [ "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "difference" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "doc-comment" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "dtoa" version = "0.4.4" @@ -147,6 +169,17 @@ dependencies = [ "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "escargot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "flate2" version = "1.0.13" @@ -512,6 +545,7 @@ dependencies = [ name = "monolith" version = "2.1.2" dependencies = [ + "assert_cmd 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)", "base64 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", "html5ever 0.24.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -683,6 +717,29 @@ name = "precomputed-hash" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "predicates" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "predicates-core 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "predicates-core" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "predicates-tree" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "predicates-core 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "proc-macro2" version = "1.0.6" @@ -941,6 +998,9 @@ dependencies = [ name = "serde" version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "serde_derive" @@ -1131,6 +1191,11 @@ name = "tower-service" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "treeline" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "try-lock" version = "0.2.2" @@ -1372,6 +1437,7 @@ dependencies = [ "checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d" "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" "checksum anyhow 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)" = "7825f6833612eb2414095684fcf6c635becf3ce97fe48cf6421321e93bfbd53c" +"checksum assert_cmd 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6283bac8dd7226470d491bc4737816fea4ca1fba7a2847f2e9097fd6bfb4624c" "checksum async-compression 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2c5c52622726d68ec35fec88edfb4ccb862d4f3b3bfa4af2f45142e69ef9b220" "checksum atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)" = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90" "checksum autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" @@ -1387,8 +1453,11 @@ dependencies = [ "checksum core-foundation 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "25b9e03f145fd4f2bf705e07b900cd41fc636598fe5dc452fd0db1441c3f496d" "checksum core-foundation-sys 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e7ca8a5221364ef15ce201e8ed2f609fc312682a8f4e0e3d4aa5879764e0fa3b" "checksum crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1" +"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" +"checksum doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "923dea538cea0aa3025e8685b20d6ee21ef99c4f77e954a30febbaac5ec73a97" "checksum dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "ea57b42383d091c85abcc2706240b94ab2a8fa1fc81c10ff23c4de06e2a90b5e" "checksum encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)" = "87240518927716f79692c2ed85bfe6e98196d18c6401ec75355760233a7e12e9" +"checksum escargot 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "74cf96bec282dcdb07099f7e31d9fed323bca9435a09aba7b6d99b7617bca96d" "checksum flate2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6bd6d6f4752952feb71363cffc9ebac9411b75b87c6ab6058c40c8900cf43c0f" "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" "checksum foreign-types 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" @@ -1451,6 +1520,9 @@ dependencies = [ "checksum pkg-config 0.3.17 (registry+https://github.com/rust-lang/crates.io-index)" = "05da548ad6865900e60eaba7f589cc0783590a92e940c26953ff81ddbab2d677" "checksum ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" "checksum precomputed-hash 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +"checksum predicates 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a9bfe52247e5cc9b2f943682a85a5549fb9662245caf094504e69a2f03fe64d4" +"checksum predicates-core 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "06075c3a3e92559ff8929e7a280684489ea27fe44805174c3ebd9328dcb37178" +"checksum predicates-tree 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8e63c4859013b38a76eca2414c64911fba30def9e3202ac461a2d22831220124" "checksum proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9c9e470a8dc4aeae2dee2f335e8f533e2d4b347e1434e5671afc49b054592f27" "checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" "checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca" @@ -1499,6 +1571,7 @@ dependencies = [ "checksum tokio-tls 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7bde02a3a5291395f59b06ec6945a3077602fac2b07eeeaf0dee2122f3619828" "checksum tokio-util 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "571da51182ec208780505a32528fc5512a8fe1443ab960b3f2f3ef093cd16930" "checksum tower-service 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e987b6bf443f4b5b3b6f38704195592cca41c5bb7aedd3c3693c7081f8289860" +"checksum treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41" "checksum try-lock 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e604eb7b43c06650e854be16a2a03155743d3752dd1c943f6829e26b7a36e382" "checksum unicase 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" "checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" diff --git a/Cargo.toml b/Cargo.toml index 4cf1fd1..5c6aa20 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,3 +23,6 @@ url = "2.1.1" version = "0.10.*" default-features = false features = ["default-tls", "blocking", "gzip"] + +[dev-dependencies] +assert_cmd = "0.12.0" diff --git a/Makefile b/Makefile index 5964e1a..3117e09 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,21 @@ -.PHONY: all build install run test lint +#!/usr/bin/make -f -all: test build +all: test +.PHONY: all build: @cargo build --locked +.PHONY: build install: @cargo install --force --locked --path . +.PHONY: install -test: +test: build @cargo test --locked @cargo fmt --all -- --check +.PHONY: test lint: @cargo fmt --all -- +.PHONY: lint diff --git a/src/args.rs b/src/args.rs index ac0c618..ca72efc 100644 --- a/src/args.rs +++ b/src/args.rs @@ -21,7 +21,7 @@ const DEFAULT_USER_AGENT: &str = impl AppArgs { pub fn get() -> AppArgs { - let app = App::new("monolith") + let app = App::new(env!("CARGO_PKG_NAME")) .version(crate_version!()) .author(crate_authors!("\n")) .about(crate_description!()) diff --git a/src/main.rs b/src/main.rs index d1c338c..83fc37e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -52,7 +52,7 @@ fn main() { if !is_http_url(target_url) && !is_data_url(target_url) { eprintln!( - "Only HTTP(S) or data URLs are allowed but got: {}", + "Only HTTP(S) or data URLs are supported but got: {}", &target_url ); process::exit(1); @@ -85,11 +85,18 @@ fn main() { let (data, final_url) = retrieve_asset(&mut cache, &client, target_url, false, "", app_args.silent) .expect("Could not retrieve assets in HTML"); - dom = html_to_dom(&data); base_url = final_url; + dom = html_to_dom(&data); } else if is_data_url(target_url) { - base_url = target_url.to_string(); - dom = html_to_dom(&data_url_to_text(target_url)); + let text: String = data_url_to_text(target_url); + + if text.len() == 0 { + eprintln!("Unsupported data URL input"); + process::exit(1); + } + + base_url = str!(); + dom = html_to_dom(&text); } else { process::exit(1); } diff --git a/src/tests/cli.rs b/src/tests/cli.rs new file mode 100644 index 0000000..9a6aa82 --- /dev/null +++ b/src/tests/cli.rs @@ -0,0 +1,182 @@ +use assert_cmd::prelude::*; +use std::process::Command; + +#[test] +fn print_version() -> Result<(), Box> { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + let out = cmd.arg("-V").output().unwrap(); + + // STDOUT should contain program name and version + assert_eq!( + std::str::from_utf8(&out.stdout).unwrap(), + format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")) + ); + + // STDERR should be empty + assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), ""); + + // The exit code should be 0 + out.assert().code(0); + + Ok(()) +} + +#[test] +fn bad_input() -> Result<(), Box> { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + let out = cmd.arg("kernel.org").output().unwrap(); + + // STDOUT should be empty + assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), ""); + + // STDERR should contain error description + assert_eq!( + std::str::from_utf8(&out.stderr).unwrap(), + "Only HTTP(S) or data URLs are supported but got: kernel.org\n" + ); + + // The exit code should be 1 + out.assert().code(1); + + Ok(()) +} + +#[test] +fn bad_input_data_url() -> Result<(), Box> { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap(); + + // STDOUT should contain HTML + assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), ""); + + // STDERR should contain error description + assert_eq!( + std::str::from_utf8(&out.stderr).unwrap(), + "Unsupported data URL input\n" + ); + + // The exit code should be 1 + out.assert().code(1); + + Ok(()) +} + +#[test] +fn isolate_data_url() -> Result<(), Box> { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + let out = cmd + .arg("-I") + .arg("data:text/html,Hello%2C%20World!") + .output() + .unwrap(); + + // STDOUT should contain isolated HTML + assert_eq!( + std::str::from_utf8(&out.stdout).unwrap(), + "Hello, World!\n" + ); + + // STDERR should be empty + assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), ""); + + // The exit code should be 0 + out.assert().code(0); + + Ok(()) +} + +#[test] +fn remove_css_from_data_url() -> Result<(), Box> { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + let out = cmd + .arg("-c") + .arg("data:text/html,Hello") + .output() + .unwrap(); + + // STDOUT should contain HTML with no CSS + assert_eq!( + std::str::from_utf8(&out.stdout).unwrap(), + "Hello\n" + ); + + // STDERR should be empty + assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), ""); + + // The exit code should be 0 + out.assert().code(0); + + Ok(()) +} + +#[test] +fn remove_frames_from_data_url() -> Result<(), Box> { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + let out = cmd + .arg("-f") + .arg("data:text/html,Hi") + .output() + .unwrap(); + + // STDOUT should contain HTML with no iframes + assert_eq!( + std::str::from_utf8(&out.stdout).unwrap(), + "Hi\n" + ); + + // STDERR should be empty + assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), ""); + + // The exit code should be 0 + out.assert().code(0); + + Ok(()) +} + +#[test] +fn remove_images_from_data_url() -> Result<(), Box> { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + let out = cmd + .arg("-i") + .arg("data:text/html,Hi") + .output() + .unwrap(); + + // STDOUT should contain HTML with no images + assert_eq!( + std::str::from_utf8(&out.stdout).unwrap(), + "Hi\n" + ); + + // STDERR should be empty + assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), ""); + + // The exit code should be 0 + out.assert().code(0); + + Ok(()) +} + +#[test] +fn remove_js_from_data_url() -> Result<(), Box> { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + let out = cmd + .arg("-j") + .arg("data:text/html,Hi") + .output() + .unwrap(); + + // STDOUT should contain HTML with no JS + assert_eq!( + std::str::from_utf8(&out.stdout).unwrap(), + "Hi\n" + ); + + // STDERR should be empty + assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), ""); + + // The exit code should be 0 + out.assert().code(0); + + Ok(()) +} diff --git a/src/tests/html.rs b/src/tests/html.rs index bb554c9..6ea6d76 100644 --- a/src/tests/html.rs +++ b/src/tests/html.rs @@ -503,7 +503,7 @@ fn test_stringify_document_isolate_no_frames_no_js_no_css_no_images() { "\ \ \ - \ + \ no-frame no-css no-js no-image isolated document\ \ \ diff --git a/src/tests/mod.rs b/src/tests/mod.rs index a77b631..0051cfc 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -1,3 +1,4 @@ +mod cli; mod html; mod http; mod js; diff --git a/src/utils.rs b/src/utils.rs index 142bf0d..c04f9e9 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -3,7 +3,7 @@ use base64::{decode, encode}; use regex::Regex; use reqwest::blocking::Client; use std::collections::HashMap; -use url::{ParseError, Url}; +use url::{form_urlencoded, ParseError, Url}; /// This monster of a regex is used to match any kind of URL found in CSS. /// @@ -212,28 +212,56 @@ pub fn clean_url>(url: T) -> String { pub fn data_url_to_text>(url: T) -> String { let parsed_url = Url::parse(url.as_ref()).unwrap_or(Url::parse("http://[::1]").unwrap()); - let mut data: String = parsed_url.path().to_string(); - - if data.to_lowercase().starts_with("text/html") { - data = data.chars().skip(9).collect(); - - if data.starts_with(";") { - // Encoding specified, find out which one - data = data.chars().skip(1).collect(); - - if data.to_lowercase().starts_with("base64,") { - data = data.chars().skip(7).collect(); - String::from_utf8(decode(&data).unwrap_or(vec![])).unwrap_or(str!()) - } else if data.to_lowercase().starts_with("utf8,") { - data.chars().skip(5).collect() - } else { - str!() + let path: String = parsed_url.path().to_string(); + let comma_loc: usize = path.find(',').unwrap_or(path.len()); + + if comma_loc == path.len() { + return str!(); + } + + let meta_data: String = path.chars().take(comma_loc).collect(); + let raw_data: String = path.chars().skip(comma_loc + 1).collect(); + + let data: String = form_urlencoded::parse(raw_data.as_bytes()) + .map(|(key, val)| { + [ + key.to_string(), + if val.to_string().len() == 0 { + str!() + } else { + str!('=') + }, + val.to_string(), + ] + .concat() + }) + .collect(); + + let meta_data_items: Vec<&str> = meta_data.split(';').collect(); + let mut mime_type: &str = ""; + let mut encoding: &str = ""; + + let mut i: i8 = 0; + for item in &meta_data_items { + if i == 0 { + if item.eq_ignore_ascii_case("text/html") { + mime_type = item; + continue; } - } else if data.starts_with(",") { - // Plaintext, no encoding specified - data.chars().skip(1).collect() + } + + if item.eq_ignore_ascii_case("base64") || item.eq_ignore_ascii_case("utf8") { + encoding = item; + } + + i = i + 1; + } + + if mime_type.eq_ignore_ascii_case("text/html") { + if encoding.eq_ignore_ascii_case("base64") { + String::from_utf8(decode(&data).unwrap_or(vec![])).unwrap_or(str!()) } else { - str!() + data } } else { str!()