From 145550e637522c37597c9df54a6d0af70a28a7a4 Mon Sep 17 00:00:00 2001 From: Sunshine Date: Mon, 29 Aug 2022 20:13:28 -0400 Subject: [PATCH] rename -D to -d, allow multiple occurrences --- README.md | 2 +- src/opts.rs | 26 +++++++++++++++++++++----- src/utils.rs | 25 ++++++++----------------- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index e97879f..ed3f9ff 100644 --- a/README.md +++ b/README.md @@ -113,7 +113,7 @@ cat index.html | monolith -aIiFfcMv -b https://original.site/ - > result.html - `-b`: Use custom `base URL` - `-c`: Exclude CSS - `-C`: Save document using custom `charset` - - `-D`: Allow retrieving assets only from specified `domain(s)` + - `-d`: Allow retrieving assets only from specified `domain(s)` - `-e`: Ignore network errors - `-E`: Exclude all assets located within domains specified in whitelist - `-f`: Omit frames diff --git a/src/opts.rs b/src/opts.rs index 89d59e0..5f1fbf1 100644 --- a/src/opts.rs +++ b/src/opts.rs @@ -1,4 +1,4 @@ -use clap::{App, Arg}; +use clap::{App, Arg, ArgAction}; use std::env; #[derive(Default)] @@ -52,9 +52,17 @@ impl Options { .args_from_usage("-b, --base-url=[http://localhost/] 'Sets custom base URL'") .args_from_usage("-c, --no-css 'Removes CSS'") .args_from_usage("-C, --charset=[UTF-8] 'Enforces custom encoding'") - .args_from_usage("-D, --domains=[bad.org,ads.site] 'Whitelist of domains'") + .arg( + Arg::with_name("domains") + .short('d') + .long("domains") + .takes_value(true) + .value_name("DOMAINS") + .action(ArgAction::Append) + .help("Whitelist of domains"), + ) .args_from_usage("-e, --ignore-errors 'Ignore network errors'") - .args_from_usage("-E, --exclude-domains 'Treat list of specified domains as blacklist'") + .args_from_usage("-E, --exclude-domains 'Treat specified domains as blacklist'") .args_from_usage("-f, --no-frames 'Removes frames and iframes'") .args_from_usage("-F, --no-fonts 'Removes fonts'") .args_from_usage("-i, --no-images 'Removes images'") @@ 
-95,8 +103,16 @@ impl Options { if let Some(charset) = app.value_of("charset") { options.charset = Some(charset.to_string()); } - if let Some(domains) = app.value_of("domains") { - options.domains = Some(domains.split(",").map(|s| s.to_string()).collect()); + if let Some(domains) = app.get_many::<String>("domains") { + let mut final_list_of_domains: Vec<String> = Vec::new(); + let provided_arguments: Vec<&str> = domains.map(|v| v.as_str()).collect::<Vec<_>>(); + for provided_argument in provided_arguments { + let comma_separated_domains: Vec<&str> = provided_argument.split(",").collect(); + for comma_separated_domain in comma_separated_domains { + final_list_of_domains.push(comma_separated_domain.trim().to_string()); + } + } + options.domains = Some(final_list_of_domains); } options.ignore_errors = app.is_present("ignore-errors"); options.exclude_domains = app.is_present("exclude-domains"); diff --git a/src/utils.rs b/src/utils.rs index 461a4a7..d88a42f 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -266,23 +266,14 @@ pub fn retrieve_asset( "".to_string(), )) } else { - if options.exclude_domains { - if let Some(domains) = &options.domains { - if domains - .iter() - .any(|d| domain_is_within_domain(url.host_str().unwrap(), &d.trim())) - { - return Err(client.get("").send().unwrap_err()); - } - } - } else { - if let Some(domains) = &options.domains { - if domains - .iter() - .any(|d| !domain_is_within_domain(url.host_str().unwrap(), &d.trim())) - { - return Err(client.get("").send().unwrap_err()); - } + if let Some(domains) = &options.domains { + let domain_matches = domains + .iter() + .any(|d| domain_is_within_domain(url.host_str().unwrap(), &d.trim())); + if (options.exclude_domains && domain_matches) + || (!options.exclude_domains && !domain_matches) + { + return Err(client.get("").send().unwrap_err()); } }