Merge branch 'master' into change-meta-charset-to-utf-8

pull/245/head
Sunshine 3 years ago committed by GitHub
commit 4921a70dda
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

23
Cargo.lock generated

@ -27,9 +27,9 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"doc-comment 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"predicates 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"predicates-core 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"predicates-tree 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"predicates 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
"predicates-core 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"predicates-tree 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"wait-timeout 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
@ -608,6 +608,7 @@ name = "monolith"
version = "2.4.0"
dependencies = [
"assert_cmd 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)",
"base64 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono 0.4.19 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.33.3 (registry+https://github.com/rust-lang/crates.io-index)",
@ -858,24 +859,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "predicates"
version = "1.0.6"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"predicates-core 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"predicates-core 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "predicates-core"
version = "1.0.1"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "predicates-tree"
version = "1.0.1"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"predicates-core 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"predicates-core 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
@ -1755,9 +1756,9 @@ dependencies = [
"checksum pkg-config 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c"
"checksum ppv-lite86 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"
"checksum precomputed-hash 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
"checksum predicates 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "73dd9b7b200044694dfede9edf907c1ca19630908443e9447e624993700c6932"
"checksum predicates-core 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fb3dbeaaf793584e29c58c7e3a82bbb3c7c06b63cea68d13b0e3cddc124104dc"
"checksum predicates-tree 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aee95d988ee893cb35c06b148c80ed2cd52c8eea927f50ba7a0be1a786aeab73"
"checksum predicates 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eeb433456c1a57cc93554dea3ce40b4c19c4057e41c55d4a0f3d84ea71c325aa"
"checksum predicates-core 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "57e35a3326b75e49aa85f5dc6ec15b41108cf5aee58eabb1f274dd18b73c2451"
"checksum predicates-tree 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "15f553275e5721409451eb85e15fd9a860a6e5ab4496eb215987502b5f5391f2"
"checksum proc-macro-hack 0.5.19 (registry+https://github.com/rust-lang/crates.io-index)" = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
"checksum proc-macro2 1.0.24 (registry+https://github.com/rust-lang/crates.io-index)" = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71"
"checksum quote 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)" = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df"

@ -22,6 +22,7 @@ include = [
license = "Unlicense"
[dependencies]
atty = "0.2" # Used for highlighting network errors
base64 = "0.13.0"
chrono = "0.4.19" # Used for formatting creation timestamp
clap = "2.33.3"

@ -61,7 +61,7 @@ or
## Options
- `-a`: Exclude audio sources
- `-b`: Use custom base URL
- `-b`: Use custom `base URL`
- `-c`: Exclude CSS
- `-e`: Ignore network errors
- `-f`: Omit frames
@ -71,10 +71,10 @@ or
- `-j`: Exclude JavaScript
- `-k`: Accept invalid X.509 (TLS) certificates
- `-M`: Don't add timestamp and URL information
- `-o`: Write output to file
- `-o`: Write output to `file`
- `-s`: Be quiet
- `-t`: Adjust network request timeout
- `-u`: Provide custom User-Agent
- `-t`: Adjust `network request timeout`
- `-u`: Provide `custom User-Agent`
- `-v`: Exclude videos
---------------------------------------------------

@ -1063,45 +1063,19 @@ pub fn walk_and_embed_assets(
if let Some(source_attr_srcset_value) = get_node_attr(node, "srcset") {
if parent_node_name == "picture" {
if options.no_images {
set_node_attr(node, "srcset", Some(str!(empty_image!())));
} else {
let srcset_full_url =
resolve_url(&url, source_attr_srcset_value).unwrap_or_default();
let srcset_url_fragment = get_url_fragment(srcset_full_url.clone());
match retrieve_asset(
cache,
client,
&url,
&srcset_full_url,
options,
depth + 1,
) {
Ok((srcset_data, srcset_final_url, srcset_media_type)) => {
let srcset_data_url = data_to_data_url(
&srcset_media_type,
&srcset_data,
&srcset_final_url,
);
let assembled_url: String = url_with_fragment(
srcset_data_url.as_str(),
srcset_url_fragment.as_str(),
);
set_node_attr(node, "srcset", Some(assembled_url));
}
Err(_) => {
if is_http_url(srcset_full_url.clone()) {
// Keep remote reference if unable to retrieve the asset
let assembled_url: String = url_with_fragment(
srcset_full_url.as_str(),
srcset_url_fragment.as_str(),
);
set_node_attr(node, "srcset", Some(assembled_url));
} else {
// Exclude non-remote URLs
set_node_attr(node, "srcset", None);
}
}
if !source_attr_srcset_value.is_empty() {
if options.no_images {
set_node_attr(node, "srcset", Some(str!(empty_image!())));
} else {
let resolved_srcset: String = embed_srcset(
cache,
client,
&url,
&source_attr_srcset_value,
options,
depth,
);
set_node_attr(node, "srcset", Some(resolved_srcset));
}
}
}
@ -1199,8 +1173,8 @@ pub fn walk_and_embed_assets(
// Empty inner content of STYLE tags
node.children.borrow_mut().clear();
} else {
for node in node.children.borrow_mut().iter_mut() {
if let NodeData::Text { ref contents } = node.data {
for child_node in node.children.borrow_mut().iter_mut() {
if let NodeData::Text { ref contents } = child_node.data {
let mut tendril = contents.borrow_mut();
let replacement = embed_css(
cache,
@ -1436,6 +1410,42 @@ pub fn walk_and_embed_assets(
}
}
}
"noscript" => {
for child_node in node.children.borrow_mut().iter_mut() {
match child_node.data {
NodeData::Text { ref contents } => {
// Get contents of the NOSCRIPT node
let mut noscript_contents = contents.borrow_mut();
// Parse contents of the NOSCRIPT node
let noscript_contents_dom: RcDom = html_to_dom(&noscript_contents);
// Embed assets within the NOSCRIPT node
walk_and_embed_assets(
cache,
client,
&url,
&noscript_contents_dom.document,
&options,
depth,
);
// Get rid of original contents
noscript_contents.clear();
// Insert HTML containing embedded assets into the NOSCRIPT node
if let Some(html) =
get_child_node_by_name(&noscript_contents_dom.document, "html")
{
if let Some(body) = get_child_node_by_name(&html, "body") {
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &body, SerializeOpts::default())
.expect("Unable to serialize DOM into buffer");
let result = String::from_utf8(buf).unwrap();
noscript_contents.push_slice(&result);
}
}
}
_ => {}
}
}
}
_ => {}
}

@ -1,4 +1,5 @@
use clap::{App, Arg};
use std::env;
#[derive(Default)]
pub struct Options {
@ -19,6 +20,7 @@ pub struct Options {
pub user_agent: Option<String>,
pub no_video: bool,
pub target: String,
pub no_color: bool,
}
const ASCII: &'static str = " \
@ -33,6 +35,8 @@ const ASCII: &'static str = " \
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
const DEFAULT_USER_AGENT: &'static str =
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0";
const ENV_VAR_NO_COLOR: &str = "NO_COLOR";
const ENV_VAR_TERM: &str = "TERM";
impl Options {
pub fn from_args() -> Options {
@ -98,6 +102,14 @@ impl Options {
}
options.no_video = app.is_present("no-video");
options.no_color =
env::var_os(ENV_VAR_NO_COLOR).is_some() || atty::isnt(atty::Stream::Stderr);
if let Some(term) = env::var_os(ENV_VAR_TERM) {
if term == "dumb" {
options.no_color = true;
}
}
options
}
}

@ -14,10 +14,10 @@ mod passing {
use crate::opts::Options;
#[test]
fn replace_with_empty_images() {
fn small_medium_large() {
let cache = &mut HashMap::new();
let client = Client::new();
let srcset_value = "small.png 1x, large.png 2x";
let srcset_value = "small.png 1x, medium.png 1.5x, large.png 2x";
let mut options = Options::default();
options.no_images = true;
options.silent = true;
@ -25,7 +25,28 @@ mod passing {
assert_eq!(
embedded_css,
format!("{} 1x, {} 2x", empty_image!(), empty_image!()),
format!(
"{} 1x, {} 1.5x, {} 2x",
empty_image!(),
empty_image!(),
empty_image!(),
),
);
}
// With images disabled, each srcset candidate is expected to come back as the
// empty-image placeholder; the candidate without a descriptor stays without one
// and the "1.5x" descriptor is kept on the second candidate.
#[test]
fn small_medium_only_medium_has_scale() {
    // Only the second candidate carries a pixel-density descriptor
    let srcset_value = "small.png, medium.png 1.5x";

    let mut options = Options::default();
    options.no_images = true;
    options.silent = true;

    let client = Client::new();
    let cache = &mut HashMap::new();

    let embedded_srcset = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
    let expected_srcset = format!("{}, {} 1.5x", empty_image!(), empty_image!());

    assert_eq!(embedded_srcset, expected_srcset);
}

@ -326,4 +326,45 @@ mod passing {
</html>"
);
}
// Checks that markup nested inside NOSCRIPT gets processed: with images
// disabled, the IMG `src` within the NOSCRIPT element is expected to be
// replaced by the empty-image placeholder in the serialized document.
#[test]
fn processes_noscript_tags() {
    let html = "<html>\
                    <body>\
                        <noscript>\
                            <img src=\"image.png\" />\
                        </noscript>\
                    </body>\
                </html>";
    let url = "http://localhost";

    let mut options = Options::default();
    options.no_images = true;
    options.silent = true;

    let client = Client::new();
    let cache = &mut HashMap::new();
    let dom = html::html_to_dom(&html);

    html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

    // Serialize the whole document back into bytes
    let mut serialized: Vec<u8> = Vec::new();
    serialize(&mut serialized, &dom.document, SerializeOpts::default()).unwrap();

    // Map every byte to the char with the same code point (no UTF-8 decoding)
    let actual: String = serialized.iter().map(|&byte| char::from(byte)).collect();

    // The expected output contains an empty HEAD that is absent from the input
    let expected = format!(
        "<html>\
            <head>\
            </head>\
            <body>\
                <noscript>\
                    <img src=\"{}\">\
                </noscript>\
            </body>\
        </html>",
        empty_image!(),
    );

    assert_eq!(actual, expected);
}
}

@ -7,6 +7,8 @@ use std::path::Path;
use crate::opts::Options;
use crate::url::{clean_url, file_url_to_fs_path, is_data_url, is_file_url, parse_data_url};
const ANSI_COLOR_RED: &'static str = "\x1b[31m";
const ANSI_COLOR_RESET: &'static str = "\x1b[0m";
const INDENT: &'static str = " ";
const MAGIC: [[&[u8]; 2]; 18] = [
@ -32,7 +34,6 @@ const MAGIC: [[&[u8]; 2]; 18] = [
[b"....moov", b"video/quicktime"],
[b"\x1A\x45\xDF\xA3", b"video/webm"],
];
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
"application/javascript",
"image/svg+xml",
@ -128,7 +129,18 @@ pub fn retrieve_asset(
Ok(mut response) => {
if !options.ignore_errors && response.status() != 200 {
if !options.silent {
eprintln!("Unable to retrieve {} (error: {})", &url, response.status());
eprintln!(
"{}{}{} ({}){}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
response.status(),
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
// Provoke error
return Err(client.get("").send().unwrap_err());

Loading…
Cancel
Save