diff --git a/Cargo.lock b/Cargo.lock index 6f1acc9..c9476ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -71,11 +71,40 @@ name = "bitflags" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "block-buffer" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "block-padding 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", + "generic-array 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "block-padding" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "bumpalo" version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "byte-tools" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "byteorder" +version = "1.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "bytes" version = "0.5.3" @@ -173,6 +202,14 @@ name = "difference" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "digest" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "generic-array 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "doc-comment" version = "0.3.1" @@ -210,6 +247,11 @@ dependencies = [ "serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "fake-simd" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "flate2" version = "1.0.13" @@ -308,6 +350,14 @@ dependencies = [ "slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "generic-array" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "typenum 1.12.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "getrandom" version = "0.1.13" @@ -581,6 +631,7 @@ dependencies = [ "cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)", "html5ever 0.24.1 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", + "sha2 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)", "url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -636,6 +687,11 @@ dependencies = [ "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "opaque-debug" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "openssl" version = "0.10.26" @@ -1102,6 +1158,17 @@ dependencies = [ "url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "sha2" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "block-buffer 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", + "digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", + "fake-simd 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "opaque-debug 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "siphasher" version = "0.2.3" @@ -1267,6 +1334,11 @@ name = "try-lock" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "typenum" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "unicase" version = "2.6.0" @@ -1509,7 +1581,11 @@ dependencies = [ "checksum base64 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7" "checksum base64 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7d5ca2cd0adc3f48f9e9ea5a6bbdf9ccc0bfade884847e484d452414c7ccffb3" "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +"checksum block-buffer 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b" +"checksum block-padding 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5" "checksum bumpalo 3.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8fe2567a8d8a3aedb4e39aa39e186d5673acfd56393c6ac83b2bc5bd82f4369c" +"checksum byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" +"checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" "checksum bytes 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "10004c15deb332055f7a4a208190aed362cf9a7c2f6ab70a305fba50e1105f38" "checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" "checksum cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)" = "aa87058dce70a3ff5621797f1506cb837edd02ac4c0ae642b4542dce802908b8" @@ -1522,11 +1598,13 @@ dependencies = [ "checksum cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)" = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" "checksum cssparser-macros 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e" "checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" +"checksum digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5" "checksum doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "923dea538cea0aa3025e8685b20d6ee21ef99c4f77e954a30febbaac5ec73a97" "checksum dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "ea57b42383d091c85abcc2706240b94ab2a8fa1fc81c10ff23c4de06e2a90b5e" "checksum dtoa-short 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "59020b8513b76630c49d918c33db9f4c91638e7d3404a28084083b87e33f76f2" "checksum encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)" = "87240518927716f79692c2ed85bfe6e98196d18c6401ec75355760233a7e12e9" "checksum escargot 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "74cf96bec282dcdb07099f7e31d9fed323bca9435a09aba7b6d99b7617bca96d" +"checksum fake-simd 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" "checksum flate2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6bd6d6f4752952feb71363cffc9ebac9411b75b87c6ab6058c40c8900cf43c0f" "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" "checksum foreign-types 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" @@ -1541,6 +1619,7 @@ dependencies = [ "checksum futures-sink 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "171be33efae63c2d59e6dbba34186fe0d6394fb378069a76dfd80fdcffd43c16" "checksum futures-task 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0bae52d6b29cf440e298856fec3965ee6fa71b06aa7495178615953fd669e5f9" "checksum futures-util 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c0d66274fb76985d3c62c886d1da7ac4c0903a8c9f754e8fe0f35a6a6cc39e76" +"checksum generic-array 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c68f0274ae0e023facc3c97b2e00f076be70e254bc851d972503b328db79b2ec" "checksum getrandom 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "e7db7ca94ed4cd01190ceee0d8a8052f08a247aa1b469a7f68c6a3b71afcf407" "checksum h2 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b9433d71e471c1736fd5a61b671fc0b148d7a2992f666c958d03cd8feb3b88d1" "checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" @@ -1574,6 +1653,7 @@ dependencies = [ "checksum new_debug_unreachable 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f40f005c60db6e03bae699e414c58bf9aa7ea02a2d0b9bfbcf19286cc4c82b30" "checksum nom 4.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6" "checksum num_cpus 1.11.1 (registry+https://github.com/rust-lang/crates.io-index)" = "76dac5ed2a876980778b8b85f75a71b6cbf0db0b1232ee12f826bccb00d09d72" +"checksum opaque-debug 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c" "checksum openssl 0.10.26 (registry+https://github.com/rust-lang/crates.io-index)" = "3a3cc5799d98e1088141b8e01ff760112bbd9f19d850c124500566ca6901a585" "checksum openssl-probe 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "77af24da69f9d9341038eba93a073b1fdaaa1b788221b00a69bce9e762cb32de" "checksum openssl-sys 0.9.53 (registry+https://github.com/rust-lang/crates.io-index)" = "465d16ae7fc0e313318f7de5cecf57b2fbe7511fd213978b457e1c96ff46736f" @@ -1626,6 +1706,7 @@ dependencies = [ "checksum serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)" = "a8c6faef9a2e64b0064f48570289b4bf8823b7581f1d6157c1b52152306651d0" "checksum serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)" = "1a3351dcbc1f067e2c92ab7c3c1f288ad1a4cffc470b5aaddb4c2e0a3ae80043" "checksum serde_urlencoded 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9ec5d77e2d4c73717816afac02670d5c4f534ea95ed430442cad02e7a6e32c97" +"checksum sha2 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "27044adfd2e1f077f649f59deb9490d3941d674002f7d062870a60ebe9bd47a0" "checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac" "checksum siphasher 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8e88f89a550c01e4cd809f3df4f52dc9e939f3273a2017eabd5c6d12fd98bb23" "checksum slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" @@ -1646,6 +1727,7 @@ dependencies = [ "checksum tower-service 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e987b6bf443f4b5b3b6f38704195592cca41c5bb7aedd3c3693c7081f8289860" "checksum treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41" "checksum try-lock 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e604eb7b43c06650e854be16a2a03155743d3752dd1c943f6829e26b7a36e382" +"checksum typenum 1.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "373c8a200f9e67a0c95e62a4f52fbf80c23b4381c05a17845531982fa99e6b33" "checksum unicase 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" "checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" "checksum unicode-normalization 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "b561e267b2326bb4cebfc0ef9e68355c7abe6c6f522aeac2f5bf95d56c59bdcf" diff --git a/Cargo.toml b/Cargo.toml index 08f51e2..f63fe01 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,12 +16,10 @@ base64 = "0.12.0" clap = "2.33.0" cssparser = "0.27.2" html5ever = "0.24.1" +sha2 = "0.8.1" # Used in calculating checksums during integrity checks +time = "0.1.42" # Used to render comments indicating the time the page was saved url = "2.1.1" -# Used to render comments indicating the time the page was saved -# also required by reqwest as of v0.10.0 -time = "0.1.42" - [dependencies.reqwest] version = "0.10.*" default-features = false diff --git a/src/css.rs b/src/css.rs index ed84ecc..3357a56 100644 --- a/src/css.rs +++ b/src/css.rs @@ -2,7 +2,7 @@ use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token}; use reqwest::blocking::Client; use std::collections::HashMap; -use crate::utils::{data_to_data_url, get_url_fragment, resolve_url, retrieve_asset}; +use crate::utils::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, retrieve_asset}; const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[ // Universal @@ -169,40 +169,39 @@ pub fn process_css<'a>( continue; } - let full_url = resolve_url(&parent_url, value).unwrap_or_default(); - let url_fragment = get_url_fragment(full_url.clone()); - let (css, final_url) = retrieve_asset( - cache, - client, - &parent_url, - &full_url, - false, - "", - opt_silent, - ) - .unwrap_or_default(); - - result.push_str( - enquote( - data_to_data_url( - "text/css", - embed_css( - cache, - client, - final_url.as_str(), - &css, - opt_no_fonts, - opt_no_images, - opt_silent, + let import_full_url = resolve_url(&parent_url, value).unwrap_or_default(); + let import_url_fragment = get_url_fragment(import_full_url.clone()); + match retrieve_asset(cache, client, &parent_url, &import_full_url, opt_silent) { + Ok((import_contents, import_final_url, _import_media_type)) => { + result.push_str( + enquote( + data_to_data_url( + "text/css", + embed_css( + cache, + client, + &import_final_url, + &String::from_utf8_lossy(&import_contents), + opt_no_fonts, + opt_no_images, + opt_silent, + ) + .as_bytes(), + &import_final_url, + &import_url_fragment, + ), + false, ) - .as_bytes(), - &final_url, - url_fragment.as_str(), - ), - false, - ) - .as_str(), - ); + .as_str(), + ); + } + Err(_) => { + // Keep remote reference if unable to retrieve the asset + if is_http_url(import_full_url.clone()) { + result.push_str(enquote(import_full_url, false).as_str()); + } + } + } } else { if func_name == "url" { // Skip empty url()'s @@ -214,17 +213,30 @@ pub fn process_css<'a>( result.push_str(enquote(str!(empty_image!()), false).as_str()); } else { let resolved_url = resolve_url(&parent_url, value).unwrap_or_default(); - let (data_url, _final_url) = retrieve_asset( + let url_fragment = get_url_fragment(resolved_url.clone()); + match retrieve_asset( cache, client, &parent_url, &resolved_url, - true, - "", opt_silent, - ) - .unwrap_or_default(); - result.push_str(enquote(data_url, false).as_str()); + ) { + Ok((data, final_url, media_type)) => { + let data_url = data_to_data_url( + &media_type, + &data, + &final_url, + &url_fragment, + ); + result.push_str(enquote(data_url, false).as_str()); + } + Err(_) => { + // Keep remote reference if unable to retrieve the asset + if is_http_url(resolved_url.clone()) { + result.push_str(enquote(resolved_url, false).as_str()); + } + } + } } } else { result.push_str(enquote(str!(value), false).as_str()); @@ -293,54 +305,51 @@ pub fn process_css<'a>( if is_import { let full_url = resolve_url(&parent_url, value).unwrap_or_default(); let url_fragment = get_url_fragment(full_url.clone()); - let (css, final_url) = retrieve_asset( - cache, - client, - &parent_url, - &full_url, - false, - "", - opt_silent, - ) - .unwrap_or_default(); - - result.push_str( - enquote( - data_to_data_url( + match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) { + Ok((css, final_url, _media_type)) => { + let data_url = data_to_data_url( "text/css", embed_css( cache, client, - final_url.as_str(), - &css, + &final_url, + &String::from_utf8_lossy(&css), opt_no_fonts, opt_no_images, opt_silent, ) .as_bytes(), &final_url, - url_fragment.as_str(), - ), - false, - ) - .as_str(), - ); + &url_fragment, + ); + result.push_str(enquote(data_url, false).as_str()); + } + Err(_) => { + // Keep remote reference if unable to retrieve the asset + if is_http_url(full_url.clone()) { + result.push_str(enquote(full_url, false).as_str()); + } + } + } } else { if opt_no_images && is_image_url_prop(curr_prop.as_str()) { result.push_str(enquote(str!(empty_image!()), false).as_str()); } else { let full_url = resolve_url(&parent_url, value).unwrap_or_default(); - let (data_url, _final_url) = retrieve_asset( - cache, - client, - &parent_url, - &full_url, - true, - "", - opt_silent, - ) - .unwrap_or_default(); - result.push_str(enquote(data_url, false).as_str()); + let url_fragment = get_url_fragment(full_url.clone()); + match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) { + Ok((data, final_url, media_type)) => { + let data_url = + data_to_data_url(&media_type, &data, &final_url, &url_fragment); + result.push_str(enquote(data_url, false).as_str()); + } + Err(_) => { + // Keep remote reference if unable to retrieve the asset + if is_http_url(full_url.clone()) { + result.push_str(enquote(full_url, false).as_str()); + } + } + } } } result.push_str(")"); diff --git a/src/html.rs b/src/html.rs index dd4e025..d1b8330 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1,6 +1,9 @@ use crate::css::embed_css; use crate::js::attr_is_event_handler; -use crate::utils::{data_to_data_url, is_http_url, resolve_url, retrieve_asset, url_has_protocol}; +use crate::utils::{ + data_to_data_url, get_url_fragment, is_http_url, resolve_url, retrieve_asset, url_has_protocol, +}; +use base64; use html5ever::interface::QualName; use html5ever::parse_document; use html5ever::rcdom::{Handle, NodeData, RcDom}; @@ -9,6 +12,7 @@ use html5ever::tendril::{format_tendril, Tendril, TendrilSink}; use html5ever::tree_builder::{Attribute, TreeSink}; use html5ever::{local_name, namespace_url, ns}; use reqwest::blocking::Client; +use sha2::{Digest, Sha256, Sha384, Sha512}; use std::collections::HashMap; use std::default::Default; @@ -36,6 +40,24 @@ pub fn is_icon(attr_value: &str) -> bool { ICON_VALUES.contains(&attr_value.to_lowercase().as_str()) } +pub fn has_proper_integrity(data: &[u8], integrity: &str) -> bool { + if integrity.starts_with("sha256-") { + let mut hasher = Sha256::new(); + hasher.input(data); + base64::encode(hasher.result()) == integrity[7..] + } else if integrity.starts_with("sha384-") { + let mut hasher = Sha384::new(); + hasher.input(data); + base64::encode(hasher.result()) == integrity[7..] + } else if integrity.starts_with("sha512-") { + let mut hasher = Sha512::new(); + hasher.input(data); + base64::encode(hasher.result()) == integrity[7..] + } else { + false + } +} + pub fn walk_and_embed_assets( cache: &mut HashMap>, client: &Client, @@ -75,12 +97,13 @@ pub fn walk_and_embed_assets( match name.local.as_ref() { "link" => { - // Remove integrity attributes + // Remove integrity attributes, keep value of the last one + let mut integrity: String = str!(); let mut i = 0; while i < attrs_mut.len() { - let attr_name = attrs_mut[i].name.local.as_ref(); + let attr_name: &str = &attrs_mut[i].name.local; if attr_name.eq_ignore_ascii_case("integrity") { - attrs_mut.remove(i); + integrity = str!(attrs_mut.remove(i).value.trim()); } else { i += 1; } @@ -117,93 +140,166 @@ pub fn walk_and_embed_assets( match link_type { LinkType::Icon => { - for attr in attrs_mut.iter_mut() { - if &attr.name.local == "href" { - if opt_no_images { - attr.value.clear(); - } else { - let href_full_url = resolve_url(&url, attr.value.as_ref()) - .unwrap_or_default(); - let (favicon_data_url, _) = retrieve_asset( - cache, - client, - &url, - &href_full_url, - true, - "", - opt_silent, - ) - .unwrap_or_default(); - attr.value.clear(); - attr.value.push_slice(favicon_data_url.as_str()); + // Find and remove href attribute(s), keep value of the last found one + let mut link_href: String = str!(); + let mut i = 0; + while i < attrs_mut.len() { + let attr_name: &str = &attrs_mut[i].name.local; + if attr_name.eq_ignore_ascii_case("href") { + link_href = str!(attrs_mut.remove(i).value.trim()); + } else { + i += 1; + } + } + + if !opt_no_images && !link_href.is_empty() { + let link_href_full_url = + resolve_url(&url, link_href).unwrap_or_default(); + let link_href_url_fragment = + get_url_fragment(link_href_full_url.clone()); + match retrieve_asset( + cache, + client, + &url, + &link_href_full_url, + opt_silent, + ) { + Ok(( + link_href_data, + link_href_final_url, + link_href_media_type, + )) => { + // Check integrity + if integrity.is_empty() + || has_proper_integrity(&link_href_data, &integrity) + { + let link_href_data_url = data_to_data_url( + &link_href_media_type, + &link_href_data, + &link_href_final_url, + &link_href_url_fragment, + ); + // Add new data URL href attribute + attrs_mut.push(Attribute { + name: QualName::new( + None, + ns!(), + local_name!("href"), + ), + value: Tendril::from_slice( + link_href_data_url.as_ref(), + ), + }); + } + } + Err(_) => { + // Keep remote reference if unable to retrieve the asset + if is_http_url(link_href_full_url.clone()) { + attrs_mut.push(Attribute { + name: QualName::new( + None, + ns!(), + local_name!("href"), + ), + value: Tendril::from_slice( + link_href_full_url.as_ref(), + ), + }); + } } } } } LinkType::Stylesheet => { - for attr in attrs_mut.iter_mut() { - if &attr.name.local == "href" { - if opt_no_css { - attr.value.clear(); - } else { - let href_full_url = resolve_url(&url, &attr.value.as_ref()) - .unwrap_or_default(); - let replacement_text = match retrieve_asset( - cache, - client, - &url, - &href_full_url, - false, - "text/css", - opt_silent, - ) { - // On successful retrieval, traverse CSS - Ok((css_data, final_url)) => { - let css: String = embed_css( - cache, - client, - &final_url, - &css_data, - opt_no_fonts, - opt_no_images, - opt_silent, - ); - data_to_data_url( - "text/css", - css.as_bytes(), - &final_url, - "", - ) - } - - // If a network error occured, warn - Err(e) => { - eprintln!("Warning: {}", e); - - // If failed to resolve, replace with absolute URL - href_full_url - } - }; + // Find and remove href attribute(s), keep value of the last found one + let mut link_href: String = str!(); + let mut i = 0; + while i < attrs_mut.len() { + let attr_name: &str = &attrs_mut[i].name.local; + if attr_name.eq_ignore_ascii_case("href") { + link_href = str!(attrs_mut.remove(i).value.trim()); + } else { + i += 1; + } + } - attr.value.clear(); - attr.value.push_slice(&replacement_text); + if !opt_no_css && !link_href.is_empty() { + let link_href_full_url = + resolve_url(&url, link_href).unwrap_or_default(); + match retrieve_asset( + cache, + client, + &url, + &link_href_full_url, + opt_silent, + ) { + Ok(( + link_href_data, + link_href_final_url, + _link_href_media_type, + )) => { + // Check integrity + if integrity.is_empty() + || has_proper_integrity(&link_href_data, &integrity) + { + let css: String = embed_css( + cache, + client, + &link_href_final_url, + &String::from_utf8_lossy(&link_href_data), + opt_no_fonts, + opt_no_images, + opt_silent, + ); + let link_href_data_url = data_to_data_url( + "text/css", + css.as_bytes(), + &link_href_final_url, + "", + ); + // Add new data URL href attribute + attrs_mut.push(Attribute { + name: QualName::new( + None, + ns!(), + local_name!("href"), + ), + value: Tendril::from_slice( + link_href_data_url.as_ref(), + ), + }); + } + } + Err(_) => { + // Keep remote reference if unable to retrieve the asset + if is_http_url(link_href_full_url.clone()) { + attrs_mut.push(Attribute { + name: QualName::new( + None, + ns!(), + local_name!("href"), + ), + value: Tendril::from_slice( + link_href_full_url.as_ref(), + ), + }); + } } } } } LinkType::Preload | LinkType::DnsPrefetch => { // Since all resources are embedded as data URL, preloading and prefetching are unnecessary - if let Some(attr) = - attrs_mut.iter_mut().find(|a| &a.name.local == "href") - { - attr.value.clear(); + for _ in 0..attrs_mut.len() { + attrs_mut.remove(0); } } LinkType::Unknown => { for attr in attrs_mut.iter_mut() { - if &attr.name.local == "href" { + let attr_name: &str = &attr.name.local; + if attr_name.eq_ignore_ascii_case("href") { let href_full_url = - resolve_url(&url, attr.value.as_ref()).unwrap_or_default(); + resolve_url(&url, attr.value.trim()).unwrap_or_default(); attr.value.clear(); attr.value.push_slice(&href_full_url.as_str()); } @@ -212,186 +308,236 @@ pub fn walk_and_embed_assets( } } "body" => { - // Find and remove background attribute(s), keep reference to the last one - let mut found_background: Option = None; + // Find and remove background attribute(s), keep value of the last found one + let mut background: String = str!(); let mut i = 0; while i < attrs_mut.len() { - let attr_name = attrs_mut[i].name.local.as_ref(); + let attr_name: &str = &attrs_mut[i].name.local; if attr_name.eq_ignore_ascii_case("background") { - found_background = Some(attrs_mut.remove(i)); + background = str!(attrs_mut.remove(i).value.trim()); } else { i += 1; } } - if !opt_no_images { - if let Some((data_url, _)) = found_background - .iter() - .map(|attr| attr.value.trim()) - .filter(|background| !background.is_empty()) // Skip if empty - .next() - .and_then(|background| resolve_url(&url, background).ok()) // Make absolute - .and_then(|abs_src| // Download and convert to data_url - retrieve_asset( - cache, - client, - &url, - &abs_src, - true, - "", - opt_silent, - ).ok()) + if !opt_no_images && !background.is_empty() { + let background_full_url = resolve_url(&url, background).unwrap_or_default(); + let background_url_fragment = get_url_fragment(background_full_url.clone()); + match retrieve_asset(cache, client, &url, &background_full_url, opt_silent) { - // Add new data_url background attribute - attrs_mut.push(Attribute { - name: QualName::new(None, ns!(), local_name!("background")), - value: Tendril::from_slice(data_url.as_ref()), - }); + Ok((background_data, background_final_url, background_media_type)) => { + let background_data_url = data_to_data_url( + &background_media_type, + &background_data, + &background_final_url, + &background_url_fragment, + ); + // Add new data URL background attribute + attrs_mut.push(Attribute { + name: QualName::new(None, ns!(), local_name!("background")), + value: Tendril::from_slice(background_data_url.as_ref()), + }); + } + Err(_) => { + // Keep remote reference if unable to retrieve the asset + if is_http_url(background_full_url.clone()) { + attrs_mut.push(Attribute { + name: QualName::new(None, ns!(), local_name!("background")), + value: Tendril::from_slice(background_full_url.as_ref()), + }); + } + } } } } "img" => { // Find source attribute(s) - let mut found_src: Option = None; - let mut found_datasrc: Option = None; + let mut img_src: String = str!(); + let mut img_data_src: String = str!(); let mut i = 0; while i < attrs_mut.len() { - let attr_name = attrs_mut[i].name.local.as_ref(); + let attr_name: &str = &attrs_mut[i].name.local; if attr_name.eq_ignore_ascii_case("src") { - found_src = Some(attrs_mut.remove(i)); + img_src = str!(attrs_mut.remove(i).value.trim()); } else if attr_name.eq_ignore_ascii_case("data-src") { - found_datasrc = Some(attrs_mut.remove(i)); + img_data_src = str!(attrs_mut.remove(i).value.trim()); } else { i += 1; } } - // If images are disabled, clear both sources if opt_no_images { + // Add empty image src attribute attrs_mut.push(Attribute { name: QualName::new(None, ns!(), local_name!("src")), value: Tendril::from_slice(empty_image!()), }); - } else if let Some((data_url, _)) = found_datasrc - .iter() - .chain(&found_src) // Give data_url priority - .map(|attr| attr.value.trim()) - .filter(|src| !src.is_empty()) // Skip if empty - .next() - .and_then(|src| resolve_url(&url, src).ok()) // Make absolute - .and_then(|abs_src| // Download and convert to data_url - retrieve_asset( - cache, - client, + } else { + if img_src.is_empty() && img_data_src.is_empty() { + // Add empty src attribute + attrs_mut.push(Attribute { + name: QualName::new(None, ns!(), local_name!("src")), + value: Tendril::from_slice(""), + }); + } else { + // Add data URL src attribute + let img_full_url = resolve_url( &url, - &abs_src, - true, - "", - opt_silent, - ).ok()) - { - // Add new data_url src attribute - attrs_mut.push(Attribute { - name: QualName::new(None, ns!(), local_name!("src")), - value: Tendril::from_slice(data_url.as_ref()), - }); + if !img_data_src.is_empty() { + img_data_src + } else { + img_src + }, + ) + .unwrap_or_default(); + let img_url_fragment = get_url_fragment(img_full_url.clone()); + match retrieve_asset(cache, client, &url, &img_full_url, opt_silent) { + Ok((img_data, img_final_url, img_media_type)) => { + let img_data_url = data_to_data_url( + &img_media_type, + &img_data, + &img_final_url, + &img_url_fragment, + ); + attrs_mut.push(Attribute { + name: QualName::new(None, ns!(), local_name!("src")), + value: Tendril::from_slice(img_data_url.as_ref()), + }); + } + Err(_) => { + // Keep remote reference if unable to retrieve the asset + if is_http_url(img_full_url.clone()) { + attrs_mut.push(Attribute { + name: QualName::new(None, ns!(), local_name!("src")), + value: Tendril::from_slice(img_full_url.as_ref()), + }); + } + } + } + } + } + } + "svg" => { + if opt_no_images { + node.children.borrow_mut().clear(); } } "input" => { - let mut is_image: bool = false; + // Determine input type + let mut is_image_input: bool = false; for attr in attrs_mut.iter_mut() { let attr_name: &str = &attr.name.local; - if attr_name == "type" { - is_image = attr.value.to_string().eq_ignore_ascii_case("image"); + if attr_name.eq_ignore_ascii_case("type") { + is_image_input = attr.value.to_string().eq_ignore_ascii_case("image"); } } - if is_image { - let mut found_src: Option = None; + if is_image_input { + let mut input_image_src: String = str!(); let mut i = 0; while i < attrs_mut.len() { - let attr_name = attrs_mut[i].name.local.as_ref(); + let attr_name: &str = &attrs_mut[i].name.local; if attr_name.eq_ignore_ascii_case("src") { - found_src = Some(attrs_mut.remove(i)); + input_image_src = str!(attrs_mut.remove(i).value.trim()); } else { i += 1; } } - // If images are disabled, clear both sources - if opt_no_images { - attrs_mut.push(Attribute { - name: QualName::new(None, ns!(), local_name!("src")), - value: Tendril::from_slice(empty_image!()), - }); - } else if let Some((data_url, _)) = found_src - .iter() - .map(|attr| attr.value.trim()) - .filter(|src| !src.is_empty()) // Skip if empty - .next() - .and_then(|src| resolve_url(&url, src).ok()) // Make absolute - .and_then(|abs_src| // Download and convert to data_url - retrieve_asset( - cache, - client, - &url, - &abs_src, - true, - "", - opt_silent, - ).ok()) - { - // Add new data_url src attribute + if opt_no_images || input_image_src.is_empty() { attrs_mut.push(Attribute { name: QualName::new(None, ns!(), local_name!("src")), - value: Tendril::from_slice(data_url.as_ref()), + value: Tendril::from_slice(if input_image_src.is_empty() { + "" + } else { + empty_image!() + }), }); + } else { + let input_image_full_url = + resolve_url(&url, input_image_src).unwrap_or_default(); + let input_image_url_fragment = + get_url_fragment(input_image_full_url.clone()); + match retrieve_asset( + cache, + client, + &url, + &input_image_full_url, + opt_silent, + ) { + Ok(( + input_image_data, + input_image_final_url, + input_image_media_type, + )) => { + let input_image_data_url = data_to_data_url( + &input_image_media_type, + &input_image_data, + &input_image_final_url, + &input_image_url_fragment, + ); + // Add data URL src attribute + attrs_mut.push(Attribute { + name: QualName::new(None, ns!(), local_name!("src")), + value: Tendril::from_slice(input_image_data_url.as_ref()), + }); + } + Err(_) => { + // Keep remote reference if unable to retrieve the asset + if is_http_url(input_image_full_url.clone()) { + attrs_mut.push(Attribute { + name: QualName::new(None, ns!(), local_name!("src")), + value: Tendril::from_slice( + input_image_full_url.as_ref(), + ), + }); + } + } + } } } } - "svg" => { - if opt_no_images { - node.children.borrow_mut().clear(); - } - } "image" => { - // Find and remove (xlink:)href attribute(s), keep reference to the last one - let mut image_href: Option = None; + // Find and remove (xlink:)href attribute(s), keep value of the last one + let mut image_href: String = str!(); let mut i = 0; while i < attrs_mut.len() { - let attr_name = attrs_mut[i].name.local.as_ref(); + let attr_name: &str = &attrs_mut[i].name.local; if attr_name.eq_ignore_ascii_case("xlink:href") || attr_name.eq_ignore_ascii_case("href") { - image_href = Some(attrs_mut.remove(i)); + image_href = str!(attrs_mut.remove(i).value.trim()); } else { i += 1; } } - if !opt_no_images { - if let Some((data_url, _)) = image_href - .iter() - .map(|attr| attr.value.trim()) - .filter(|href| !href.is_empty()) // Skip if empty - .next() - .and_then(|href| resolve_url(&url, href).ok()) // Make absolute - .and_then(|abs_href| // Download and convert to data_url - retrieve_asset( - cache, - client, - &url, - &abs_href, - true, - "", - opt_silent, - ).ok()) - { - // Add new data_url href attribute - attrs_mut.push(Attribute { - name: QualName::new(None, ns!(), local_name!("href")), - value: Tendril::from_slice(data_url.as_ref()), - }); + if !opt_no_images && !image_href.is_empty() { + let image_full_url = resolve_url(&url, image_href).unwrap_or_default(); + let image_url_fragment = get_url_fragment(image_full_url.clone()); + match retrieve_asset(cache, client, &url, &image_full_url, opt_silent) { + Ok((image_data, image_final_url, image_media_type)) => { + let image_data_url = data_to_data_url( + &image_media_type, + &image_data, + &image_final_url, + &image_url_fragment, + ); + // Add new data URL href attribute + attrs_mut.push(Attribute { + name: QualName::new(None, ns!(), local_name!("href")), + value: Tendril::from_slice(image_data_url.as_ref()), + }); + } + Err(_) => { + // Keep remote reference if unable to retrieve the asset + if is_http_url(image_full_url.clone()) { + attrs_mut.push(Attribute { + name: QualName::new(None, ns!(), local_name!("href")), + value: Tendril::from_slice(image_full_url.as_ref()), + }); + } + } } } } @@ -399,12 +545,12 @@ pub fn walk_and_embed_assets( for attr in attrs_mut.iter_mut() { let attr_name: &str = &attr.name.local; - if attr_name == "src" { + if attr_name.eq_ignore_ascii_case("src") { let src_full_url = resolve_url(&url, attr.value.trim()) .unwrap_or_else(|_| attr.value.to_string()); attr.value.clear(); attr.value.push_slice(src_full_url.as_str()); - } else if attr_name == "srcset" { + } else if attr_name.eq_ignore_ascii_case("srcset") { if get_node_name(&get_parent_node(&node)) == Some("picture") { if opt_no_images { attr.value.clear(); @@ -412,18 +558,38 @@ pub fn walk_and_embed_assets( } else { let srcset_full_url = resolve_url(&url, attr.value.trim()).unwrap_or_default(); - let (source_data_url, _) = retrieve_asset( + let srcset_url_fragment = + get_url_fragment(srcset_full_url.clone()); + match retrieve_asset( cache, client, &url, &srcset_full_url, - true, - "", opt_silent, - ) - .unwrap_or((str!(), str!())); - attr.value.clear(); - attr.value.push_slice(source_data_url.as_str()); + ) { + Ok((srcset_data, srcset_final_url, srcset_media_type)) => { + let srcset_data_url = data_to_data_url( + &srcset_media_type, + &srcset_data, + &srcset_final_url, + &srcset_url_fragment, + ); + attr.value.clear(); + attr.value.push_slice(srcset_data_url.as_str()); + } + Err(_) => { + // Keep remote reference if unable to retrieve the asset + if is_http_url(srcset_full_url.clone()) { + attr.value.clear(); + attr.value.push_slice(srcset_full_url.as_str()); + if !srcset_url_fragment.is_empty() { + attr.value.push_slice("#"); + attr.value + .push_slice(srcset_url_fragment.as_str()); + } + } + } + } } } } @@ -431,7 +597,8 @@ pub fn walk_and_embed_assets( } "a" | "area" => { for attr in attrs_mut.iter_mut() { - if &attr.name.local == "href" { + let attr_name: &str = &attr.name.local; + if attr_name.eq_ignore_ascii_case("href") { let attr_value = attr.value.trim(); if opt_no_js && attr_value.starts_with("javascript:") { @@ -453,44 +620,55 @@ pub fn walk_and_embed_assets( } } "script" => { - // Remove integrity attributes + // Remove integrity and src attributes, keep values of the last ones + let mut script_integrity: String = str!(); + let mut script_src: String = str!(); let mut i = 0; while i < attrs_mut.len() { - let attr_name = attrs_mut[i].name.local.as_ref(); + let attr_name: &str = &attrs_mut[i].name.local; if attr_name.eq_ignore_ascii_case("integrity") { - attrs_mut.remove(i); + script_integrity = str!(attrs_mut.remove(i).value.trim()); + } else if attr_name.eq_ignore_ascii_case("src") { + script_src = str!(attrs_mut.remove(i).value.trim()); } else { i += 1; } } if opt_no_js { - // Empty src and inner content of SCRIPT tags - for attr in attrs_mut.iter_mut() { - if &attr.name.local == "src" { - attr.value.clear(); - } - } + // Empty inner content (src is already gone) node.children.borrow_mut().clear(); - } else { - for attr in attrs_mut.iter_mut() { - if &attr.name.local == "src" { - let src_full_url = - resolve_url(&url, attr.value.trim()).unwrap_or_default(); - let (js_data_url, _) = retrieve_asset( - cache, - client, - &url, - &src_full_url, - true, - "application/javascript", - opt_silent, - ) - .unwrap_or((str!(), str!())); - attr.value.clear(); - attr.value.push_slice(js_data_url.as_str()); + } else if !script_src.is_empty() { + let script_full_url = resolve_url(&url, script_src).unwrap_or_default(); + match retrieve_asset(cache, client, &url, &script_full_url, opt_silent) { + Ok((script_data, script_final_url, _script_media_type)) => { + // Only embed if we're able to validate integrity + if script_integrity.is_empty() + || has_proper_integrity(&script_data, &script_integrity) + { + let script_data_url = data_to_data_url( + "application/javascript", + &script_data, + &script_final_url, + "", + ); + // Add new data URL src attribute + attrs_mut.push(Attribute { + name: QualName::new(None, ns!(), local_name!("src")), + value: Tendril::from_slice(script_data_url.as_ref()), + }); + } } - } + Err(_) => { + // Keep remote reference if unable to retrieve the asset + if is_http_url(script_full_url.clone()) { + attrs_mut.push(Attribute { + name: QualName::new(None, ns!(), local_name!("src")), + value: Tendril::from_slice(script_full_url.as_ref()), + }); + } + } + }; } } "style" => { @@ -518,21 +696,23 @@ pub fn walk_and_embed_assets( } "form" => { for attr in attrs_mut.iter_mut() { - if &attr.name.local == "action" { - let attr_value = attr.value.trim(); - // Modify action to be a full URL - if !is_http_url(attr_value) { - let href_full_url = - resolve_url(&url, attr_value).unwrap_or_default(); + let attr_name: &str = &attr.name.local; + if attr_name.eq_ignore_ascii_case("action") { + let form_action = attr.value.trim(); + // Modify action property to ensure it's a full URL + if !is_http_url(form_action) { + let form_action_full_url = + resolve_url(&url, form_action).unwrap_or_default(); attr.value.clear(); - attr.value.push_slice(href_full_url.as_str()); + attr.value.push_slice(form_action_full_url.as_str()); } } } } "frame" | "iframe" => { for attr in attrs_mut.iter_mut() { - if &attr.name.local == "src" { + let attr_name: &str = &attr.name.local; + if attr_name.eq_ignore_ascii_case("src") { if opt_no_frames { // Empty the src attribute attr.value.clear(); @@ -546,65 +726,99 @@ pub fn walk_and_embed_assets( continue; } - let src_full_url = resolve_url(&url, frame_src).unwrap_or_default(); - let (frame_data, frame_final_url) = retrieve_asset( - cache, - client, - &url, - &src_full_url, - false, - "text/html", - opt_silent, - ) - .unwrap_or((str!(), src_full_url)); - let dom = html_to_dom(&frame_data); - walk_and_embed_assets( - cache, - client, - &frame_final_url, - &dom.document, - opt_no_css, - opt_no_fonts, - opt_no_frames, - opt_no_js, - opt_no_images, - opt_silent, - ); - let mut buf: Vec = Vec::new(); - serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); - let iframe_data_url = data_to_data_url("text/html", &buf, "", ""); - attr.value.clear(); - attr.value.push_slice(iframe_data_url.as_str()); + let frame_full_url = resolve_url(&url, frame_src).unwrap_or_default(); + let frame_url_fragment = get_url_fragment(frame_full_url.clone()); + match retrieve_asset(cache, client, &url, &frame_full_url, opt_silent) { + Ok((frame_data, frame_final_url, frame_media_type)) => { + let frame_dom = + html_to_dom(&String::from_utf8_lossy(&frame_data)); + walk_and_embed_assets( + cache, + client, + &frame_final_url, + &frame_dom.document, + opt_no_css, + opt_no_fonts, + opt_no_frames, + opt_no_js, + opt_no_images, + opt_silent, + ); + let mut frame_data: Vec = Vec::new(); + serialize( + &mut frame_data, + &frame_dom.document, + SerializeOpts::default(), + ) + .unwrap(); + let frame_data_url = data_to_data_url( + &frame_media_type, + &frame_data, + &frame_final_url, + &frame_url_fragment, + ); + attr.value.clear(); + attr.value.push_slice(frame_data_url.as_str()); + } + Err(_) => { + // Keep remote reference if unable to retrieve the asset + if is_http_url(frame_full_url.clone()) { + attr.value.clear(); + attr.value.push_slice(frame_full_url.as_str()); + } + } + } } } } "video" => { for attr in attrs_mut.iter_mut() { - if &attr.name.local == "poster" { - let video_poster = attr.value.trim(); + let attr_name: &str = &attr.name.local; + if attr_name.eq_ignore_ascii_case("poster") { + let video_poster_url = attr.value.trim(); // Skip posters with empty source - if video_poster.is_empty() { + if video_poster_url.is_empty() { continue; } if opt_no_images { attr.value.clear(); - } else { - let poster_full_url = - resolve_url(&url, video_poster).unwrap_or_default(); - let (poster_data_url, _) = retrieve_asset( - cache, - client, - &url, - &poster_full_url, - true, - "", - opt_silent, - ) - .unwrap_or((poster_full_url, str!())); - attr.value.clear(); - attr.value.push_slice(poster_data_url.as_str()); + continue; + } + + let video_poster_full_url = + resolve_url(&url, video_poster_url).unwrap_or_default(); + let video_poster_url_fragment = + get_url_fragment(video_poster_full_url.clone()); + match retrieve_asset( + cache, + client, + &url, + &video_poster_full_url, + opt_silent, + ) { + Ok(( + video_poster_data, + video_poster_final_url, + video_poster_media_type, + )) => { + let video_poster_data_url = data_to_data_url( + &video_poster_media_type, + &video_poster_data, + &video_poster_final_url, + &video_poster_url_fragment, + ); + attr.value.clear(); + attr.value.push_slice(video_poster_data_url.as_str()); + } + Err(_) => { + // Keep remote reference if unable to retrieve the asset + if is_http_url(video_poster_full_url.clone()) { + attr.value.clear(); + attr.value.push_slice(video_poster_full_url.as_str()); + } + } } } } @@ -615,16 +829,15 @@ pub fn walk_and_embed_assets( // Process style attributes if opt_no_css { // Get rid of style attributes - let mut style_attr_indexes = Vec::new(); - for (i, attr) in attrs_mut.iter_mut().enumerate() { - if attr.name.local.as_ref().eq_ignore_ascii_case("style") { - style_attr_indexes.push(i); + let mut i = 0; + while i < attrs_mut.len() { + let attr_name: &str = &attrs_mut[i].name.local; + if attr_name.eq_ignore_ascii_case("style") { + attrs_mut.remove(i); + } else { + i += 1; } } - style_attr_indexes.reverse(); - for attr_index in style_attr_indexes { - attrs_mut.remove(attr_index); - } } else { // Otherwise, parse any links found in the attributes for attribute in attrs_mut diff --git a/src/main.rs b/src/main.rs index 2daf966..5e095f1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,5 @@ use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets}; -use monolith::utils::{data_url_to_text, is_data_url, is_file_url, is_http_url, retrieve_asset}; +use monolith::utils::{data_url_to_data, is_data_url, is_file_url, is_http_url, retrieve_asset}; use reqwest::blocking::Client; use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT}; use reqwest::Url; @@ -110,26 +110,24 @@ fn main() { // Retrieve root document if is_file_url(target_url) || is_http_url(target_url) { - let (data, final_url) = retrieve_asset( - &mut cache, - &client, - target_url, - target_url, - false, - "", - app_args.silent, - ) - .expect("Could not retrieve target document"); - base_url = final_url; - dom = html_to_dom(&data); + match retrieve_asset(&mut cache, &client, target_url, target_url, app_args.silent) { + Ok((data, final_url, _media_type)) => { + base_url = final_url; + dom = html_to_dom(&String::from_utf8_lossy(&data)); + } + Err(_) => { + eprintln!("Could not retrieve target document"); + process::exit(1); + } + } } else if is_data_url(target_url) { - let (media_type, text): (String, String) = data_url_to_text(target_url); + let (media_type, data): (String, Vec) = data_url_to_data(target_url); if !media_type.eq_ignore_ascii_case("text/html") { eprintln!("Unsupported data URL media type"); process::exit(1); } base_url = str!(target_url); - dom = html_to_dom(&text); + dom = html_to_dom(&String::from_utf8_lossy(&data)); } else { process::exit(1); } @@ -163,8 +161,8 @@ fn main() { let mut clean_url = Url::parse(&base_url).unwrap(); clean_url.set_fragment(None); // Don't include credentials - clean_url.set_username(""); - clean_url.set_password(None); + clean_url.set_username("").unwrap(); + clean_url.set_password(None).unwrap(); let metadata_comment = if is_http_url(&base_url) { format!( "\n", diff --git a/src/tests/cli.rs b/src/tests/cli.rs index f541a67..905823a 100644 --- a/src/tests/cli.rs +++ b/src/tests/cli.rs @@ -242,9 +242,9 @@ fn passing_local_file_target_input() -> Result<(), Box> { \n \ \n \ Local HTML file\n \ -\n \ -\n\n\n\n \ -\"\"\n \ +\n \ +\n\n\n\n \ +\"\"\n \ Tricky href\n \ Remote URL\n \ \n\n\n\n\ @@ -306,12 +306,12 @@ fn passing_local_file_target_input_absolute_target_path() -> Result<(), Box\n \ \n \ Local HTML file\n \ -\n \ -\n\n\n\n \ +\n \ +\n\n\n\n \ \"\"\n \ Tricky href\n \ Remote URL\n \ -\n\n\n\n\ +\n\n\n\n\ \n\ ", empty_image = empty_image!() @@ -368,12 +368,12 @@ fn passing_local_file_url_target_input() -> Result<(), Box\n \ \n \ Local HTML file\n \ -\n \ -\n\n\n\n \ +\n \ +\n\n\n\n \ \"\"\n \ Tricky href\n \ Remote URL\n \ -\n\n\n\n\ +\n\n\n\n\ \n\ ", empty_image = empty_image!() @@ -417,7 +417,7 @@ fn passing_security_disallow_local_assets_within_data_url_targets( // STDOUT should contain HTML with no JS in it assert_eq!( std::str::from_utf8(&out.stdout).unwrap(), - "\n" + "\n" ); // STDERR should be empty diff --git a/src/tests/html/has_proper_integrity.rs b/src/tests/html/has_proper_integrity.rs new file mode 100644 index 0000000..639bf45 --- /dev/null +++ b/src/tests/html/has_proper_integrity.rs @@ -0,0 +1,92 @@ +// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ +// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ +// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ +// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ +// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ +// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ + +#[cfg(test)] +mod passing { + use crate::html; + + #[test] + fn empty_input_sha256() { + assert!(html::has_proper_integrity( + "".as_bytes(), + "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=" + )); + } + + #[test] + fn sha256() { + assert!(html::has_proper_integrity( + "abcdef0123456789".as_bytes(), + "sha256-9EWAHgy4mSYsm54hmDaIDXPKLRsLnBX7lZyQ6xISNOM=" + )); + } + + #[test] + fn sha384() { + assert!(html::has_proper_integrity( + "abcdef0123456789".as_bytes(), + "sha384-gc9l7omltke8C33bedgh15E12M7RrAQa5t63Yb8APlpe7ZhiqV23+oqiulSJl3Kw" + )); + } + + #[test] + fn sha512() { + assert!(html::has_proper_integrity( + "abcdef0123456789".as_bytes(), + "sha512-zG5B88cYMqcdiMi9gz0XkOFYw2BpjeYdn5V6+oFrMgSNjRpqL7EF8JEwl17ztZbK3N7I/tTwp3kxQbN1RgFBww==" + )); + } +} + +// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ +// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ +// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ +// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ +// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ +// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ + +#[cfg(test)] +mod failing { + use crate::html; + + #[test] + fn empty_hash() { + assert!(!html::has_proper_integrity( + "abcdef0123456789".as_bytes(), + "" + )); + } + + #[test] + fn empty_input_empty_hash() { + assert!(!html::has_proper_integrity("".as_bytes(), "")); + } + + #[test] + fn sha256() { + assert!(!html::has_proper_integrity( + "abcdef0123456789".as_bytes(), + "sha256-badhash" + )); + } + + #[test] + fn sha384() { + assert!(!html::has_proper_integrity( + "abcdef0123456789".as_bytes(), + "sha384-badhash" + )); + } + + #[test] + fn sha512() { + assert!(!html::has_proper_integrity( + "abcdef0123456789".as_bytes(), + "sha512-badhash" + )); + } +} diff --git a/src/tests/html/mod.rs b/src/tests/html/mod.rs index bacc18a..a084e1c 100644 --- a/src/tests/html/mod.rs +++ b/src/tests/html/mod.rs @@ -1,4 +1,5 @@ mod get_node_name; +mod has_proper_integrity; mod is_icon; mod stringify_document; mod walk_and_embed_assets; diff --git a/src/tests/html/walk_and_embed_assets.rs b/src/tests/html/walk_and_embed_assets.rs index b41beb2..00397e5 100644 --- a/src/tests/html/walk_and_embed_assets.rs +++ b/src/tests/html/walk_and_embed_assets.rs @@ -162,7 +162,7 @@ fn passing_no_css() { buf.iter().map(|&c| c as char).collect::(), "\ \ - \ + \ \ \ \ @@ -210,7 +210,7 @@ fn passing_no_images() { format!( "\ \ - \ + \ \ \
\ @@ -372,7 +372,7 @@ fn passing_no_js() { assert_eq!( buf.iter().map(|&c| c as char).collect::(), - "
\ + "
\
" ); } @@ -412,7 +412,7 @@ fn passing_with_no_integrity() { assert_eq!( buf.iter().map(|&c| c as char).collect::(), "\ - No integrity\ + No integrity\ \ " ); diff --git a/src/tests/utils/data_url_to_text.rs b/src/tests/utils/data_url_to_data.rs similarity index 78% rename from src/tests/utils/data_url_to_text.rs rename to src/tests/utils/data_url_to_data.rs index 815a7d6..b239574 100644 --- a/src/tests/utils/data_url_to_text.rs +++ b/src/tests/utils/data_url_to_data.rs @@ -9,74 +9,74 @@ use crate::utils; #[test] fn passing_parse_text_html_base64() { - let (media_type, text) = utils::data_url_to_text("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg=="); + let (media_type, data) = utils::data_url_to_data("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg=="); assert_eq!(media_type, "text/html"); assert_eq!( - text, + String::from_utf8_lossy(&data), "Work expands so as to fill the time available for its completion" ); } #[test] fn passing_parse_text_html_utf8() { - let (media_type, text) = utils::data_url_to_text( + let (media_type, data) = utils::data_url_to_data( "data:text/html;utf8,Work expands so as to fill the time available for its completion", ); assert_eq!(media_type, "text/html"); assert_eq!( - text, + String::from_utf8_lossy(&data), "Work expands so as to fill the time available for its completion" ); } #[test] fn passing_parse_text_html_plaintext() { - let (media_type, text) = utils::data_url_to_text( + let (media_type, data) = utils::data_url_to_data( "data:text/html,Work expands so as to fill the time available for its completion", ); assert_eq!(media_type, "text/html"); assert_eq!( - text, + String::from_utf8_lossy(&data), "Work expands so as to fill the time available for its completion" ); } #[test] fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() { - let (media_type, text) = utils::data_url_to_text(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion "); + let (media_type, data) = utils::data_url_to_data(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion "); assert_eq!(media_type, "text/html"); assert_eq!( - text, + String::from_utf8_lossy(&data), "Work expands so as to fill the time available for its completion" ); } #[test] fn passing_parse_text_css_url_encoded() { - let (media_type, text) = utils::data_url_to_text("data:text/css,div{background-color:%23000}"); + let (media_type, data) = utils::data_url_to_data("data:text/css,div{background-color:%23000}"); assert_eq!(media_type, "text/css"); - assert_eq!(text, "div{background-color:#000}"); + assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}"); } #[test] fn passing_parse_no_media_type_base64() { - let (media_type, text) = utils::data_url_to_text("data:;base64,dGVzdA=="); + let (media_type, data) = utils::data_url_to_data("data:;base64,dGVzdA=="); assert_eq!(media_type, ""); - assert_eq!(text, "test"); + assert_eq!(String::from_utf8_lossy(&data), "test"); } #[test] fn passing_parse_no_media_type_no_encoding() { - let (media_type, text) = utils::data_url_to_text("data:;,test%20test"); + let (media_type, data) = utils::data_url_to_data("data:;,test%20test"); assert_eq!(media_type, ""); - assert_eq!(text, "test test"); + assert_eq!(String::from_utf8_lossy(&data), "test test"); } // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ @@ -88,8 +88,8 @@ fn passing_parse_no_media_type_no_encoding() { #[test] fn failing_just_word_data() { - let (media_type, text) = utils::data_url_to_text("data"); + let (media_type, data) = utils::data_url_to_data("data"); assert_eq!(media_type, ""); - assert_eq!(text, ""); + assert_eq!(String::from_utf8_lossy(&data), ""); } diff --git a/src/tests/utils/decode_url.rs b/src/tests/utils/decode_url.rs index a26ff17..f436605 100644 --- a/src/tests/utils/decode_url.rs +++ b/src/tests/utils/decode_url.rs @@ -1,5 +1,3 @@ -use crate::utils; - // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ @@ -7,30 +5,35 @@ use crate::utils; // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ -#[test] -fn passing_decode_unicode_characters() { - assert_eq!( - utils::decode_url(str!( - "%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5" - )), - "検ヒム解塗ゃッ = サ" - ); -} +#[cfg(test)] +mod passing { + use crate::utils; -#[test] -fn passing_decode_file_url() { - assert_eq!( - utils::decode_url(str!("file:///tmp/space%20here/test%231.html")), - "file:///tmp/space here/test#1.html" - ); -} + #[test] + fn decode_unicode_characters() { + assert_eq!( + utils::decode_url(str!( + "%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5" + )), + "検ヒム解塗ゃッ = サ" + ); + } + + #[test] + fn decode_file_url() { + assert_eq!( + utils::decode_url(str!("file:///tmp/space%20here/test%231.html")), + "file:///tmp/space here/test#1.html" + ); + } -#[test] -fn passing_plus_sign() { - assert_eq!( - utils::decode_url(str!( + #[test] + fn plus_sign() { + assert_eq!( + utils::decode_url(str!( + "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic" + )), "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic" - )), - "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic" - ); + ); + } } diff --git a/src/tests/utils/mod.rs b/src/tests/utils/mod.rs index 8e5d95e..8fc7e47 100644 --- a/src/tests/utils/mod.rs +++ b/src/tests/utils/mod.rs @@ -1,6 +1,6 @@ mod clean_url; mod data_to_data_url; -mod data_url_to_text; +mod data_url_to_data; mod decode_url; mod detect_media_type; mod file_url_to_fs_path; diff --git a/src/tests/utils/retrieve_asset.rs b/src/tests/utils/retrieve_asset.rs index bfa4dc2..7808e22 100644 --- a/src/tests/utils/retrieve_asset.rs +++ b/src/tests/utils/retrieve_asset.rs @@ -17,38 +17,23 @@ fn passing_read_data_url() { // If both source and target are data URLs, // ensure the result contains target data URL - let (retrieved_data, final_url) = utils::retrieve_asset( + let (data, final_url, media_type) = utils::retrieve_asset( cache, &client, - "data:text/html;base64,SoUrCe", - "data:text/html;base64,TaRgEt", - true, - "", + "data:text/html;base64,c291cmNl", + "data:text/html;base64,dGFyZ2V0", false, ) .unwrap(); - assert_eq!(&retrieved_data, "data:text/html;base64,TaRgEt"); - assert_eq!(&final_url, "data:text/html;base64,TaRgEt"); -} - -#[test] -fn passing_read_data_url_ignore_suggested_media_type() { - let cache = &mut HashMap::new(); - let client = Client::new(); - - // Media type parameter should not influence data URLs - let (data, final_url) = utils::retrieve_asset( - cache, - &client, - "data:text/html;base64,SoUrCe", - "data:text/html;base64,TaRgEt", - true, - "image/png", - false, - ) - .unwrap(); - assert_eq!(&data, "data:text/html;base64,TaRgEt"); - assert_eq!(&final_url, "data:text/html;base64,TaRgEt"); + assert_eq!( + utils::data_to_data_url(&media_type, &data, &final_url, ""), + utils::data_to_data_url("text/html", "target".as_bytes(), "", "") + ); + assert_eq!( + final_url, + utils::data_to_data_url("text/html", "target".as_bytes(), "", "") + ); + assert_eq!(&media_type, "text/html"); } #[test] @@ -60,7 +45,7 @@ fn passing_read_local_file_with_file_url_parent() { // Inclusion of local assets from local sources should be allowed let cwd = env::current_dir().unwrap(); - let (data, final_url) = utils::retrieve_asset( + let (data, final_url, _media_type) = utils::retrieve_asset( cache, &client, &format!( @@ -73,12 +58,10 @@ fn passing_read_local_file_with_file_url_parent() { file = file_url_protocol, cwd = cwd.to_str().unwrap() ), - true, - "application/javascript", false, ) .unwrap(); - assert_eq!(&data, "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg=="); + assert_eq!(utils::data_to_data_url("application/javascript", &data, &final_url, ""), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg=="); assert_eq!( &final_url, &format!( @@ -102,18 +85,20 @@ fn failing_read_local_file_with_data_url_parent() { let client = Client::new(); // Inclusion of local assets from data URL sources should not be allowed - let (data, final_url) = utils::retrieve_asset( + match utils::retrieve_asset( cache, &client, "data:text/html;base64,SoUrCe", "file:///etc/passwd", - true, - "", false, - ) - .unwrap(); - assert_eq!(&data, ""); - assert_eq!(&final_url, ""); + ) { + Ok((..)) => { + assert!(false); + } + Err(_) => { + assert!(true); + } + } } #[test] @@ -122,16 +107,18 @@ fn failing_read_local_file_with_https_parent() { let client = Client::new(); // Inclusion of local assets from remote sources should not be allowed - let (data, final_url) = utils::retrieve_asset( + match utils::retrieve_asset( cache, &client, "https://kernel.org/", "file:///etc/passwd", - true, - "", false, - ) - .unwrap(); - assert_eq!(&data, ""); - assert_eq!(&final_url, ""); + ) { + Ok((..)) => { + assert!(false); + } + Err(_) => { + assert!(true); + } + } } diff --git a/src/utils.rs b/src/utils.rs index dce98ce..11aecb2 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -140,21 +140,19 @@ pub fn clean_url>(input: T) -> String { result } -pub fn data_url_to_text>(url: T) -> (String, String) { - let parsed_url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap()); +pub fn data_url_to_data>(url: T) -> (String, Vec) { + let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap()); let path: String = parsed_url.path().to_string(); let comma_loc: usize = path.find(',').unwrap_or(path.len()); let meta_data: String = path.chars().take(comma_loc).collect(); let raw_data: String = path.chars().skip(comma_loc + 1).collect(); - let data: String = decode_url(raw_data); + let text: String = decode_url(raw_data); let meta_data_items: Vec<&str> = meta_data.split(';').collect(); - let mut encoding: &str = ""; - let mut media_type: String = str!(); - let mut text: String = str!(); + let mut encoding: &str = ""; let mut i: i8 = 0; for item in &meta_data_items { @@ -172,15 +170,13 @@ pub fn data_url_to_text>(url: T) -> (String, String) { i = i + 1; } - if is_plaintext_media_type(&media_type) || media_type.is_empty() { - if encoding.eq_ignore_ascii_case("base64") { - text = String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!()) - } else { - text = data - } - } + let data: Vec = if encoding.eq_ignore_ascii_case("base64") { + base64::decode(&text).unwrap_or(vec![]) + } else { + text.as_bytes().to_vec() + }; - (media_type, text) + (media_type, data) } pub fn decode_url(input: String) -> String { @@ -228,74 +224,52 @@ pub fn retrieve_asset( client: &Client, parent_url: &str, url: &str, - as_data_url: bool, - media_type: &str, opt_silent: bool, -) -> Result<(String, String), reqwest::Error> { +) -> Result<(Vec, String, String), reqwest::Error> { if url.len() == 0 { - return Ok((str!(), str!())); + // Provoke error + client.get("").send()?; } if is_data_url(&url) { - if as_data_url { - Ok((url.to_string(), url.to_string())) - } else { - let (_media_type, text) = data_url_to_text(url); - - Ok((text, url.to_string())) - } + let (media_type, data) = data_url_to_data(url); + Ok((data, url.to_string(), media_type)) } else if is_file_url(&url) { // Check if parent_url is also file:/// // (if not, then we don't embed the asset) if !is_file_url(&parent_url) { - return Ok((str!(), str!())); + // Provoke error + client.get("").send()?; } let fs_file_path: String = file_url_to_fs_path(url); let path = Path::new(&fs_file_path); - let url_fragment = get_url_fragment(url); if path.exists() { if !opt_silent { eprintln!("{}", &url); } - if as_data_url { - let data_url: String = data_to_data_url( - &media_type, - &fs::read(&fs_file_path).unwrap(), - &fs_file_path, - &url_fragment, - ); - Ok((data_url, url.to_string())) - } else { - let data: String = fs::read_to_string(&fs_file_path).expect(url); - Ok((data, url.to_string())) - } + Ok((fs::read(&fs_file_path).expect(""), url.to_string(), str!())) } else { - Ok((str!(), url.to_string())) + // Provoke error + Err(client.get("").send().unwrap_err()) } } else { let cache_key: String = clean_url(&url); if cache.contains_key(&cache_key) { - // URL is in cache, we retrieve it - let data = cache.get(&cache_key).unwrap(); - + // URL is in cache, we get and return it if !opt_silent { eprintln!("{} (from cache)", &url); } - if as_data_url { - let url_fragment = get_url_fragment(url); - Ok(( - data_to_data_url(media_type, data, url, &url_fragment), - url.to_string(), - )) - } else { - Ok((String::from_utf8_lossy(data).to_string(), url.to_string())) - } + Ok(( + cache.get(&cache_key).unwrap().to_vec(), + url.to_string(), + str!(), + )) } else { - // URL not in cache, we request it + // URL not in cache, we retrieve the file let mut response = client.get(url).send()?; let res_url = response.url().to_string(); @@ -309,36 +283,21 @@ pub fn retrieve_asset( let new_cache_key: String = clean_url(&res_url); - if as_data_url { - // Convert response into a byte array - let mut data: Vec = vec![]; - response.copy_to(&mut data)?; - - // Attempt to obtain media type by reading the Content-Type header - let media_type = if media_type == "" { - response - .headers() - .get(CONTENT_TYPE) - .and_then(|header| header.to_str().ok()) - .unwrap_or(&media_type) - } else { - media_type - }; - let url_fragment = get_url_fragment(url); - let data_url = data_to_data_url(&media_type, &data, url, &url_fragment); - - // Add to cache - cache.insert(new_cache_key, data); + // Convert response into a byte array + let mut data: Vec = vec![]; + response.copy_to(&mut data)?; - Ok((data_url, res_url)) - } else { - let content = response.text().unwrap(); + // Attempt to obtain media type by reading the Content-Type header + let media_type = response + .headers() + .get(CONTENT_TYPE) + .and_then(|header| header.to_str().ok()) + .unwrap_or(""); - // Add to cache - cache.insert(new_cache_key, content.as_bytes().to_vec()); + // Add to cache + cache.insert(new_cache_key, data.clone()); - Ok((content, res_url)) - } + Ok((data, res_url, media_type.to_string())) } } }