refactor code and implement integrity validation

pull/173/head
Sunshine 4 years ago
parent 2bc8414cc1
commit a18df74946
No known key found for this signature in database
GPG Key ID: B80CA68703CD8AB1

82
Cargo.lock generated

@ -71,11 +71,40 @@ name = "bitflags"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "block-buffer"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"block-padding 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
"generic-array 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "block-padding"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "bumpalo"
version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byte-tools"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byteorder"
version = "1.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bytes"
version = "0.5.3"
@ -173,6 +202,14 @@ name = "difference"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "digest"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"generic-array 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "doc-comment"
version = "0.3.1"
@ -210,6 +247,11 @@ dependencies = [
"serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "fake-simd"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "flate2"
version = "1.0.13"
@ -308,6 +350,14 @@ dependencies = [
"slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "generic-array"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"typenum 1.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "getrandom"
version = "0.1.13"
@ -581,6 +631,7 @@ dependencies = [
"cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)",
"html5ever 0.24.1 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
"sha2 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)",
"url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -636,6 +687,11 @@ dependencies = [
"libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "opaque-debug"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "openssl"
version = "0.10.26"
@ -1102,6 +1158,17 @@ dependencies = [
"url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "sha2"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"block-buffer 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
"digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"fake-simd 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"opaque-debug 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "siphasher"
version = "0.2.3"
@ -1267,6 +1334,11 @@ name = "try-lock"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "typenum"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "unicase"
version = "2.6.0"
@ -1509,7 +1581,11 @@ dependencies = [
"checksum base64 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7"
"checksum base64 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7d5ca2cd0adc3f48f9e9ea5a6bbdf9ccc0bfade884847e484d452414c7ccffb3"
"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
"checksum block-buffer 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b"
"checksum block-padding 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5"
"checksum bumpalo 3.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8fe2567a8d8a3aedb4e39aa39e186d5673acfd56393c6ac83b2bc5bd82f4369c"
"checksum byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7"
"checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
"checksum bytes 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "10004c15deb332055f7a4a208190aed362cf9a7c2f6ab70a305fba50e1105f38"
"checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb"
"checksum cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)" = "aa87058dce70a3ff5621797f1506cb837edd02ac4c0ae642b4542dce802908b8"
@ -1522,11 +1598,13 @@ dependencies = [
"checksum cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)" = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a"
"checksum cssparser-macros 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e"
"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
"checksum digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5"
"checksum doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "923dea538cea0aa3025e8685b20d6ee21ef99c4f77e954a30febbaac5ec73a97"
"checksum dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "ea57b42383d091c85abcc2706240b94ab2a8fa1fc81c10ff23c4de06e2a90b5e"
"checksum dtoa-short 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "59020b8513b76630c49d918c33db9f4c91638e7d3404a28084083b87e33f76f2"
"checksum encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)" = "87240518927716f79692c2ed85bfe6e98196d18c6401ec75355760233a7e12e9"
"checksum escargot 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "74cf96bec282dcdb07099f7e31d9fed323bca9435a09aba7b6d99b7617bca96d"
"checksum fake-simd 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed"
"checksum flate2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6bd6d6f4752952feb71363cffc9ebac9411b75b87c6ab6058c40c8900cf43c0f"
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
"checksum foreign-types 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
@ -1541,6 +1619,7 @@ dependencies = [
"checksum futures-sink 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "171be33efae63c2d59e6dbba34186fe0d6394fb378069a76dfd80fdcffd43c16"
"checksum futures-task 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0bae52d6b29cf440e298856fec3965ee6fa71b06aa7495178615953fd669e5f9"
"checksum futures-util 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c0d66274fb76985d3c62c886d1da7ac4c0903a8c9f754e8fe0f35a6a6cc39e76"
"checksum generic-array 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c68f0274ae0e023facc3c97b2e00f076be70e254bc851d972503b328db79b2ec"
"checksum getrandom 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "e7db7ca94ed4cd01190ceee0d8a8052f08a247aa1b469a7f68c6a3b71afcf407"
"checksum h2 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b9433d71e471c1736fd5a61b671fc0b148d7a2992f666c958d03cd8feb3b88d1"
"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
@ -1574,6 +1653,7 @@ dependencies = [
"checksum new_debug_unreachable 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f40f005c60db6e03bae699e414c58bf9aa7ea02a2d0b9bfbcf19286cc4c82b30"
"checksum nom 4.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6"
"checksum num_cpus 1.11.1 (registry+https://github.com/rust-lang/crates.io-index)" = "76dac5ed2a876980778b8b85f75a71b6cbf0db0b1232ee12f826bccb00d09d72"
"checksum opaque-debug 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
"checksum openssl 0.10.26 (registry+https://github.com/rust-lang/crates.io-index)" = "3a3cc5799d98e1088141b8e01ff760112bbd9f19d850c124500566ca6901a585"
"checksum openssl-probe 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "77af24da69f9d9341038eba93a073b1fdaaa1b788221b00a69bce9e762cb32de"
"checksum openssl-sys 0.9.53 (registry+https://github.com/rust-lang/crates.io-index)" = "465d16ae7fc0e313318f7de5cecf57b2fbe7511fd213978b457e1c96ff46736f"
@ -1626,6 +1706,7 @@ dependencies = [
"checksum serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)" = "a8c6faef9a2e64b0064f48570289b4bf8823b7581f1d6157c1b52152306651d0"
"checksum serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)" = "1a3351dcbc1f067e2c92ab7c3c1f288ad1a4cffc470b5aaddb4c2e0a3ae80043"
"checksum serde_urlencoded 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9ec5d77e2d4c73717816afac02670d5c4f534ea95ed430442cad02e7a6e32c97"
"checksum sha2 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "27044adfd2e1f077f649f59deb9490d3941d674002f7d062870a60ebe9bd47a0"
"checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac"
"checksum siphasher 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8e88f89a550c01e4cd809f3df4f52dc9e939f3273a2017eabd5c6d12fd98bb23"
"checksum slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8"
@ -1646,6 +1727,7 @@ dependencies = [
"checksum tower-service 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e987b6bf443f4b5b3b6f38704195592cca41c5bb7aedd3c3693c7081f8289860"
"checksum treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41"
"checksum try-lock 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e604eb7b43c06650e854be16a2a03155743d3752dd1c943f6829e26b7a36e382"
"checksum typenum 1.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "373c8a200f9e67a0c95e62a4f52fbf80c23b4381c05a17845531982fa99e6b33"
"checksum unicase 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
"checksum unicode-normalization 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "b561e267b2326bb4cebfc0ef9e68355c7abe6c6f522aeac2f5bf95d56c59bdcf"

@ -16,12 +16,10 @@ base64 = "0.12.0"
clap = "2.33.0"
cssparser = "0.27.2"
html5ever = "0.24.1"
sha2 = "0.8.1" # Used in calculating checksums during integrity checks
time = "0.1.42" # Used to render comments indicating the time the page was saved
url = "2.1.1"
# Used to render comments indicating the time the page was saved
# also required by reqwest as of v0.10.0
time = "0.1.42"
[dependencies.reqwest]
version = "0.10.*"
default-features = false

@ -2,7 +2,7 @@ use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::utils::{data_to_data_url, get_url_fragment, resolve_url, retrieve_asset};
use crate::utils::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, retrieve_asset};
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
// Universal
@ -169,40 +169,39 @@ pub fn process_css<'a>(
continue;
}
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
let (css, final_url) = retrieve_asset(
cache,
client,
&parent_url,
&full_url,
false,
"",
opt_silent,
)
.unwrap_or_default();
result.push_str(
enquote(
data_to_data_url(
"text/css",
embed_css(
cache,
client,
final_url.as_str(),
&css,
opt_no_fonts,
opt_no_images,
opt_silent,
let import_full_url = resolve_url(&parent_url, value).unwrap_or_default();
let import_url_fragment = get_url_fragment(import_full_url.clone());
match retrieve_asset(cache, client, &parent_url, &import_full_url, opt_silent) {
Ok((import_contents, import_final_url, _import_media_type)) => {
result.push_str(
enquote(
data_to_data_url(
"text/css",
embed_css(
cache,
client,
&import_final_url,
&String::from_utf8_lossy(&import_contents),
opt_no_fonts,
opt_no_images,
opt_silent,
)
.as_bytes(),
&import_final_url,
&import_url_fragment,
),
false,
)
.as_bytes(),
&final_url,
url_fragment.as_str(),
),
false,
)
.as_str(),
);
.as_str(),
);
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(import_full_url.clone()) {
result.push_str(enquote(import_full_url, false).as_str());
}
}
}
} else {
if func_name == "url" {
// Skip empty url()'s
@ -214,17 +213,30 @@ pub fn process_css<'a>(
result.push_str(enquote(str!(empty_image!()), false).as_str());
} else {
let resolved_url = resolve_url(&parent_url, value).unwrap_or_default();
let (data_url, _final_url) = retrieve_asset(
let url_fragment = get_url_fragment(resolved_url.clone());
match retrieve_asset(
cache,
client,
&parent_url,
&resolved_url,
true,
"",
opt_silent,
)
.unwrap_or_default();
result.push_str(enquote(data_url, false).as_str());
) {
Ok((data, final_url, media_type)) => {
let data_url = data_to_data_url(
&media_type,
&data,
&final_url,
&url_fragment,
);
result.push_str(enquote(data_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(resolved_url.clone()) {
result.push_str(enquote(resolved_url, false).as_str());
}
}
}
}
} else {
result.push_str(enquote(str!(value), false).as_str());
@ -293,54 +305,51 @@ pub fn process_css<'a>(
if is_import {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
let (css, final_url) = retrieve_asset(
cache,
client,
&parent_url,
&full_url,
false,
"",
opt_silent,
)
.unwrap_or_default();
result.push_str(
enquote(
data_to_data_url(
match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) {
Ok((css, final_url, _media_type)) => {
let data_url = data_to_data_url(
"text/css",
embed_css(
cache,
client,
final_url.as_str(),
&css,
&final_url,
&String::from_utf8_lossy(&css),
opt_no_fonts,
opt_no_images,
opt_silent,
)
.as_bytes(),
&final_url,
url_fragment.as_str(),
),
false,
)
.as_str(),
);
&url_fragment,
);
result.push_str(enquote(data_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(full_url.clone()) {
result.push_str(enquote(full_url, false).as_str());
}
}
}
} else {
if opt_no_images && is_image_url_prop(curr_prop.as_str()) {
result.push_str(enquote(str!(empty_image!()), false).as_str());
} else {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let (data_url, _final_url) = retrieve_asset(
cache,
client,
&parent_url,
&full_url,
true,
"",
opt_silent,
)
.unwrap_or_default();
result.push_str(enquote(data_url, false).as_str());
let url_fragment = get_url_fragment(full_url.clone());
match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) {
Ok((data, final_url, media_type)) => {
let data_url =
data_to_data_url(&media_type, &data, &final_url, &url_fragment);
result.push_str(enquote(data_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(full_url.clone()) {
result.push_str(enquote(full_url, false).as_str());
}
}
}
}
}
result.push_str(")");

@ -1,6 +1,9 @@
use crate::css::embed_css;
use crate::js::attr_is_event_handler;
use crate::utils::{data_to_data_url, is_http_url, resolve_url, retrieve_asset, url_has_protocol};
use crate::utils::{
data_to_data_url, get_url_fragment, is_http_url, resolve_url, retrieve_asset, url_has_protocol,
};
use base64;
use html5ever::interface::QualName;
use html5ever::parse_document;
use html5ever::rcdom::{Handle, NodeData, RcDom};
@ -9,6 +12,7 @@ use html5ever::tendril::{format_tendril, Tendril, TendrilSink};
use html5ever::tree_builder::{Attribute, TreeSink};
use html5ever::{local_name, namespace_url, ns};
use reqwest::blocking::Client;
use sha2::{Digest, Sha256, Sha384, Sha512};
use std::collections::HashMap;
use std::default::Default;
@ -36,6 +40,24 @@ pub fn is_icon(attr_value: &str) -> bool {
ICON_VALUES.contains(&attr_value.to_lowercase().as_str())
}
/// Checks `data` against the value of an HTML `integrity` attribute.
///
/// `integrity` is read as a whitespace-separated list of hash expressions of
/// the form `<algorithm>-<base64 digest>`, each optionally followed by
/// `?options` (the options are ignored). The recognised algorithm prefixes are
/// `sha256-`, `sha384-` and `sha512-`; tokens with any other prefix are skipped.
///
/// When several recognised tokens are present, only the ones using the
/// strongest algorithm found (sha512 > sha384 > sha256) are considered, and the
/// check succeeds if at least one of their digests equals the standard-base64
/// encoding of the digest of `data`.
///
/// Returns `false` when `integrity` contains no recognised hash expression
/// (including when it is empty), so callers that want to treat a missing
/// attribute as "valid" must test for emptiness themselves.
pub fn has_proper_integrity(data: &[u8], integrity: &str) -> bool {
    // All three supported prefixes ("sha256-", "sha384-", "sha512-") have this length.
    const PREFIX_LEN: usize = 7;

    // Rank of the strongest supported algorithm seen so far (0 = none yet),
    // together with every expected digest declared for that algorithm.
    let mut strongest: u8 = 0;
    let mut expected: Vec<&str> = Vec::new();

    for token in integrity.split_whitespace() {
        let rank: u8 = if token.starts_with("sha256-") {
            1
        } else if token.starts_with("sha384-") {
            2
        } else if token.starts_with("sha512-") {
            3
        } else {
            // Unknown or malformed hash expression: ignore it
            continue;
        };

        // Slicing is safe: the matched prefix is ASCII and exactly PREFIX_LEN bytes long.
        // Anything after the first '?' is an option list, not part of the digest.
        let digest = token[PREFIX_LEN..].split('?').next().unwrap_or("");

        if rank > strongest {
            // A stronger algorithm supersedes everything collected so far
            strongest = rank;
            expected.clear();
        }
        if rank == strongest {
            expected.push(digest);
        }
    }

    // Hash the data once, using only the strongest algorithm that was requested
    let actual: String = match strongest {
        1 => {
            let mut hasher = Sha256::new();
            hasher.input(data);
            base64::encode(hasher.result())
        }
        2 => {
            let mut hasher = Sha384::new();
            hasher.input(data);
            base64::encode(hasher.result())
        }
        3 => {
            let mut hasher = Sha512::new();
            hasher.input(data);
            base64::encode(hasher.result())
        }
        // No supported hash expression was found
        _ => return false,
    };

    expected.iter().any(|digest| *digest == actual)
}
pub fn walk_and_embed_assets(
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
@ -75,12 +97,13 @@ pub fn walk_and_embed_assets(
match name.local.as_ref() {
"link" => {
// Remove integrity attributes
// Remove integrity attributes, keep value of the last one
let mut integrity: String = str!();
let mut i = 0;
while i < attrs_mut.len() {
let attr_name = attrs_mut[i].name.local.as_ref();
let attr_name: &str = &attrs_mut[i].name.local;
if attr_name.eq_ignore_ascii_case("integrity") {
attrs_mut.remove(i);
integrity = str!(attrs_mut.remove(i).value.trim());
} else {
i += 1;
}
@ -117,93 +140,166 @@ pub fn walk_and_embed_assets(
match link_type {
LinkType::Icon => {
for attr in attrs_mut.iter_mut() {
if &attr.name.local == "href" {
if opt_no_images {
attr.value.clear();
} else {
let href_full_url = resolve_url(&url, attr.value.as_ref())
.unwrap_or_default();
let (favicon_data_url, _) = retrieve_asset(
cache,
client,
&url,
&href_full_url,
true,
"",
opt_silent,
)
.unwrap_or_default();
attr.value.clear();
attr.value.push_slice(favicon_data_url.as_str());
// Find and remove href attribute(s), keep value of the last found one
let mut link_href: String = str!();
let mut i = 0;
while i < attrs_mut.len() {
let attr_name: &str = &attrs_mut[i].name.local;
if attr_name.eq_ignore_ascii_case("href") {
link_href = str!(attrs_mut.remove(i).value.trim());
} else {
i += 1;
}
}
if !opt_no_images && !link_href.is_empty() {
let link_href_full_url =
resolve_url(&url, link_href).unwrap_or_default();
let link_href_url_fragment =
get_url_fragment(link_href_full_url.clone());
match retrieve_asset(
cache,
client,
&url,
&link_href_full_url,
opt_silent,
) {
Ok((
link_href_data,
link_href_final_url,
link_href_media_type,
)) => {
// Check integrity
if integrity.is_empty()
|| has_proper_integrity(&link_href_data, &integrity)
{
let link_href_data_url = data_to_data_url(
&link_href_media_type,
&link_href_data,
&link_href_final_url,
&link_href_url_fragment,
);
// Add new data URL href attribute
attrs_mut.push(Attribute {
name: QualName::new(
None,
ns!(),
local_name!("href"),
),
value: Tendril::from_slice(
link_href_data_url.as_ref(),
),
});
}
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(link_href_full_url.clone()) {
attrs_mut.push(Attribute {
name: QualName::new(
None,
ns!(),
local_name!("href"),
),
value: Tendril::from_slice(
link_href_full_url.as_ref(),
),
});
}
}
}
}
}
LinkType::Stylesheet => {
for attr in attrs_mut.iter_mut() {
if &attr.name.local == "href" {
if opt_no_css {
attr.value.clear();
} else {
let href_full_url = resolve_url(&url, &attr.value.as_ref())
.unwrap_or_default();
let replacement_text = match retrieve_asset(
cache,
client,
&url,
&href_full_url,
false,
"text/css",
opt_silent,
) {
// On successful retrieval, traverse CSS
Ok((css_data, final_url)) => {
let css: String = embed_css(
cache,
client,
&final_url,
&css_data,
opt_no_fonts,
opt_no_images,
opt_silent,
);
data_to_data_url(
"text/css",
css.as_bytes(),
&final_url,
"",
)
}
// If a network error occurred, warn
Err(e) => {
eprintln!("Warning: {}", e);
// If failed to resolve, replace with absolute URL
href_full_url
}
};
// Find and remove href attribute(s), keep value of the last found one
let mut link_href: String = str!();
let mut i = 0;
while i < attrs_mut.len() {
let attr_name: &str = &attrs_mut[i].name.local;
if attr_name.eq_ignore_ascii_case("href") {
link_href = str!(attrs_mut.remove(i).value.trim());
} else {
i += 1;
}
}
attr.value.clear();
attr.value.push_slice(&replacement_text);
if !opt_no_css && !link_href.is_empty() {
let link_href_full_url =
resolve_url(&url, link_href).unwrap_or_default();
match retrieve_asset(
cache,
client,
&url,
&link_href_full_url,
opt_silent,
) {
Ok((
link_href_data,
link_href_final_url,
_link_href_media_type,
)) => {
// Check integrity
if integrity.is_empty()
|| has_proper_integrity(&link_href_data, &integrity)
{
let css: String = embed_css(
cache,
client,
&link_href_final_url,
&String::from_utf8_lossy(&link_href_data),
opt_no_fonts,
opt_no_images,
opt_silent,
);
let link_href_data_url = data_to_data_url(
"text/css",
css.as_bytes(),
&link_href_final_url,
"",
);
// Add new data URL href attribute
attrs_mut.push(Attribute {
name: QualName::new(
None,
ns!(),
local_name!("href"),
),
value: Tendril::from_slice(
link_href_data_url.as_ref(),
),
});
}
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(link_href_full_url.clone()) {
attrs_mut.push(Attribute {
name: QualName::new(
None,
ns!(),
local_name!("href"),
),
value: Tendril::from_slice(
link_href_full_url.as_ref(),
),
});
}
}
}
}
}
LinkType::Preload | LinkType::DnsPrefetch => {
// Since all resources are embedded as data URL, preloading and prefetching are unnecessary
if let Some(attr) =
attrs_mut.iter_mut().find(|a| &a.name.local == "href")
{
attr.value.clear();
for _ in 0..attrs_mut.len() {
attrs_mut.remove(0);
}
}
LinkType::Unknown => {
for attr in attrs_mut.iter_mut() {
if &attr.name.local == "href" {
let attr_name: &str = &attr.name.local;
if attr_name.eq_ignore_ascii_case("href") {
let href_full_url =
resolve_url(&url, attr.value.as_ref()).unwrap_or_default();
resolve_url(&url, attr.value.trim()).unwrap_or_default();
attr.value.clear();
attr.value.push_slice(&href_full_url.as_str());
}
@ -212,186 +308,236 @@ pub fn walk_and_embed_assets(
}
}
"body" => {
// Find and remove background attribute(s), keep reference to the last one
let mut found_background: Option<Attribute> = None;
// Find and remove background attribute(s), keep value of the last found one
let mut background: String = str!();
let mut i = 0;
while i < attrs_mut.len() {
let attr_name = attrs_mut[i].name.local.as_ref();
let attr_name: &str = &attrs_mut[i].name.local;
if attr_name.eq_ignore_ascii_case("background") {
found_background = Some(attrs_mut.remove(i));
background = str!(attrs_mut.remove(i).value.trim());
} else {
i += 1;
}
}
if !opt_no_images {
if let Some((data_url, _)) = found_background
.iter()
.map(|attr| attr.value.trim())
.filter(|background| !background.is_empty()) // Skip if empty
.next()
.and_then(|background| resolve_url(&url, background).ok()) // Make absolute
.and_then(|abs_src| // Download and convert to data_url
retrieve_asset(
cache,
client,
&url,
&abs_src,
true,
"",
opt_silent,
).ok())
if !opt_no_images && !background.is_empty() {
let background_full_url = resolve_url(&url, background).unwrap_or_default();
let background_url_fragment = get_url_fragment(background_full_url.clone());
match retrieve_asset(cache, client, &url, &background_full_url, opt_silent)
{
// Add new data_url background attribute
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("background")),
value: Tendril::from_slice(data_url.as_ref()),
});
Ok((background_data, background_final_url, background_media_type)) => {
let background_data_url = data_to_data_url(
&background_media_type,
&background_data,
&background_final_url,
&background_url_fragment,
);
// Add new data URL background attribute
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("background")),
value: Tendril::from_slice(background_data_url.as_ref()),
});
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(background_full_url.clone()) {
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("background")),
value: Tendril::from_slice(background_full_url.as_ref()),
});
}
}
}
}
}
"img" => {
// Find source attribute(s)
let mut found_src: Option<Attribute> = None;
let mut found_datasrc: Option<Attribute> = None;
let mut img_src: String = str!();
let mut img_data_src: String = str!();
let mut i = 0;
while i < attrs_mut.len() {
let attr_name = attrs_mut[i].name.local.as_ref();
let attr_name: &str = &attrs_mut[i].name.local;
if attr_name.eq_ignore_ascii_case("src") {
found_src = Some(attrs_mut.remove(i));
img_src = str!(attrs_mut.remove(i).value.trim());
} else if attr_name.eq_ignore_ascii_case("data-src") {
found_datasrc = Some(attrs_mut.remove(i));
img_data_src = str!(attrs_mut.remove(i).value.trim());
} else {
i += 1;
}
}
// If images are disabled, clear both sources
if opt_no_images {
// Add empty image src attribute
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(empty_image!()),
});
} else if let Some((data_url, _)) = found_datasrc
.iter()
.chain(&found_src) // Give data_url priority
.map(|attr| attr.value.trim())
.filter(|src| !src.is_empty()) // Skip if empty
.next()
.and_then(|src| resolve_url(&url, src).ok()) // Make absolute
.and_then(|abs_src| // Download and convert to data_url
retrieve_asset(
cache,
client,
} else {
if img_src.is_empty() && img_data_src.is_empty() {
// Add empty src attribute
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(""),
});
} else {
// Add data URL src attribute
let img_full_url = resolve_url(
&url,
&abs_src,
true,
"",
opt_silent,
).ok())
{
// Add new data_url src attribute
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(data_url.as_ref()),
});
if !img_data_src.is_empty() {
img_data_src
} else {
img_src
},
)
.unwrap_or_default();
let img_url_fragment = get_url_fragment(img_full_url.clone());
match retrieve_asset(cache, client, &url, &img_full_url, opt_silent) {
Ok((img_data, img_final_url, img_media_type)) => {
let img_data_url = data_to_data_url(
&img_media_type,
&img_data,
&img_final_url,
&img_url_fragment,
);
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(img_data_url.as_ref()),
});
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(img_full_url.clone()) {
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(img_full_url.as_ref()),
});
}
}
}
}
}
}
"svg" => {
if opt_no_images {
node.children.borrow_mut().clear();
}
}
"input" => {
let mut is_image: bool = false;
// Determine input type
let mut is_image_input: bool = false;
for attr in attrs_mut.iter_mut() {
let attr_name: &str = &attr.name.local;
if attr_name == "type" {
is_image = attr.value.to_string().eq_ignore_ascii_case("image");
if attr_name.eq_ignore_ascii_case("type") {
is_image_input = attr.value.to_string().eq_ignore_ascii_case("image");
}
}
if is_image {
let mut found_src: Option<Attribute> = None;
if is_image_input {
let mut input_image_src: String = str!();
let mut i = 0;
while i < attrs_mut.len() {
let attr_name = attrs_mut[i].name.local.as_ref();
let attr_name: &str = &attrs_mut[i].name.local;
if attr_name.eq_ignore_ascii_case("src") {
found_src = Some(attrs_mut.remove(i));
input_image_src = str!(attrs_mut.remove(i).value.trim());
} else {
i += 1;
}
}
// If images are disabled, clear both sources
if opt_no_images {
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(empty_image!()),
});
} else if let Some((data_url, _)) = found_src
.iter()
.map(|attr| attr.value.trim())
.filter(|src| !src.is_empty()) // Skip if empty
.next()
.and_then(|src| resolve_url(&url, src).ok()) // Make absolute
.and_then(|abs_src| // Download and convert to data_url
retrieve_asset(
cache,
client,
&url,
&abs_src,
true,
"",
opt_silent,
).ok())
{
// Add new data_url src attribute
if opt_no_images || input_image_src.is_empty() {
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(data_url.as_ref()),
value: Tendril::from_slice(if input_image_src.is_empty() {
""
} else {
empty_image!()
}),
});
} else {
let input_image_full_url =
resolve_url(&url, input_image_src).unwrap_or_default();
let input_image_url_fragment =
get_url_fragment(input_image_full_url.clone());
match retrieve_asset(
cache,
client,
&url,
&input_image_full_url,
opt_silent,
) {
Ok((
input_image_data,
input_image_final_url,
input_image_media_type,
)) => {
let input_image_data_url = data_to_data_url(
&input_image_media_type,
&input_image_data,
&input_image_final_url,
&input_image_url_fragment,
);
// Add data URL src attribute
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(input_image_data_url.as_ref()),
});
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(input_image_full_url.clone()) {
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(
input_image_full_url.as_ref(),
),
});
}
}
}
}
}
}
"svg" => {
if opt_no_images {
node.children.borrow_mut().clear();
}
}
"image" => {
// Find and remove (xlink:)href attribute(s), keep reference to the last one
let mut image_href: Option<Attribute> = None;
// Find and remove (xlink:)href attribute(s), keep value of the last one
let mut image_href: String = str!();
let mut i = 0;
while i < attrs_mut.len() {
let attr_name = attrs_mut[i].name.local.as_ref();
let attr_name: &str = &attrs_mut[i].name.local;
if attr_name.eq_ignore_ascii_case("xlink:href")
|| attr_name.eq_ignore_ascii_case("href")
{
image_href = Some(attrs_mut.remove(i));
image_href = str!(attrs_mut.remove(i).value.trim());
} else {
i += 1;
}
}
if !opt_no_images {
if let Some((data_url, _)) = image_href
.iter()
.map(|attr| attr.value.trim())
.filter(|href| !href.is_empty()) // Skip if empty
.next()
.and_then(|href| resolve_url(&url, href).ok()) // Make absolute
.and_then(|abs_href| // Download and convert to data_url
retrieve_asset(
cache,
client,
&url,
&abs_href,
true,
"",
opt_silent,
).ok())
{
// Add new data_url href attribute
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("href")),
value: Tendril::from_slice(data_url.as_ref()),
});
if !opt_no_images && !image_href.is_empty() {
let image_full_url = resolve_url(&url, image_href).unwrap_or_default();
let image_url_fragment = get_url_fragment(image_full_url.clone());
match retrieve_asset(cache, client, &url, &image_full_url, opt_silent) {
Ok((image_data, image_final_url, image_media_type)) => {
let image_data_url = data_to_data_url(
&image_media_type,
&image_data,
&image_final_url,
&image_url_fragment,
);
// Add new data URL href attribute
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("href")),
value: Tendril::from_slice(image_data_url.as_ref()),
});
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(image_full_url.clone()) {
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("href")),
value: Tendril::from_slice(image_full_url.as_ref()),
});
}
}
}
}
}
@ -399,12 +545,12 @@ pub fn walk_and_embed_assets(
for attr in attrs_mut.iter_mut() {
let attr_name: &str = &attr.name.local;
if attr_name == "src" {
if attr_name.eq_ignore_ascii_case("src") {
let src_full_url = resolve_url(&url, attr.value.trim())
.unwrap_or_else(|_| attr.value.to_string());
attr.value.clear();
attr.value.push_slice(src_full_url.as_str());
} else if attr_name == "srcset" {
} else if attr_name.eq_ignore_ascii_case("srcset") {
if get_node_name(&get_parent_node(&node)) == Some("picture") {
if opt_no_images {
attr.value.clear();
@ -412,18 +558,38 @@ pub fn walk_and_embed_assets(
} else {
let srcset_full_url =
resolve_url(&url, attr.value.trim()).unwrap_or_default();
let (source_data_url, _) = retrieve_asset(
let srcset_url_fragment =
get_url_fragment(srcset_full_url.clone());
match retrieve_asset(
cache,
client,
&url,
&srcset_full_url,
true,
"",
opt_silent,
)
.unwrap_or((str!(), str!()));
attr.value.clear();
attr.value.push_slice(source_data_url.as_str());
) {
Ok((srcset_data, srcset_final_url, srcset_media_type)) => {
let srcset_data_url = data_to_data_url(
&srcset_media_type,
&srcset_data,
&srcset_final_url,
&srcset_url_fragment,
);
attr.value.clear();
attr.value.push_slice(srcset_data_url.as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(srcset_full_url.clone()) {
attr.value.clear();
attr.value.push_slice(srcset_full_url.as_str());
if !srcset_url_fragment.is_empty() {
attr.value.push_slice("#");
attr.value
.push_slice(srcset_url_fragment.as_str());
}
}
}
}
}
}
}
@ -431,7 +597,8 @@ pub fn walk_and_embed_assets(
}
"a" | "area" => {
for attr in attrs_mut.iter_mut() {
if &attr.name.local == "href" {
let attr_name: &str = &attr.name.local;
if attr_name.eq_ignore_ascii_case("href") {
let attr_value = attr.value.trim();
if opt_no_js && attr_value.starts_with("javascript:") {
@ -453,44 +620,55 @@ pub fn walk_and_embed_assets(
}
}
"script" => {
// Remove integrity attributes
// Remove integrity and src attributes, keep values of the last ones
let mut script_integrity: String = str!();
let mut script_src: String = str!();
let mut i = 0;
while i < attrs_mut.len() {
let attr_name = attrs_mut[i].name.local.as_ref();
let attr_name: &str = &attrs_mut[i].name.local;
if attr_name.eq_ignore_ascii_case("integrity") {
attrs_mut.remove(i);
script_integrity = str!(attrs_mut.remove(i).value.trim());
} else if attr_name.eq_ignore_ascii_case("src") {
script_src = str!(attrs_mut.remove(i).value.trim());
} else {
i += 1;
}
}
if opt_no_js {
// Empty src and inner content of SCRIPT tags
for attr in attrs_mut.iter_mut() {
if &attr.name.local == "src" {
attr.value.clear();
}
}
// Empty inner content (src is already gone)
node.children.borrow_mut().clear();
} else {
for attr in attrs_mut.iter_mut() {
if &attr.name.local == "src" {
let src_full_url =
resolve_url(&url, attr.value.trim()).unwrap_or_default();
let (js_data_url, _) = retrieve_asset(
cache,
client,
&url,
&src_full_url,
true,
"application/javascript",
opt_silent,
)
.unwrap_or((str!(), str!()));
attr.value.clear();
attr.value.push_slice(js_data_url.as_str());
} else if !script_src.is_empty() {
let script_full_url = resolve_url(&url, script_src).unwrap_or_default();
match retrieve_asset(cache, client, &url, &script_full_url, opt_silent) {
Ok((script_data, script_final_url, _script_media_type)) => {
// Only embed if we're able to validate integrity
if script_integrity.is_empty()
|| has_proper_integrity(&script_data, &script_integrity)
{
let script_data_url = data_to_data_url(
"application/javascript",
&script_data,
&script_final_url,
"",
);
// Add new data URL src attribute
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(script_data_url.as_ref()),
});
}
}
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(script_full_url.clone()) {
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(script_full_url.as_ref()),
});
}
}
};
}
}
"style" => {
@ -518,21 +696,23 @@ pub fn walk_and_embed_assets(
}
"form" => {
for attr in attrs_mut.iter_mut() {
if &attr.name.local == "action" {
let attr_value = attr.value.trim();
// Modify action to be a full URL
if !is_http_url(attr_value) {
let href_full_url =
resolve_url(&url, attr_value).unwrap_or_default();
let attr_name: &str = &attr.name.local;
if attr_name.eq_ignore_ascii_case("action") {
let form_action = attr.value.trim();
// Modify action property to ensure it's a full URL
if !is_http_url(form_action) {
let form_action_full_url =
resolve_url(&url, form_action).unwrap_or_default();
attr.value.clear();
attr.value.push_slice(href_full_url.as_str());
attr.value.push_slice(form_action_full_url.as_str());
}
}
}
}
"frame" | "iframe" => {
for attr in attrs_mut.iter_mut() {
if &attr.name.local == "src" {
let attr_name: &str = &attr.name.local;
if attr_name.eq_ignore_ascii_case("src") {
if opt_no_frames {
// Empty the src attribute
attr.value.clear();
@ -546,65 +726,99 @@ pub fn walk_and_embed_assets(
continue;
}
let src_full_url = resolve_url(&url, frame_src).unwrap_or_default();
let (frame_data, frame_final_url) = retrieve_asset(
cache,
client,
&url,
&src_full_url,
false,
"text/html",
opt_silent,
)
.unwrap_or((str!(), src_full_url));
let dom = html_to_dom(&frame_data);
walk_and_embed_assets(
cache,
client,
&frame_final_url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
let iframe_data_url = data_to_data_url("text/html", &buf, "", "");
attr.value.clear();
attr.value.push_slice(iframe_data_url.as_str());
let frame_full_url = resolve_url(&url, frame_src).unwrap_or_default();
let frame_url_fragment = get_url_fragment(frame_full_url.clone());
match retrieve_asset(cache, client, &url, &frame_full_url, opt_silent) {
Ok((frame_data, frame_final_url, frame_media_type)) => {
let frame_dom =
html_to_dom(&String::from_utf8_lossy(&frame_data));
walk_and_embed_assets(
cache,
client,
&frame_final_url,
&frame_dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut frame_data: Vec<u8> = Vec::new();
serialize(
&mut frame_data,
&frame_dom.document,
SerializeOpts::default(),
)
.unwrap();
let frame_data_url = data_to_data_url(
&frame_media_type,
&frame_data,
&frame_final_url,
&frame_url_fragment,
);
attr.value.clear();
attr.value.push_slice(frame_data_url.as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(frame_full_url.clone()) {
attr.value.clear();
attr.value.push_slice(frame_full_url.as_str());
}
}
}
}
}
}
"video" => {
for attr in attrs_mut.iter_mut() {
if &attr.name.local == "poster" {
let video_poster = attr.value.trim();
let attr_name: &str = &attr.name.local;
if attr_name.eq_ignore_ascii_case("poster") {
let video_poster_url = attr.value.trim();
// Skip posters with empty source
if video_poster.is_empty() {
if video_poster_url.is_empty() {
continue;
}
if opt_no_images {
attr.value.clear();
} else {
let poster_full_url =
resolve_url(&url, video_poster).unwrap_or_default();
let (poster_data_url, _) = retrieve_asset(
cache,
client,
&url,
&poster_full_url,
true,
"",
opt_silent,
)
.unwrap_or((poster_full_url, str!()));
attr.value.clear();
attr.value.push_slice(poster_data_url.as_str());
continue;
}
let video_poster_full_url =
resolve_url(&url, video_poster_url).unwrap_or_default();
let video_poster_url_fragment =
get_url_fragment(video_poster_full_url.clone());
match retrieve_asset(
cache,
client,
&url,
&video_poster_full_url,
opt_silent,
) {
Ok((
video_poster_data,
video_poster_final_url,
video_poster_media_type,
)) => {
let video_poster_data_url = data_to_data_url(
&video_poster_media_type,
&video_poster_data,
&video_poster_final_url,
&video_poster_url_fragment,
);
attr.value.clear();
attr.value.push_slice(video_poster_data_url.as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(video_poster_full_url.clone()) {
attr.value.clear();
attr.value.push_slice(video_poster_full_url.as_str());
}
}
}
}
}
@ -615,16 +829,15 @@ pub fn walk_and_embed_assets(
// Process style attributes
if opt_no_css {
// Get rid of style attributes
let mut style_attr_indexes = Vec::new();
for (i, attr) in attrs_mut.iter_mut().enumerate() {
if attr.name.local.as_ref().eq_ignore_ascii_case("style") {
style_attr_indexes.push(i);
let mut i = 0;
while i < attrs_mut.len() {
let attr_name: &str = &attrs_mut[i].name.local;
if attr_name.eq_ignore_ascii_case("style") {
attrs_mut.remove(i);
} else {
i += 1;
}
}
style_attr_indexes.reverse();
for attr_index in style_attr_indexes {
attrs_mut.remove(attr_index);
}
} else {
// Otherwise, parse any links found in the attributes
for attribute in attrs_mut

@ -1,5 +1,5 @@
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
use monolith::utils::{data_url_to_text, is_data_url, is_file_url, is_http_url, retrieve_asset};
use monolith::utils::{data_url_to_data, is_data_url, is_file_url, is_http_url, retrieve_asset};
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use reqwest::Url;
@ -110,26 +110,24 @@ fn main() {
// Retrieve root document
if is_file_url(target_url) || is_http_url(target_url) {
let (data, final_url) = retrieve_asset(
&mut cache,
&client,
target_url,
target_url,
false,
"",
app_args.silent,
)
.expect("Could not retrieve target document");
base_url = final_url;
dom = html_to_dom(&data);
match retrieve_asset(&mut cache, &client, target_url, target_url, app_args.silent) {
Ok((data, final_url, _media_type)) => {
base_url = final_url;
dom = html_to_dom(&String::from_utf8_lossy(&data));
}
Err(_) => {
eprintln!("Could not retrieve target document");
process::exit(1);
}
}
} else if is_data_url(target_url) {
let (media_type, text): (String, String) = data_url_to_text(target_url);
let (media_type, data): (String, Vec<u8>) = data_url_to_data(target_url);
if !media_type.eq_ignore_ascii_case("text/html") {
eprintln!("Unsupported data URL media type");
process::exit(1);
}
base_url = str!(target_url);
dom = html_to_dom(&text);
dom = html_to_dom(&String::from_utf8_lossy(&data));
} else {
process::exit(1);
}
@ -163,8 +161,8 @@ fn main() {
let mut clean_url = Url::parse(&base_url).unwrap();
clean_url.set_fragment(None);
// Don't include credentials
clean_url.set_username("");
clean_url.set_password(None);
clean_url.set_username("").unwrap();
clean_url.set_password(None).unwrap();
let metadata_comment = if is_http_url(&base_url) {
format!(
"<!-- Saved from {} at {} using {} v{} -->\n",

@ -242,9 +242,9 @@ fn passing_local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
<!DOCTYPE html><html lang=\"en\"><head>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \
<link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\" rel=\"stylesheet\" type=\"text/css\">\n \
<link href=\"data:text/css;base64,\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\" src=\"\">\n \
<link rel=\"stylesheet\" type=\"text/css\" href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\
@ -306,12 +306,12 @@ fn passing_local_file_target_input_absolute_target_path() -> Result<(), Box<dyn
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \
<link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n \
<link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<link rel=\"stylesheet\" type=\"text/css\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\" src=\"{empty_image}\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script src=\"\"></script>\n\n\n\n\
<script></script>\n\n\n\n\
</body></html>\n\
",
empty_image = empty_image!()
@ -368,12 +368,12 @@ fn passing_local_file_url_target_input() -> Result<(), Box<dyn std::error::Error
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \
<link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n \
<link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<link rel=\"stylesheet\" type=\"text/css\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\" src=\"{empty_image}\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script src=\"\"></script>\n\n\n\n\
<script></script>\n\n\n\n\
</body></html>\n\
",
empty_image = empty_image!()
@ -417,7 +417,7 @@ fn passing_security_disallow_local_assets_within_data_url_targets(
// STDOUT should contain HTML with no JS in it
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head><script src=\"\"></script></head><body></body></html>\n"
"<html><head><script></script></head><body></body></html>\n"
);
// STDERR should be empty

@ -0,0 +1,92 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::html;

    /// Non-empty payload shared by the digest checks in this module.
    const PAYLOAD: &str = "abcdef0123456789";

    /// An empty payload is expected to be accepted for the given SHA-256 integrity value.
    #[test]
    fn empty_input_sha256() {
        let integrity = "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=";
        assert!(html::has_proper_integrity("".as_bytes(), integrity));
    }

    /// The shared payload is expected to be accepted for its `sha256-` integrity value.
    #[test]
    fn sha256() {
        let integrity = "sha256-9EWAHgy4mSYsm54hmDaIDXPKLRsLnBX7lZyQ6xISNOM=";
        assert!(html::has_proper_integrity(PAYLOAD.as_bytes(), integrity));
    }

    /// The shared payload is expected to be accepted for its `sha384-` integrity value.
    #[test]
    fn sha384() {
        let integrity =
            "sha384-gc9l7omltke8C33bedgh15E12M7RrAQa5t63Yb8APlpe7ZhiqV23+oqiulSJl3Kw";
        assert!(html::has_proper_integrity(PAYLOAD.as_bytes(), integrity));
    }

    /// The shared payload is expected to be accepted for its `sha512-` integrity value.
    #[test]
    fn sha512() {
        let integrity = "sha512-zG5B88cYMqcdiMi9gz0XkOFYw2BpjeYdn5V6+oFrMgSNjRpqL7EF8JEwl17ztZbK3N7I/tTwp3kxQbN1RgFBww==";
        assert!(html::has_proper_integrity(PAYLOAD.as_bytes(), integrity));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::html;

    /// Non-empty payload shared by the rejection checks in this module.
    const PAYLOAD: &str = "abcdef0123456789";

    /// An empty integrity string must not validate a non-empty payload.
    #[test]
    fn empty_hash() {
        let accepted = html::has_proper_integrity(PAYLOAD.as_bytes(), "");
        assert!(!accepted);
    }

    /// An empty integrity string must not validate an empty payload either.
    #[test]
    fn empty_input_empty_hash() {
        let accepted = html::has_proper_integrity("".as_bytes(), "");
        assert!(!accepted);
    }

    /// A `sha256-` prefixed value with a bogus digest must be rejected.
    #[test]
    fn sha256() {
        let accepted = html::has_proper_integrity(PAYLOAD.as_bytes(), "sha256-badhash");
        assert!(!accepted);
    }

    /// A `sha384-` prefixed value with a bogus digest must be rejected.
    #[test]
    fn sha384() {
        let accepted = html::has_proper_integrity(PAYLOAD.as_bytes(), "sha384-badhash");
        assert!(!accepted);
    }

    /// A `sha512-` prefixed value with a bogus digest must be rejected.
    #[test]
    fn sha512() {
        let accepted = html::has_proper_integrity(PAYLOAD.as_bytes(), "sha512-badhash");
        assert!(!accepted);
    }
}

@ -1,4 +1,5 @@
mod get_node_name;
mod has_proper_integrity;
mod is_icon;
mod stringify_document;
mod walk_and_embed_assets;

@ -162,7 +162,7 @@ fn passing_no_css() {
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head>\
<link rel=\"stylesheet\" href=\"\">\
<link rel=\"stylesheet\">\
<style></style>\
</head>\
<body>\
@ -210,7 +210,7 @@ fn passing_no_images() {
format!(
"<html>\
<head>\
<link rel=\"icon\" href=\"\">\
<link rel=\"icon\">\
</head>\
<body>\
<div>\
@ -372,7 +372,7 @@ fn passing_no_js() {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><script src=\"\"></script>\
"<html><head></head><body><div><script></script>\
<script></script></div></body></html>"
);
}
@ -412,7 +412,7 @@ fn passing_with_no_integrity() {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head><title>No integrity</title><link rel=\"something\"><script src=\"\"></script></head>\
<head><title>No integrity</title><link rel=\"something\"><script></script></head>\
<body></body>\
</html>"
);

@ -9,74 +9,74 @@ use crate::utils;
#[test]
fn passing_parse_text_html_base64() {
let (media_type, text) = utils::data_url_to_text("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
let (media_type, data) = utils::data_url_to_data("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
assert_eq!(media_type, "text/html");
assert_eq!(
text,
String::from_utf8_lossy(&data),
"Work expands so as to fill the time available for its completion"
);
}
#[test]
fn passing_parse_text_html_utf8() {
let (media_type, text) = utils::data_url_to_text(
let (media_type, data) = utils::data_url_to_data(
"data:text/html;utf8,Work expands so as to fill the time available for its completion",
);
assert_eq!(media_type, "text/html");
assert_eq!(
text,
String::from_utf8_lossy(&data),
"Work expands so as to fill the time available for its completion"
);
}
#[test]
fn passing_parse_text_html_plaintext() {
let (media_type, text) = utils::data_url_to_text(
let (media_type, data) = utils::data_url_to_data(
"data:text/html,Work expands so as to fill the time available for its completion",
);
assert_eq!(media_type, "text/html");
assert_eq!(
text,
String::from_utf8_lossy(&data),
"Work expands so as to fill the time available for its completion"
);
}
#[test]
fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() {
let (media_type, text) = utils::data_url_to_text(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
let (media_type, data) = utils::data_url_to_data(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
assert_eq!(media_type, "text/html");
assert_eq!(
text,
String::from_utf8_lossy(&data),
"Work expands so as to fill the time available for its completion"
);
}
#[test]
fn passing_parse_text_css_url_encoded() {
let (media_type, text) = utils::data_url_to_text("data:text/css,div{background-color:%23000}");
let (media_type, data) = utils::data_url_to_data("data:text/css,div{background-color:%23000}");
assert_eq!(media_type, "text/css");
assert_eq!(text, "div{background-color:#000}");
assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}");
}
#[test]
fn passing_parse_no_media_type_base64() {
let (media_type, text) = utils::data_url_to_text("data:;base64,dGVzdA==");
let (media_type, data) = utils::data_url_to_data("data:;base64,dGVzdA==");
assert_eq!(media_type, "");
assert_eq!(text, "test");
assert_eq!(String::from_utf8_lossy(&data), "test");
}
#[test]
fn passing_parse_no_media_type_no_encoding() {
let (media_type, text) = utils::data_url_to_text("data:;,test%20test");
let (media_type, data) = utils::data_url_to_data("data:;,test%20test");
assert_eq!(media_type, "");
assert_eq!(text, "test test");
assert_eq!(String::from_utf8_lossy(&data), "test test");
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@ -88,8 +88,8 @@ fn passing_parse_no_media_type_no_encoding() {
#[test]
fn failing_just_word_data() {
let (media_type, text) = utils::data_url_to_text("data");
let (media_type, data) = utils::data_url_to_data("data");
assert_eq!(media_type, "");
assert_eq!(text, "");
assert_eq!(String::from_utf8_lossy(&data), "");
}

@ -1,5 +1,3 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@ -7,30 +5,35 @@ use crate::utils;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_decode_unicode_characters() {
assert_eq!(
utils::decode_url(str!(
"%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
)),
"検ヒム解塗ゃッ = サ"
);
}
#[cfg(test)]
mod passing {
use crate::utils;
#[test]
fn passing_decode_file_url() {
assert_eq!(
utils::decode_url(str!("file:///tmp/space%20here/test%231.html")),
"file:///tmp/space here/test#1.html"
);
}
#[test]
fn decode_unicode_characters() {
assert_eq!(
utils::decode_url(str!(
"%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
)),
"検ヒム解塗ゃッ = サ"
);
}
#[test]
fn decode_file_url() {
assert_eq!(
utils::decode_url(str!("file:///tmp/space%20here/test%231.html")),
"file:///tmp/space here/test#1.html"
);
}
#[test]
fn passing_plus_sign() {
assert_eq!(
utils::decode_url(str!(
#[test]
fn plus_sign() {
assert_eq!(
utils::decode_url(str!(
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
)),
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
)),
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
);
);
}
}

@ -1,6 +1,6 @@
mod clean_url;
mod data_to_data_url;
mod data_url_to_text;
mod data_url_to_data;
mod decode_url;
mod detect_media_type;
mod file_url_to_fs_path;

@ -17,38 +17,23 @@ fn passing_read_data_url() {
// If both source and target are data URLs,
// ensure the result contains target data URL
let (retrieved_data, final_url) = utils::retrieve_asset(
let (data, final_url, media_type) = utils::retrieve_asset(
cache,
&client,
"data:text/html;base64,SoUrCe",
"data:text/html;base64,TaRgEt",
true,
"",
"data:text/html;base64,c291cmNl",
"data:text/html;base64,dGFyZ2V0",
false,
)
.unwrap();
assert_eq!(&retrieved_data, "data:text/html;base64,TaRgEt");
assert_eq!(&final_url, "data:text/html;base64,TaRgEt");
}
#[test]
fn passing_read_data_url_ignore_suggested_media_type() {
let cache = &mut HashMap::new();
let client = Client::new();
// Media type parameter should not influence data URLs
let (data, final_url) = utils::retrieve_asset(
cache,
&client,
"data:text/html;base64,SoUrCe",
"data:text/html;base64,TaRgEt",
true,
"image/png",
false,
)
.unwrap();
assert_eq!(&data, "data:text/html;base64,TaRgEt");
assert_eq!(&final_url, "data:text/html;base64,TaRgEt");
assert_eq!(
utils::data_to_data_url(&media_type, &data, &final_url, ""),
utils::data_to_data_url("text/html", "target".as_bytes(), "", "")
);
assert_eq!(
final_url,
utils::data_to_data_url("text/html", "target".as_bytes(), "", "")
);
assert_eq!(&media_type, "text/html");
}
#[test]
@ -60,7 +45,7 @@ fn passing_read_local_file_with_file_url_parent() {
// Inclusion of local assets from local sources should be allowed
let cwd = env::current_dir().unwrap();
let (data, final_url) = utils::retrieve_asset(
let (data, final_url, _media_type) = utils::retrieve_asset(
cache,
&client,
&format!(
@ -73,12 +58,10 @@ fn passing_read_local_file_with_file_url_parent() {
file = file_url_protocol,
cwd = cwd.to_str().unwrap()
),
true,
"application/javascript",
false,
)
.unwrap();
assert_eq!(&data, "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
assert_eq!(utils::data_to_data_url("application/javascript", &data, &final_url, ""), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
assert_eq!(
&final_url,
&format!(
@ -102,18 +85,20 @@ fn failing_read_local_file_with_data_url_parent() {
let client = Client::new();
// Inclusion of local assets from data URL sources should not be allowed
let (data, final_url) = utils::retrieve_asset(
match utils::retrieve_asset(
cache,
&client,
"data:text/html;base64,SoUrCe",
"file:///etc/passwd",
true,
"",
false,
)
.unwrap();
assert_eq!(&data, "");
assert_eq!(&final_url, "");
) {
Ok((..)) => {
assert!(false);
}
Err(_) => {
assert!(true);
}
}
}
#[test]
@ -122,16 +107,18 @@ fn failing_read_local_file_with_https_parent() {
let client = Client::new();
// Inclusion of local assets from remote sources should not be allowed
let (data, final_url) = utils::retrieve_asset(
match utils::retrieve_asset(
cache,
&client,
"https://kernel.org/",
"file:///etc/passwd",
true,
"",
false,
)
.unwrap();
assert_eq!(&data, "");
assert_eq!(&final_url, "");
) {
Ok((..)) => {
assert!(false);
}
Err(_) => {
assert!(true);
}
}
}

@ -140,21 +140,19 @@ pub fn clean_url<T: AsRef<str>>(input: T) -> String {
result
}
pub fn data_url_to_text<T: AsRef<str>>(url: T) -> (String, String) {
let parsed_url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
pub fn data_url_to_data<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
let path: String = parsed_url.path().to_string();
let comma_loc: usize = path.find(',').unwrap_or(path.len());
let meta_data: String = path.chars().take(comma_loc).collect();
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
let data: String = decode_url(raw_data);
let text: String = decode_url(raw_data);
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
let mut encoding: &str = "";
let mut media_type: String = str!();
let mut text: String = str!();
let mut encoding: &str = "";
let mut i: i8 = 0;
for item in &meta_data_items {
@ -172,15 +170,13 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> (String, String) {
i = i + 1;
}
if is_plaintext_media_type(&media_type) || media_type.is_empty() {
if encoding.eq_ignore_ascii_case("base64") {
text = String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
} else {
text = data
}
}
let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
base64::decode(&text).unwrap_or(vec![])
} else {
text.as_bytes().to_vec()
};
(media_type, text)
(media_type, data)
}
pub fn decode_url(input: String) -> String {
@ -228,74 +224,52 @@ pub fn retrieve_asset(
client: &Client,
parent_url: &str,
url: &str,
as_data_url: bool,
media_type: &str,
opt_silent: bool,
) -> Result<(String, String), reqwest::Error> {
) -> Result<(Vec<u8>, String, String), reqwest::Error> {
if url.len() == 0 {
return Ok((str!(), str!()));
// Provoke error
client.get("").send()?;
}
if is_data_url(&url) {
if as_data_url {
Ok((url.to_string(), url.to_string()))
} else {
let (_media_type, text) = data_url_to_text(url);
Ok((text, url.to_string()))
}
let (media_type, data) = data_url_to_data(url);
Ok((data, url.to_string(), media_type))
} else if is_file_url(&url) {
// Check if parent_url is also file:///
// (if not, then we don't embed the asset)
if !is_file_url(&parent_url) {
return Ok((str!(), str!()));
// Provoke error
client.get("").send()?;
}
let fs_file_path: String = file_url_to_fs_path(url);
let path = Path::new(&fs_file_path);
let url_fragment = get_url_fragment(url);
if path.exists() {
if !opt_silent {
eprintln!("{}", &url);
}
if as_data_url {
let data_url: String = data_to_data_url(
&media_type,
&fs::read(&fs_file_path).unwrap(),
&fs_file_path,
&url_fragment,
);
Ok((data_url, url.to_string()))
} else {
let data: String = fs::read_to_string(&fs_file_path).expect(url);
Ok((data, url.to_string()))
}
Ok((fs::read(&fs_file_path).expect(""), url.to_string(), str!()))
} else {
Ok((str!(), url.to_string()))
// Provoke error
Err(client.get("").send().unwrap_err())
}
} else {
let cache_key: String = clean_url(&url);
if cache.contains_key(&cache_key) {
// URL is in cache, we retrieve it
let data = cache.get(&cache_key).unwrap();
// URL is in cache, we get and return it
if !opt_silent {
eprintln!("{} (from cache)", &url);
}
if as_data_url {
let url_fragment = get_url_fragment(url);
Ok((
data_to_data_url(media_type, data, url, &url_fragment),
url.to_string(),
))
} else {
Ok((String::from_utf8_lossy(data).to_string(), url.to_string()))
}
Ok((
cache.get(&cache_key).unwrap().to_vec(),
url.to_string(),
str!(),
))
} else {
// URL not in cache, we request it
// URL not in cache, we retrieve the file
let mut response = client.get(url).send()?;
let res_url = response.url().to_string();
@ -309,36 +283,21 @@ pub fn retrieve_asset(
let new_cache_key: String = clean_url(&res_url);
if as_data_url {
// Convert response into a byte array
let mut data: Vec<u8> = vec![];
response.copy_to(&mut data)?;
// Attempt to obtain media type by reading the Content-Type header
let media_type = if media_type == "" {
response
.headers()
.get(CONTENT_TYPE)
.and_then(|header| header.to_str().ok())
.unwrap_or(&media_type)
} else {
media_type
};
let url_fragment = get_url_fragment(url);
let data_url = data_to_data_url(&media_type, &data, url, &url_fragment);
// Add to cache
cache.insert(new_cache_key, data);
// Convert response into a byte array
let mut data: Vec<u8> = vec![];
response.copy_to(&mut data)?;
Ok((data_url, res_url))
} else {
let content = response.text().unwrap();
// Attempt to obtain media type by reading the Content-Type header
let media_type = response
.headers()
.get(CONTENT_TYPE)
.and_then(|header| header.to_str().ok())
.unwrap_or("");
// Add to cache
cache.insert(new_cache_key, content.as_bytes().to_vec());
// Add to cache
cache.insert(new_cache_key, data.clone());
Ok((content, res_url))
}
Ok((data, res_url, media_type.to_string()))
}
}
}

Loading…
Cancel
Save