From edacd09dc83658754ea44227b66a45c0433e92e1 Mon Sep 17 00:00:00 2001 From: Sunshine Date: Fri, 10 Apr 2020 20:43:29 -0400 Subject: [PATCH] store blobs instead of data URLs in cache --- src/css.rs | 4 +-- src/html.rs | 2 +- src/tests/utils/clean_url.rs | 8 ++++++ src/utils.rs | 51 +++++++++++++++++++++++++----------- 4 files changed, 47 insertions(+), 18 deletions(-) diff --git a/src/css.rs b/src/css.rs index 16cb929..cce992d 100644 --- a/src/css.rs +++ b/src/css.rs @@ -54,7 +54,7 @@ pub fn escape(value: &str) -> String { } pub fn process_css<'a>( - cache: &mut HashMap, + cache: &mut HashMap>, client: &Client, parent_url: &str, parser: &mut Parser, @@ -364,7 +364,7 @@ pub fn process_css<'a>( } pub fn embed_css( - cache: &mut HashMap, + cache: &mut HashMap>, client: &Client, parent_url: &str, css: &str, diff --git a/src/html.rs b/src/html.rs index 0078996..4fba459 100644 --- a/src/html.rs +++ b/src/html.rs @@ -37,7 +37,7 @@ pub fn is_icon(attr_value: &str) -> bool { } pub fn walk_and_embed_assets( - cache: &mut HashMap, + cache: &mut HashMap>, client: &Client, url: &str, node: &Handle, diff --git a/src/tests/utils/clean_url.rs b/src/tests/utils/clean_url.rs index 7e794be..f4db935 100644 --- a/src/tests/utils/clean_url.rs +++ b/src/tests/utils/clean_url.rs @@ -30,3 +30,11 @@ fn passing_removes_empty_query_and_empty_fragment() { "https://somewhere.com/font.eot" ); } + +#[test] +fn passing_removes_empty_query_amp_and_empty_fragment() { + assert_eq!( + utils::clean_url("https://somewhere.com/font.eot?a=b&#"), + "https://somewhere.com/font.eot?a=b" + ); +} diff --git a/src/utils.rs b/src/utils.rs index 6221837..dce98ce 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -120,17 +120,24 @@ pub fn get_url_fragment>(url: T) -> String { } } -pub fn clean_url>(url: T) -> String { - let mut result = Url::parse(url.as_ref()).unwrap(); +pub fn clean_url>(input: T) -> String { + let mut url = Url::parse(input.as_ref()).unwrap(); // Clear fragment - result.set_fragment(None); + url.set_fragment(None); // Get rid of stray question mark - if result.query() == Some("") { - result.set_query(None); + if url.query() == Some("") { + url.set_query(None); } - result.to_string() + + // Remove empty trailing ampersand(s) + let mut result: String = url.to_string(); + while result.ends_with("&") { + result.pop(); + } + + result } pub fn data_url_to_text>(url: T) -> (String, String) { @@ -217,7 +224,7 @@ pub fn file_url_to_fs_path(url: &str) -> String { } pub fn retrieve_asset( - cache: &mut HashMap, + cache: &mut HashMap>, client: &Client, parent_url: &str, url: &str, @@ -229,8 +236,6 @@ pub fn retrieve_asset( return Ok((str!(), str!())); } - let cache_key = clean_url(&url); - if is_data_url(&url) { if as_data_url { Ok((url.to_string(), url.to_string())) @@ -270,13 +275,25 @@ pub fn retrieve_asset( Ok((str!(), url.to_string())) } } else { + let cache_key: String = clean_url(&url); + if cache.contains_key(&cache_key) { - // URL is in cache + // URL is in cache, we retrieve it + let data = cache.get(&cache_key).unwrap(); + if !opt_silent { eprintln!("{} (from cache)", &url); } - let data = cache.get(&cache_key).unwrap(); - Ok((data.to_string(), url.to_string())) + + if as_data_url { + let url_fragment = get_url_fragment(url); + Ok(( + data_to_data_url(media_type, data, url, &url_fragment), + url.to_string(), + )) + } else { + Ok((String::from_utf8_lossy(data).to_string(), url.to_string())) + } } else { // URL not in cache, we request it let mut response = client.get(url).send()?; @@ -290,7 +307,7 @@ pub fn retrieve_asset( } } - let new_cache_key = clean_url(&res_url); + let new_cache_key: String = clean_url(&res_url); if as_data_url { // Convert response into a byte array @@ -309,13 +326,17 @@ pub fn retrieve_asset( }; let url_fragment = get_url_fragment(url); let data_url = data_to_data_url(&media_type, &data, url, &url_fragment); + // Add to cache - cache.insert(new_cache_key, data_url.clone()); + cache.insert(new_cache_key, data); + Ok((data_url, res_url)) } else { let content = response.text().unwrap(); + // Add to cache - cache.insert(new_cache_key, content.clone()); + cache.insert(new_cache_key, content.as_bytes().to_vec()); + Ok((content, res_url)) } }