From 614a518475e74b967c606c7ed98099872774669b Mon Sep 17 00:00:00 2001 From: Sunshine Date: Fri, 25 Dec 2020 21:54:52 -1000 Subject: [PATCH] fix srcset parsing --- Cargo.lock | 37 +++++++++++++++++++++++++++++ Cargo.toml | 1 + src/html.rs | 5 ++-- src/tests/html/embed_srcset.rs | 43 +++++++++++++++++++++++++++++++--- 4 files changed, 81 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bb23aa6..6fb1ae5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5,6 +5,14 @@ name = "adler" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "aho-corasick" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "ansi_term" version = "0.11.0" @@ -634,6 +642,7 @@ dependencies = [ "clap 2.33.3 (registry+https://github.com/rust-lang/crates.io-index)", "cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)", "html5ever 0.24.1 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.10.9 (registry+https://github.com/rust-lang/crates.io-index)", "sha2 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1064,6 +1073,22 @@ name = "redox_syscall" version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "regex" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.6.21 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "remove_dir_all" version = "0.5.3" @@ -1304,6 +1329,14 @@ dependencies = [ "unicode-width 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "thread_local" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "time" version = "0.1.43" @@ -1623,6 +1656,7 @@ dependencies = [ [metadata] "checksum adler 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" +"checksum aho-corasick 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)" = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" "checksum assert_cmd 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c88b9ca26f9c16ec830350d309397e74ee9abdfd8eb1f71cb6ecc71a3fc818da" "checksum async-compression 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "9021768bcce77296b64648cc7a7460e3df99979b97ed5c925c38d1cc83778d98" @@ -1749,6 +1783,8 @@ dependencies = [ "checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" "checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" "checksum redox_syscall 0.1.57 (registry+https://github.com/rust-lang/crates.io-index)" = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" +"checksum regex 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c" +"checksum regex-syntax 0.6.21 (registry+https://github.com/rust-lang/crates.io-index)" = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189" "checksum remove_dir_all 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" "checksum reqwest 0.10.9 (registry+https://github.com/rust-lang/crates.io-index)" = "fb15d6255c792356a0f578d8a645c677904dc02e862bebe2ecc18e0c01b9a0ce" "checksum ryu 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" @@ -1774,6 +1810,7 @@ dependencies = [ "checksum tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" "checksum tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b" "checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +"checksum thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" "checksum time 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" "checksum tinyvec 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "53953d2d3a5ad81d9f844a32f14ebb121f50b650cd59d0ee2a07cf13c617efed" "checksum tokio 0.2.22 (registry+https://github.com/rust-lang/crates.io-index)" = "5d34ca54d84bf2b5b4d7d31e901a8464f7b60ac145a284fba25ceb801f2ddccd" diff --git a/Cargo.toml b/Cargo.toml index e9926e9..fd9062f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ chrono = "0.4.19" # Used for formatting creation timestamp clap = "2.33.3" cssparser = "0.27.2" html5ever = "0.24.1" +regex = "1.4.2" # Used for parsing srcset sha2 = "0.9.2" # Used for calculating checksums during integrity checks url = "2.2.0" diff --git a/src/html.rs b/src/html.rs index 9eae562..4c2b582 100644 --- a/src/html.rs +++ b/src/html.rs @@ -7,6 +7,7 @@ use html5ever::serialize::{serialize, SerializeOpts}; use html5ever::tendril::{format_tendril, TendrilSink}; use html5ever::tree_builder::{Attribute, TreeSink}; use html5ever::{local_name, namespace_url, ns, LocalName}; +use regex::Regex; use reqwest::blocking::Client; use reqwest::Url; use sha2::{Digest, Sha256, Sha384, Sha512}; @@ -156,8 +157,8 @@ pub fn embed_srcset( depth: u32, ) -> String { let mut array: Vec = vec![]; - let srcset_items: Vec<&str> = srcset.split(',').collect(); - for srcset_item in srcset_items { + let re = Regex::new(r",\s+").unwrap(); + for srcset_item in re.split(srcset) { let parts: Vec<&str> = srcset_item.trim().split_whitespace().collect(); if parts.len() > 0 { let path = parts[0].trim(); diff --git a/src/tests/html/embed_srcset.rs b/src/tests/html/embed_srcset.rs index 0fa2b2b..a2c2f59 100644 --- a/src/tests/html/embed_srcset.rs +++ b/src/tests/html/embed_srcset.rs @@ -24,8 +24,45 @@ mod passing { let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0); assert_eq!( + embedded_css, format!("{} 1x, {} 2x", empty_image!(), empty_image!()), - embedded_css + ); + } + + #[test] + fn commas_within_file_names() { + let cache = &mut HashMap::new(); + let client = Client::new(); + let srcset_value = "small,s.png 1x, large,l.png 2x"; + let mut options = Options::default(); + options.no_images = true; + options.silent = true; + let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0); + + assert_eq!( + embedded_css, + format!("{} 1x, {} 2x", empty_image!(), empty_image!()), + ); + } + + #[test] + fn tabs_and_newlines_after_commas() { + let cache = &mut HashMap::new(); + let client = Client::new(); + let srcset_value = "small,s.png 1x,\nmedium,m.png 2x,\nlarge,l.png 3x"; + let mut options = Options::default(); + options.no_images = true; + options.silent = true; + let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0); + + assert_eq!( + embedded_css, + format!( + "{} 1x, {} 2x, {} 3x", + empty_image!(), + empty_image!(), + empty_image!() + ), ); } } @@ -56,8 +93,8 @@ mod failing { let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0); assert_eq!( - format!("{} 1x, {} 2x", empty_image!(), empty_image!()), - embedded_css + embedded_css, + format!("{} 1x, {} 2x,", empty_image!(), empty_image!()), ); } }