diff --git a/Cargo.lock b/Cargo.lock index f417d39..b72db67 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5,14 +5,6 @@ name = "adler32" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "aho-corasick" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "ansi_term" version = "0.11.0" @@ -146,6 +138,31 @@ dependencies = [ "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "cssparser" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cssparser-macros 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", + "dtoa-short 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", + "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "difference" version = "2.0.0" @@ -161,6 +178,14 @@ name = "dtoa" version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "dtoa-short" +version = "0.3.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "encoding_rs" version = "0.8.20" @@ -543,15 +568,15 @@ dependencies = [ [[package]] name = "monolith" -version = "2.2.0" +version = "2.2.1" dependencies = [ "assert_cmd 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)", "base64 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)", "html5ever 0.24.1 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", + "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -648,6 +673,16 @@ dependencies = [ "phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "phf_macros 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "phf_shared 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro-hack 0.5.14 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "phf_codegen" version = "0.7.24" @@ -666,6 +701,28 @@ dependencies = [ "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "phf_shared 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.7.2 
(registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "phf_macros" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "phf_generator 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "phf_shared 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro-hack 0.5.14 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "phf_shared" version = "0.7.24" @@ -674,6 +731,14 @@ dependencies = [ "siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "siphasher 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "pin-project" version = "0.4.6" @@ -740,6 +805,11 @@ dependencies = [ "treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "proc-macro-hack" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "proc-macro2" version = "1.0.6" @@ -784,6 +854,7 @@ dependencies = [ "rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", "rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_pcg 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -881,6 +952,14 @@ dependencies = [ "rand_core 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = 
[ + "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "rand_xorshift" version = "0.1.1" @@ -902,22 +981,6 @@ name = "redox_syscall" version = "0.1.56" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "regex" -version = "1.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.14 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "regex-syntax" -version = "0.6.14" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "remove_dir_all" version = "0.5.2" @@ -1038,6 +1101,11 @@ name = "siphasher" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "siphasher" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "slab" version = "0.4.2" @@ -1130,14 +1198,6 @@ dependencies = [ "unicode-width 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "thread_local" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "time" version = "0.1.42" @@ -1434,7 +1494,6 @@ dependencies = [ [metadata] "checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" -"checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d" "checksum ansi_term 0.11.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" "checksum anyhow 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)" = "7825f6833612eb2414095684fcf6c635becf3ce97fe48cf6421321e93bfbd53c" "checksum assert_cmd 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6283bac8dd7226470d491bc4737816fea4ca1fba7a2847f2e9097fd6bfb4624c" @@ -1453,9 +1512,12 @@ dependencies = [ "checksum core-foundation 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "25b9e03f145fd4f2bf705e07b900cd41fc636598fe5dc452fd0db1441c3f496d" "checksum core-foundation-sys 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e7ca8a5221364ef15ce201e8ed2f609fc312682a8f4e0e3d4aa5879764e0fa3b" "checksum crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1" +"checksum cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)" = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" +"checksum cssparser-macros 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e" "checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" "checksum doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "923dea538cea0aa3025e8685b20d6ee21ef99c4f77e954a30febbaac5ec73a97" "checksum dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "ea57b42383d091c85abcc2706240b94ab2a8fa1fc81c10ff23c4de06e2a90b5e" +"checksum dtoa-short 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "59020b8513b76630c49d918c33db9f4c91638e7d3404a28084083b87e33f76f2" "checksum encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)" = 
"87240518927716f79692c2ed85bfe6e98196d18c6401ec75355760233a7e12e9" "checksum escargot 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "74cf96bec282dcdb07099f7e31d9fed323bca9435a09aba7b6d99b7617bca96d" "checksum flate2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6bd6d6f4752952feb71363cffc9ebac9411b75b87c6ab6058c40c8900cf43c0f" @@ -1510,9 +1572,13 @@ dependencies = [ "checksum openssl-sys 0.9.53 (registry+https://github.com/rust-lang/crates.io-index)" = "465d16ae7fc0e313318f7de5cecf57b2fbe7511fd213978b457e1c96ff46736f" "checksum percent-encoding 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" "checksum phf 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)" = "b3da44b85f8e8dfaec21adae67f95d93244b2ecf6ad2a692320598dcc8e6dd18" +"checksum phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" "checksum phf_codegen 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)" = "b03e85129e324ad4166b06b2c7491ae27fe3ec353af72e72cd1654c7225d517e" "checksum phf_generator 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)" = "09364cc93c159b8b06b1f4dd8a4398984503483891b0c26b867cf431fb132662" +"checksum phf_generator 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +"checksum phf_macros 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" "checksum phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)" = "234f71a15de2288bcb7e3b6515828d22af7ec8598ee6d24c3b526fa0a80b67a0" +"checksum phf_shared 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" "checksum pin-project 0.4.6 
(registry+https://github.com/rust-lang/crates.io-index)" = "94b90146c7216e4cb534069fb91366de4ea0ea353105ee45ed297e2d1619e469" "checksum pin-project-internal 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "44ca92f893f0656d3cba8158dd0f2b99b94de256a4a54e870bd6922fcc6c8355" "checksum pin-project-lite 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e8822eb8bb72452f038ebf6048efa02c3fe22bf83f76519c9583e47fc194a422" @@ -1523,6 +1589,7 @@ dependencies = [ "checksum predicates 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a9bfe52247e5cc9b2f943682a85a5549fb9662245caf094504e69a2f03fe64d4" "checksum predicates-core 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "06075c3a3e92559ff8929e7a280684489ea27fe44805174c3ebd9328dcb37178" "checksum predicates-tree 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8e63c4859013b38a76eca2414c64911fba30def9e3202ac461a2d22831220124" +"checksum proc-macro-hack 0.5.14 (registry+https://github.com/rust-lang/crates.io-index)" = "fcfdefadc3d57ca21cf17990a28ef4c0f7c61383a28cb7604cf4a18e6ede1420" "checksum proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9c9e470a8dc4aeae2dee2f335e8f533e2d4b347e1434e5671afc49b054592f27" "checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" "checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca" @@ -1538,11 +1605,10 @@ dependencies = [ "checksum rand_jitter 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b" "checksum rand_os 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071" "checksum rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = 
"abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44" +"checksum rand_pcg 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" "checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" "checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" "checksum redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)" = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" -"checksum regex 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "322cf97724bea3ee221b78fe25ac9c46114ebb51747ad5babd51a2fc6a8235a8" -"checksum regex-syntax 0.6.14 (registry+https://github.com/rust-lang/crates.io-index)" = "b28dfe3fe9badec5dbf0a79a9cccad2cfc2ab5484bdb3e44cbd1ae8b3ba2be06" "checksum remove_dir_all 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e" "checksum reqwest 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "03c6cbd2bc1c1cb7052dbe30f4a70cf65811967c800f2dfbb2e6036dc9ee2553" "checksum ryu 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "bfa8506c1de11c9c4e4c38863ccbe02a305c8188e85a05a784c9e11e1c3910c8" @@ -1554,6 +1620,7 @@ dependencies = [ "checksum serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)" = "1a3351dcbc1f067e2c92ab7c3c1f288ad1a4cffc470b5aaddb4c2e0a3ae80043" "checksum serde_urlencoded 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9ec5d77e2d4c73717816afac02670d5c4f534ea95ed430442cad02e7a6e32c97" "checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac" +"checksum siphasher 0.3.2 
(registry+https://github.com/rust-lang/crates.io-index)" = "8e88f89a550c01e4cd809f3df4f52dc9e939f3273a2017eabd5c6d12fd98bb23" "checksum slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" "checksum smallvec 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4ecf3b85f68e8abaa7555aa5abdb1153079387e60b718283d732f03897fcfc86" "checksum sourcefile 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4bf77cb82ba8453b42b6ae1d692e4cdc92f9a47beaf89a847c8be83f4e328ad3" @@ -1565,7 +1632,6 @@ dependencies = [ "checksum tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" "checksum tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b" "checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -"checksum thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" "checksum time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" "checksum tokio 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0e1bef565a52394086ecac0a6fa3b8ace4cb3a138ee1d96bd2b93283b56824e3" "checksum tokio-tls 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7bde02a3a5291395f59b06ec6945a3077602fac2b07eeeaf0dee2122f3619828" diff --git a/Cargo.toml b/Cargo.toml index 52a85eb..e409ed3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "monolith" -version = "2.2.0" +version = "2.2.1" edition = "2018" authors = [ "Sunshine ", @@ -14,9 +14,8 @@ description = "CLI tool for saving web pages as a single HTML file" [dependencies] 
base64 = "0.11.0" clap = "2.33.0" +cssparser = "0.27.2" html5ever = "0.24.1" -lazy_static = "1.4.0" -regex = "1.3.4" url = "2.1.1" [dependencies.reqwest] @@ -26,3 +25,4 @@ features = ["default-tls", "blocking", "gzip"] [dev-dependencies] assert_cmd = "0.12.0" +tempfile = "3.1.0" diff --git a/src/css.rs b/src/css.rs new file mode 100644 index 0000000..0be8eff --- /dev/null +++ b/src/css.rs @@ -0,0 +1,370 @@ +use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token}; +use reqwest::blocking::Client; +use std::collections::HashMap; + +use crate::utils::{data_to_data_url, decode_url, get_url_fragment, resolve_url, retrieve_asset}; + +const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[ + "background", + "background-image", + "border", + "border-image", + "border-image-source", + "content", + "cursor", + "list-style", + "list-style-image", + "mask", + "mask-image", +]; + +const TRANSPARENT_PIXEL: &str = "data:image/png;base64,\ + iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII="; + +pub fn is_image_url_prop(prop_name: &str) -> bool { + CSS_PROPS_WITH_IMAGE_URLS + .iter() + .find(|p| prop_name.eq_ignore_ascii_case(p)) + .is_some() +} + +pub fn enquote(input: String, double: bool) -> String { + if double { + format!("\"{}\"", input.replace("\"", "\\\"")) + } else { + format!("'{}'", input.replace("'", "\\'")) + } +} + +pub fn process_css<'a>( + cache: &mut HashMap, + client: &Client, + parent_url: &str, + parser: &mut Parser, + rule_name: &str, + prop_name: &str, + func_name: &str, + opt_no_images: bool, + opt_silent: bool, +) -> Result> { + let mut result: String = str!(); + + let mut curr_rule: String = str!(rule_name.clone()); + let mut curr_prop: String = str!(prop_name.clone()); + let mut token: &Token; + let mut token_offset: SourcePosition; + + loop { + token_offset = parser.position(); + token = match parser.next_including_whitespace_and_comments() { + Ok(token) => token, + Err(_) => { + break; + } + }; + + match 
*token { + Token::Comment(_) => { + let token_slice = parser.slice_from(token_offset); + result.push_str(str!(token_slice).as_str()); + } + Token::Semicolon => result.push_str(";"), + Token::Colon => result.push_str(":"), + Token::Comma => result.push_str(","), + Token::ParenthesisBlock | Token::SquareBracketBlock | Token::CurlyBracketBlock => { + let closure: &str; + if token == &Token::ParenthesisBlock { + result.push_str("("); + closure = ")"; + } else if token == &Token::SquareBracketBlock { + result.push_str("["); + closure = "]"; + } else { + result.push_str("{"); + closure = "}"; + } + + let block_css: String = parser + .parse_nested_block(|parser| { + process_css( + cache, + client, + parent_url, + parser, + rule_name, + curr_prop.as_str(), + func_name, + opt_no_images, + opt_silent, + ) + }) + .unwrap(); + result.push_str(block_css.as_str()); + + result.push_str(closure); + } + Token::CloseParenthesis => result.push_str(")"), + Token::CloseSquareBracket => result.push_str("]"), + Token::CloseCurlyBracket => result.push_str("}"), + Token::IncludeMatch => result.push_str("~="), + Token::DashMatch => result.push_str("|="), + Token::PrefixMatch => result.push_str("^="), + Token::SuffixMatch => result.push_str("$="), + Token::SubstringMatch => result.push_str("*="), + Token::CDO => result.push_str(""), + Token::WhiteSpace(ref value) => { + result.push_str(value); + } + Token::Ident(ref value) => { + curr_prop = str!(value); + result.push_str(value); + } + Token::AtKeyword(ref value) => { + curr_rule = str!(value); + result.push_str("@"); + result.push_str(value); + } + Token::Hash(ref value) => { + result.push_str("#"); + result.push_str(value); + } + Token::QuotedString(ref value) => { + let is_import: bool = curr_rule == "import"; + if is_import { + // Reset current at-rule value + curr_rule = str!(); + } + + if is_import { + // Skip empty import values + if value.len() < 1 { + result.push_str("''"); + continue; + } + + let full_url = resolve_url(&parent_url, 
value).unwrap_or_default(); + let url_fragment = get_url_fragment(full_url.clone()); + let full_url_decoded = decode_url(full_url); + let (css, final_url) = retrieve_asset( + cache, + client, + &parent_url, + &full_url_decoded, + false, + "", + opt_silent, + ) + .unwrap_or_default(); + + result.push_str( + enquote( + data_to_data_url( + "text/css", + embed_css( + cache, + client, + final_url.as_str(), + &css, + opt_no_images, + opt_silent, + ) + .as_bytes(), + &final_url, + url_fragment.as_str(), + ), + false, + ) + .as_str(), + ); + } else { + if func_name == "url" { + // Skip empty url()'s + if value.len() < 1 { + continue; + } + + if opt_no_images && is_image_url_prop(curr_prop.as_str()) { + result.push_str(enquote(str!(TRANSPARENT_PIXEL), false).as_str()); + } else { + let resolved_url = resolve_url(&parent_url, value).unwrap_or_default(); + let (data_url, _final_url) = retrieve_asset( + cache, + client, + &parent_url, + &resolved_url, + true, + "", + opt_silent, + ) + .unwrap_or_default(); + result.push_str(enquote(data_url, false).as_str()); + } + } else { + result.push_str(enquote(str!(value), false).as_str()); + } + } + } + Token::Number { + ref has_sign, + ref value, + .. + } => { + if *has_sign && *value >= 0. { + result.push_str("+"); + } + result.push_str(&value.to_string()) + } + Token::Percentage { + ref has_sign, + ref unit_value, + .. + } => { + if *has_sign { + result.push_str("-"); + } + result.push_str(str!(unit_value * 100.).as_str()); + result.push_str("%"); + } + Token::Dimension { + ref value, + ref unit, + .. 
+ } => { + result.push_str(str!(value).as_str()); + result.push_str(str!(unit).as_str()); + } + Token::IDHash(ref value) => { + result.push_str("#"); + result.push_str(value); + } + Token::UnquotedUrl(ref value) => { + let is_import: bool = curr_rule == "import"; + if is_import { + // Reset current at-rule value + curr_rule = str!(); + } + + // Skip empty url()'s + if value.len() < 1 { + result.push_str("url()"); + continue; + } else if value.starts_with("#") { + result.push_str("url("); + result.push_str(value); + result.push_str(")"); + continue; + } + + result.push_str("url("); + if is_import { + let full_url = resolve_url(&parent_url, value).unwrap_or_default(); + let url_fragment = get_url_fragment(full_url.clone()); + let full_url_decoded = decode_url(full_url); + let (css, final_url) = retrieve_asset( + cache, + client, + &parent_url, + &full_url_decoded, + false, + "", + opt_silent, + ) + .unwrap_or_default(); + + result.push_str( + enquote( + data_to_data_url( + "text/css", + embed_css( + cache, + client, + final_url.as_str(), + &css, + opt_no_images, + opt_silent, + ) + .as_bytes(), + &final_url, + url_fragment.as_str(), + ), + false, + ) + .as_str(), + ); + } else { + if opt_no_images && is_image_url_prop(curr_prop.as_str()) { + result.push_str(enquote(str!(TRANSPARENT_PIXEL), false).as_str()); + } else { + let full_url = resolve_url(&parent_url, value).unwrap_or_default(); + let (data_url, _final_url) = retrieve_asset( + cache, + client, + &parent_url, + &full_url, + true, + "", + opt_silent, + ) + .unwrap_or_default(); + result.push_str(enquote(data_url, false).as_str()); + } + } + result.push_str(")"); + } + Token::Delim(ref value) => result.push_str(&value.to_string()), + Token::Function(ref name) => { + let function_name: &str = &name.clone(); + result.push_str(function_name); + result.push_str("("); + + let block_css: String = parser + .parse_nested_block(|parser| { + process_css( + cache, + client, + parent_url, + parser, + curr_rule.as_str(), + 
curr_prop.as_str(), + function_name, + opt_no_images, + opt_silent, + ) + }) + .unwrap(); + result.push_str(block_css.as_str()); + + result.push_str(")"); + } + Token::BadUrl(_) | Token::BadString(_) => {} + } + } + + Ok(result) +} + +pub fn embed_css( + cache: &mut HashMap, + client: &Client, + parent_url: &str, + css: &str, + opt_no_images: bool, + opt_silent: bool, +) -> String { + let mut input = ParserInput::new(&css); + let mut parser = Parser::new(&mut input); + + process_css( + cache, + client, + parent_url, + &mut parser, + "", + "", + "", + opt_no_images, + opt_silent, + ) + .unwrap() +} diff --git a/src/html.rs b/src/html.rs index 9f4a4f0..66b319f 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1,8 +1,6 @@ +use crate::css::embed_css; use crate::js::attr_is_event_handler; -use crate::utils::{ - data_to_data_url, is_http_url, resolve_css_imports, resolve_url, retrieve_asset, - url_has_protocol, -}; +use crate::utils::{data_to_data_url, is_http_url, resolve_url, retrieve_asset, url_has_protocol}; use html5ever::interface::QualName; use html5ever::parse_document; use html5ever::rcdom::{Handle, NodeData, RcDom}; @@ -164,16 +162,22 @@ pub fn walk_and_embed_assets( opt_silent, ) { // On successful retrieval, traverse CSS - Ok((css_data, _)) => resolve_css_imports( - cache, - client, - &css_data, - true, - &url, - &href_full_url, - opt_no_images, - opt_silent, - ), + Ok((css_data, final_url)) => { + let x: String = embed_css( + cache, + client, + &final_url, + &css_data, + opt_no_images, + opt_silent, + ); + data_to_data_url( + "text/css", + x.as_bytes(), + &final_url, + "", + ) + } // If a network error occured, warn Err(e) => { @@ -402,13 +406,11 @@ pub fn walk_and_embed_assets( for node in node.children.borrow_mut().iter_mut() { if let NodeData::Text { ref contents } = node.data { let mut tendril = contents.borrow_mut(); - let replacement = resolve_css_imports( + let replacement = embed_css( cache, client, - tendril.as_ref(), - false, - &url, &url, + 
tendril.as_ref(), opt_no_images, opt_silent, ); @@ -473,7 +475,7 @@ pub fn walk_and_embed_assets( ); let mut buf: Vec = Vec::new(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); - let iframe_data_url = data_to_data_url("text/html", &buf, ""); + let iframe_data_url = data_to_data_url("text/html", &buf, "", ""); attr.value.clear(); attr.value.push_slice(iframe_data_url.as_str()); } @@ -518,7 +520,7 @@ pub fn walk_and_embed_assets( // Get rid of style attributes let mut style_attr_indexes = Vec::new(); for (i, attr) in attrs_mut.iter_mut().enumerate() { - if attr.name.local.to_lowercase() == "style" { + if attr.name.local.as_ref().eq_ignore_ascii_case("style") { style_attr_indexes.push(i); } } @@ -532,16 +534,15 @@ pub fn walk_and_embed_assets( .iter_mut() .filter(|a| a.name.local.as_ref().eq_ignore_ascii_case("style")) { - let replacement = resolve_css_imports( + let replacement = embed_css( cache, client, - attribute.value.as_ref(), - false, - &url, &url, + attribute.value.as_ref(), opt_no_images, opt_silent, ); + // let replacement = str!(); attribute.value.clear(); attribute.value.push_slice(&replacement); } diff --git a/src/lib.rs b/src/lib.rs index 348c4ff..a9034d5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,7 @@ -#[macro_use] -extern crate lazy_static; - #[macro_use] mod macros; +pub mod css; pub mod html; pub mod js; pub mod utils; diff --git a/src/main.rs b/src/main.rs index 22147bf..3e7a06b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,3 @@ -#[macro_use] -extern crate clap; - -mod args; -mod macros; - -use crate::args::AppArgs; use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets}; use monolith::utils::{data_url_to_text, is_data_url, is_file_url, is_http_url, retrieve_asset}; use reqwest::blocking::Client; @@ -17,6 +10,13 @@ use std::path::Path; use std::process; use std::time::Duration; +mod args; +mod macros; + +#[macro_use] +extern crate clap; +use crate::args::AppArgs; + enum Output { 
Stdout(io::Stdout), File(fs::File), @@ -47,7 +47,7 @@ impl Output { fn main() { let app_args = AppArgs::get(); - let mut original_target: String = app_args.url_target.clone(); + let original_target: &str = &app_args.url_target; let target_url: &str; let base_url; let dom; @@ -55,33 +55,34 @@ fn main() { // Pre-process the input let cwd_normalized: String = str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/"); - let path = Path::new(original_target.as_str()); + let path = Path::new(original_target); + let mut target: String = str!(original_target.clone()).replace("\\", "/"); let path_is_relative: bool = path.is_relative(); - if original_target.clone().len() == 0 { + + if target.clone().len() == 0 { eprintln!("No target specified"); process::exit(1); - } else if is_http_url(original_target.clone()) || is_data_url(original_target.clone()) { - target_url = original_target.as_str(); - } else if is_file_url(original_target.clone()) { - target_url = original_target.as_str(); + } else if is_http_url(target.clone()) || is_data_url(target.clone()) { + target_url = target.as_str(); + } else if is_file_url(target.clone()) { + target_url = target.as_str(); } else if path.exists() { if !path.is_file() { eprintln!("Local target is not a file: {}", original_target); process::exit(1); } - original_target.insert_str(0, if cfg!(windows) { "file:///" } else { "file://" }); - original_target = original_target.replace("\\", "/"); + target.insert_str(0, if cfg!(windows) { "file:///" } else { "file://" }); if path_is_relative { - original_target.insert_str(if cfg!(windows) { 8 } else { 7 }, &cwd_normalized); - original_target.insert_str( + target.insert_str(if cfg!(windows) { 8 } else { 7 }, &cwd_normalized); + target.insert_str( if cfg!(windows) { 8 } else { 7 } + &cwd_normalized.len(), "/", ); } - target_url = original_target.as_str(); + target_url = target.as_str(); } else { - original_target.insert_str(0, "http://"); - target_url = original_target.as_str(); + 
target.insert_str(0, "http://"); + target_url = target.as_str(); } let mut output = Output::new(&app_args.output).expect("Could not prepare output"); diff --git a/src/tests/cli.rs b/src/tests/cli.rs index 9e9527b..b2732de 100644 --- a/src/tests/cli.rs +++ b/src/tests/cli.rs @@ -1,6 +1,8 @@ use assert_cmd::prelude::*; use std::env; +use std::io::Write; use std::process::Command; +use tempfile::NamedTempFile; // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ @@ -316,21 +318,22 @@ fn passing_local_file_target_input_absolute_target_path() -> Result<(), Box Result<(), Box> { let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; - let cwd = env::current_dir().unwrap(); + let cwd_normalized: String = + str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/"); let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" }; let out = cmd .arg("-cji") .arg(if cfg!(windows) { format!( - "{file}{cwd}\\src\\tests\\data\\local-file.html", + "{file}{cwd}/src/tests/data/local-file.html", file = file_url_protocol, - cwd = cwd.to_str().unwrap(), + cwd = cwd_normalized, ) } else { format!( "{file}{cwd}/src/tests/data/local-file.html", file = file_url_protocol, - cwd = cwd.to_str().unwrap(), + cwd = cwd_normalized, ) }) .output() @@ -357,15 +360,15 @@ fn passing_local_file_url_target_input() -> Result<(), Box Result<(), Box> { + let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" }; + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + let mut file_svg = NamedTempFile::new()?; + writeln!(file_svg, "\ +\ +\ +SVG\ +\n")?; + let mut file_html = NamedTempFile::new()?; + writeln!( + file_html, + "
\n", + file = file_url_prefix, + path = str!(file_svg.path().to_str().unwrap()).replace("\\", "/"), + )?; + let out = cmd.arg(file_html.path()).output().unwrap(); + + // STDOUT should contain HTML with data URL for background-image in it + assert_eq!( + std::str::from_utf8(&out.stdout).unwrap(), + "
\n", + file = file_url_prefix, + css_path = str!(file_css.path().to_str().unwrap()).replace("\\", "/"), + )?; + let out = cmd.arg(file_html.path()).output().unwrap(); + + // STDOUT should contain embedded CSS url()'s + assert_eq!( + std::str::from_utf8(&out.stdout).unwrap(), + "\n\n\n" + ); + + // STDERR should list temporary files that got retrieved + assert_eq!( + std::str::from_utf8(&out.stderr).unwrap(), + format!( + "\ +{file}{html_path}\n\ +{file}{css_path}\n\ +{file}{css_path}\n\ +{file}{css_path}\n\ +", + file = file_url_prefix, + html_path = str!(file_html.path().to_str().unwrap()).replace("\\", "/"), + css_path = str!(file_css.path().to_str().unwrap()).replace("\\", "/"), + ) + ); + + // The exit code should be 0 + out.assert().code(0); + + Ok(()) +} diff --git a/src/tests/css/embed_css.rs b/src/tests/css/embed_css.rs new file mode 100644 index 0000000..2304b39 --- /dev/null +++ b/src/tests/css/embed_css.rs @@ -0,0 +1,200 @@ +use reqwest::blocking::Client; +use std::collections::HashMap; + +use crate::css; + +// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ +// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ +// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ +// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ +// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ +// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ + +#[test] +fn passing_empty_input() { + let cache = &mut HashMap::new(); + let client = Client::new(); + + assert_eq!(css::embed_css(cache, &client, "", "", false, false,), ""); +} + +#[test] +fn passing_style_exclude_unquoted_images() { + let cache = &mut HashMap::new(); + let client = Client::new(); + + const STYLE: &str = "/* border: none;*/\ +background-image: url(https://somewhere.com/bg.png); \ +list-style: url(/assets/images/bullet.svg);\ +width:99.998%; \ +margin-top: -20px; \ +line-height: -1; \ +height: calc(100vh - 10pt)"; + + assert_eq!( + css::embed_css( + cache, + &client, + "https://doesntmatter.local/", + 
&STYLE, + true, + true, + ), + "/* border: none;*/\ +background-image: url(''); \ +list-style: url('');\ +width:99.998%; \ +margin-top: -20px; \ +line-height: -1; \ +height: calc(100vh - 10pt)" + ); +} + +#[test] +fn passing_style_exclude_single_quoted_images() { + let cache = &mut HashMap::new(); + let client = Client::new(); + + const STYLE: &str = "/* border: none;*/\ +background-image: url('https://somewhere.com/bg.png'); \ +list-style: url('/assets/images/bullet.svg');\ +width:99.998%; \ +margin-top: -20px; \ +line-height: -1; \ +height: calc(100vh - 10pt)"; + + assert_eq!( + css::embed_css( + cache, + &client, + "", + &STYLE, + true, + true, + ), + "/* border: none;*/\ +background-image: url(''); \ +list-style: url('');\ +width:99.998%; \ +margin-top: -20px; \ +line-height: -1; \ +height: calc(100vh - 10pt)" + ); +} + +#[test] +fn passing_style_block() { + let cache = &mut HashMap::new(); + let client = Client::new(); + + const CSS: &str = "\ +#id.class-name:not(:nth-child(3n+0)) {\n \ + // border: none;\n \ + background-image: url('');\n\ +}\n\ +\n\ +html > body {}"; + + assert_eq!( + css::embed_css(cache, &client, "file:///", &CSS, false, true,), + CSS + ); +} + +#[test] +fn passing_attribute_selectors() { + let cache = &mut HashMap::new(); + let client = Client::new(); + + const CSS: &str = "\ +[data-value] { + /* Attribute exists */ +} + +[data-value='foo'] { + /* Attribute has this exact value */ +} + +[data-value*='foo'] { + /* Attribute value contains this value somewhere in it */ +} + +[data-value~='foo'] { + /* Attribute has this value in a space-separated list somewhere */ +} + +[data-value^='foo'] { + /* Attribute value starts with this */ +} + +[data-value|='foo'] { + /* Attribute value starts with this in a dash-separated list */ +} + +[data-value$='foo'] { + /* Attribute value ends with this */ +} +"; + + assert_eq!(css::embed_css(cache, &client, "", &CSS, false, false,), CSS); +} + +#[test] +fn passing_import_string() { + let cache = &mut 
HashMap::new(); + let client = Client::new(); + + const CSS: &str = "\ +@charset 'UTF-8';\n\ +\n\ +@import 'data:text/css,html{background-color:%23000}';\n\ +\n\ +@import url('data:text/css,html{color:%23fff}')\n\ +"; + + assert_eq!( + css::embed_css( + cache, + &client, + "https://doesntmatter.local/", + &CSS, + false, + true, + ), + "\ +@charset 'UTF-8';\n\ +\n\ +@import 'data:text/css;base64,ZGF0YTp0ZXh0L2NzcyxodG1se2JhY2tncm91bmQtY29sb3I6IzAwMH0=';\n\ +\n\ +@import url('data:text/css;base64,ZGF0YTp0ZXh0L2NzcyxodG1se2NvbG9yOiNmZmZ9')\n\ +" + ); +} + +#[test] +fn passing_hash_urls() { + let cache = &mut HashMap::new(); + let client = Client::new(); + + const CSS: &str = "\ +body {\n \ + behavior: url(#default#something);\n\ +}\n\ +\n\ +.scissorHalf {\n \ + offset-path: url(#somePath);\n\ +}\n\ +"; + + assert_eq!( + css::embed_css( + cache, + &client, + "https://doesntmatter.local/", + &CSS, + false, + true, + ), + CSS + ); +} diff --git a/src/tests/css/enquote.rs b/src/tests/css/enquote.rs new file mode 100644 index 0000000..21304b0 --- /dev/null +++ b/src/tests/css/enquote.rs @@ -0,0 +1,50 @@ +use crate::css; + +// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ +// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ +// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ +// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ +// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ +// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ + +#[test] +fn passing_empty_input_single_quotes() { + assert_eq!(css::enquote(str!(""), false), "''"); +} + +#[test] +fn passing_empty_input_double_quotes() { + assert_eq!(css::enquote(str!(""), true), "\"\""); +} + +#[test] +fn passing_apostrophes_single_quotes() { + assert_eq!( + css::enquote(str!("It's a lovely day, don't you think?"), false), + "'It\\'s a lovely day, don\\'t you think?'" + ); +} + +#[test] +fn passing_apostrophes_double_quotes() { + assert_eq!( + css::enquote(str!("It's a lovely day, don't you think?"), 
true), + "\"It's a lovely day, don't you think?\"" + ); +} + +#[test] +fn passing_feet_and_inches_single_quotes() { + assert_eq!( + css::enquote(str!("5'2\", 6'5\""), false), + "'5\\'2\", 6\\'5\"'" + ); +} + +#[test] +fn passing_feet_and_inches_double_quotes() { + assert_eq!( + css::enquote(str!("5'2\", 6'5\""), true), + "\"5'2\\\", 6'5\\\"\"" + ); +} diff --git a/src/tests/css/is_image_url_prop.rs b/src/tests/css/is_image_url_prop.rs new file mode 100644 index 0000000..19f71fd --- /dev/null +++ b/src/tests/css/is_image_url_prop.rs @@ -0,0 +1,88 @@ +// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ +// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ +// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ +// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ +// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ +// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ + +#[cfg(test)] +mod passing { + use crate::css; + + #[test] + fn backrgound() { + assert!(css::is_image_url_prop("background")); + } + + #[test] + fn backrgound_image() { + assert!(css::is_image_url_prop("background-image")); + } + + #[test] + fn backrgound_image_uppercase() { + assert!(css::is_image_url_prop("BACKGROUND-IMAGE")); + } + + #[test] + fn border_image() { + assert!(css::is_image_url_prop("border-image")); + } + + #[test] + fn content() { + assert!(css::is_image_url_prop("content")); + } + + #[test] + fn cursor() { + assert!(css::is_image_url_prop("cursor")); + } + + #[test] + fn list_style() { + assert!(css::is_image_url_prop("list-style")); + } + + #[test] + fn list_style_image() { + assert!(css::is_image_url_prop("list-style-image")); + } + + #[test] + fn mask_image() { + assert!(css::is_image_url_prop("mask-image")); + } +} + +// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ +// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ +// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ +// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ +// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ +// ╚═╝ ╚═╝ 
╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ + +#[cfg(test)] +mod failing { + use crate::css; + + #[test] + fn empty() { + assert!(!css::is_image_url_prop("")); + } + + #[test] + fn width() { + assert!(!css::is_image_url_prop("width")); + } + + #[test] + fn color() { + assert!(!css::is_image_url_prop("color")); + } + + #[test] + fn z_index() { + assert!(!css::is_image_url_prop("z-index")); + } +} diff --git a/src/tests/css/mod.rs b/src/tests/css/mod.rs new file mode 100644 index 0000000..5f17fd3 --- /dev/null +++ b/src/tests/css/mod.rs @@ -0,0 +1,3 @@ +mod embed_css; +mod enquote; +mod is_image_url_prop; diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 6b77599..0d1368a 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -1,4 +1,5 @@ mod cli; +mod css; mod html; mod js; mod utils; diff --git a/src/tests/utils/data_to_data_url.rs b/src/tests/utils/data_to_data_url.rs index b5ed0ca..0ee0604 100644 --- a/src/tests/utils/data_to_data_url.rs +++ b/src/tests/utils/data_to_data_url.rs @@ -11,10 +11,18 @@ use crate::utils; fn passing_encode_string_with_specific_media_type() { let mime = "application/javascript"; let data = "var word = 'hello';\nalert(word);\n"; - let data_url = utils::data_to_data_url(mime, data.as_bytes(), ""); + let data_url = utils::data_to_data_url(mime, data.as_bytes(), "", ""); assert_eq!( &data_url, "data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK" ); } + +#[test] +fn passing_encode_append_fragment() { + let data = "\n"; + let data_url = utils::data_to_data_url("text/css", data.as_bytes(), "", "fragment"); + + assert_eq!(&data_url, "data:text/css;base64,PHN2Zz48L3N2Zz4K#fragment"); +} diff --git a/src/tests/utils/decode_url.rs b/src/tests/utils/decode_url.rs index e839ac1..6639e83 100644 --- a/src/tests/utils/decode_url.rs +++ b/src/tests/utils/decode_url.rs @@ -18,6 +18,9 @@ fn passing_decode_unicode_characters() { } #[test] -fn passing_decode_whitespaces() { - assert_eq!(utils::decode_url(str!("%20 %20")), " "); +fn 
passing_decode_file_url() { + assert_eq!( + utils::decode_url(str!("file:///tmp/space%20here/test%231.html")), + "file:///tmp/space here/test#1.html" + ); } diff --git a/src/tests/utils/file_url_to_fs_path.rs b/src/tests/utils/file_url_to_fs_path.rs new file mode 100644 index 0000000..b8efa29 --- /dev/null +++ b/src/tests/utils/file_url_to_fs_path.rs @@ -0,0 +1,23 @@ +use crate::utils; + +// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ +// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ +// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ +// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ +// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ +// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ + +#[test] +fn passing_remove_protocl_and_fragment() { + if cfg!(windows) { + assert_eq!( + utils::file_url_to_fs_path("file:///C:/documents/some-path/some-file.svg#fragment"), + "C:\\documents\\some-path\\some-file.svg" + ); + } else { + assert_eq!( + utils::file_url_to_fs_path("file:///tmp/some-path/some-file.svg#fragment"), + "/tmp/some-path/some-file.svg" + ); + } +} diff --git a/src/tests/utils/get_url_fragment.rs b/src/tests/utils/get_url_fragment.rs new file mode 100644 index 0000000..b0be8d7 --- /dev/null +++ b/src/tests/utils/get_url_fragment.rs @@ -0,0 +1,23 @@ +use crate::utils; + +// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ +// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ +// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ +// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ +// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ +// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ + +#[test] +fn passing_data_url() { + assert_eq!( + utils::get_url_fragment( + "#test" + ), + "test" + ); +} + +#[test] +fn passing_https_empty() { + assert_eq!(utils::get_url_fragment("https://kernel.org#"), ""); +} diff --git a/src/tests/utils/mod.rs b/src/tests/utils/mod.rs index c11f975..8e5d95e 100644 --- a/src/tests/utils/mod.rs +++ 
b/src/tests/utils/mod.rs @@ -3,6 +3,8 @@ mod data_to_data_url; mod data_url_to_text; mod decode_url; mod detect_media_type; +mod file_url_to_fs_path; +mod get_url_fragment; mod is_data_url; mod is_file_url; mod is_http_url; diff --git a/src/tests/utils/resolve_url.rs b/src/tests/utils/resolve_url.rs index c9090ce..7491cc8 100644 --- a/src/tests/utils/resolve_url.rs +++ b/src/tests/utils/resolve_url.rs @@ -1,6 +1,7 @@ -use crate::utils; use url::ParseError; +use crate::utils; + // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ @@ -171,6 +172,42 @@ fn passing_from_data_url_to_file_url() -> Result<(), ParseError> { Ok(()) } +#[test] +fn passing_preserve_fragment() -> Result<(), ParseError> { + let resolved_url = utils::resolve_url( + "http://doesnt-matter.local/", + "css/fonts/fontmarvelous.svg#fontmarvelous", + ) + .unwrap_or(str!()); + + assert_eq!( + resolved_url.as_str(), + "http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous" + ); + + Ok(()) +} + +#[test] +fn passing_resolve_from_file_url_to_file_url() -> Result<(), ParseError> { + let resolved_url = if cfg!(windows) { + utils::resolve_url("file:///c:/index.html", "file:///c:/image.png").unwrap_or(str!()) + } else { + utils::resolve_url("file:///tmp/index.html", "file:///tmp/image.png").unwrap_or(str!()) + }; + + assert_eq!( + resolved_url.as_str(), + if cfg!(windows) { + "file:///c:/image.png" + } else { + "file:///tmp/image.png" + } + ); + + Ok(()) +} + // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ diff --git a/src/utils.rs b/src/utils.rs index 8b5398b..bb75ad5 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,5 +1,4 @@ use base64; -use regex::Regex; use reqwest::blocking::Client; use reqwest::header::CONTENT_TYPE; use std::collections::HashMap; @@ -7,41 +6,6 @@ use std::fs; use 
std::path::Path; use url::{form_urlencoded, ParseError, Url}; -/// This monster of a regex is used to match any kind of URL found in CSS. -/// -/// There are roughly three different categories that a found URL could fit -/// into: -/// - Font [found after a src: property in an @font-family rule] -/// - Stylesheet [denoted by an @import before the url -/// - Image [covers all other uses of the url() function] -/// -/// This regex aims to extract the following information: -/// - What type of URL is it (font/image/css) -/// - Where is the part that needs to be replaced (incl any wrapping quotes) -/// - What is the URL (excl any wrapping quotes) -/// -/// Essentially, the regex can be broken down into two parts: -/// -/// `(?:(?P@import)|(?Psrc\s*:)\s+)?` -/// This matches the precursor to a font or CSS URL, and fills in a match under -/// either `` (if it's a CSS URL) or `` (if it's a font). -/// Determining whether or not it's an image can be done by the negation of both -/// of these. Either zero or one of these can match. -/// -/// `url\((?P['"]?(?P[^"'\)]+)['"]?)\)` -/// This matches the actual URL part of the url(), and must always match. It also -/// sets `` and `` which correspond to everything within -/// `url(...)` and a usable URL, respectively. -/// -/// Note, however, that this does not perform any validation of the found URL. -/// Malformed CSS could lead to an invalid URL being present. It is therefore -/// recomended that the URL gets manually validated. -const CSS_URL_REGEX_STR: &str = r###"(?:(?:(?P@import)|(?Psrc\s*:))\s+)?url\((?P['"]?(?P[^"'\)]+)['"]?)\)"###; - -lazy_static! 
{ - static ref REGEX_CSS_URL: Regex = Regex::new(CSS_URL_REGEX_STR).unwrap(); -} - const MAGIC: [[&[u8]; 2]; 18] = [ // Image [b"GIF87a", b"image/gif"], @@ -66,13 +30,24 @@ const MAGIC: [[&[u8]; 2]; 18] = [ [b"\x1A\x45\xDF\xA3", b"video/webm"], ]; -pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str) -> String { - let media_type = if media_type.is_empty() { +pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String { + let media_type: String = if media_type.is_empty() { detect_media_type(data, &url) } else { media_type.to_string() }; - format!("data:{};base64,{}", media_type, base64::encode(data)) + let hash: String = if fragment != "" { + format!("#{}", fragment) + } else { + str!() + }; + + format!( + "data:{};base64,{}{}", + media_type, + base64::encode(data), + hash + ) } pub fn detect_media_type(data: &[u8], url: &str) -> String { @@ -125,94 +100,11 @@ pub fn resolve_url, U: AsRef>(from: T, to: U) -> Result, - client: &Client, - css_string: &str, - as_data_url: bool, - parent_url: &str, - href: &str, - opt_no_images: bool, - opt_silent: bool, -) -> String { - let mut resolved_css = String::from(css_string); - - for link in REGEX_CSS_URL.captures_iter(&css_string) { - let target_link = link.name("url").unwrap().as_str(); - - // Determine linked asset type - let is_stylesheet = link.name("stylesheet").is_some(); - let is_font = link.name("font").is_some(); - let is_image = !is_stylesheet && !is_font; - - // Generate absolute URL for the content - let embedded_url = match resolve_url(href, target_link) { - Ok(url) => url, - Err(_) => continue, // Malformed URL - }; - - // Download the asset. 
If it's more CSS, resolve that too - let content = if is_stylesheet { - // The link is an @import link - retrieve_asset( - cache, - client, - &parent_url, - &embedded_url, - false, // Formatting as data URL will be done later - "text/css", // Expect CSS - opt_silent, - ) - .map(|(content, _)| { - resolve_css_imports( - cache, - client, - &content, - true, // Finally, convert to a data URL - &parent_url, - &embedded_url, - opt_no_images, - opt_silent, - ) - }) - } else if (is_image && !opt_no_images) || is_font { - // The link is some other, non-@import link - retrieve_asset( - cache, - client, - &parent_url, - &embedded_url, - true, // Format as data URL - "", // Unknown media type - opt_silent, - ) - .map(|(a, _)| a) - } else { - // If it's a datatype that has been opt_no'd out of, replace with - // absolute URL - - Ok(embedded_url.clone()) - } - .unwrap_or_else(|e| { - eprintln!("Warning: {}", e); - - // If failed to resolve, replace with absolute URL - embedded_url - }); - - let replacement = format!("\"{}\"", &content); - let dest = link.name("to_repl").unwrap(); - if resolved_css.len() > css_string.len() { - let offset = resolved_css.len() - css_string.len(); - let target_range = (dest.start() + offset)..(dest.end() + offset); - resolved_css.replace_range(target_range, &replacement); - } - } - - if as_data_url { - data_to_data_url("text/css", resolved_css.as_bytes(), "") +pub fn get_url_fragment>(url: T) -> String { + if Url::parse(url.as_ref()).unwrap().fragment() == None { + str!() } else { - resolved_css + str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap()) } } @@ -291,6 +183,26 @@ pub fn decode_url(input: String) -> String { .collect() } +pub fn file_url_to_fs_path(url: &str) -> String { + if !is_file_url(url) { + return str!(); + } + + let cutoff_l = if cfg!(windows) { 8 } else { 7 }; + let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string()); + let url_fragment = get_url_fragment(url); + if url_fragment != "" { + let 
max_len = fs_file_path.len() - 1 - url_fragment.len(); + fs_file_path = fs_file_path[0..max_len].to_string(); + } + + if cfg!(windows) { + fs_file_path = fs_file_path.replace("/", "\\"); + } + + fs_file_path +} + pub fn retrieve_asset( cache: &mut HashMap, client: &Client, @@ -310,14 +222,14 @@ pub fn retrieve_asset( Ok((url.to_string(), url.to_string())) } else if is_file_url(&url) { // Check if parent_url is also file:/// - // (if not then we don't download/embed the asset) + // (if not, then we don't embed the asset) if !is_file_url(&parent_url) { return Ok((str!(), str!())); } - let cutoff = if cfg!(windows) { 8 } else { 7 }; - let fs_file_path: String = decode_url(url.to_string()[cutoff..].to_string()); + let fs_file_path: String = file_url_to_fs_path(url); let path = Path::new(&fs_file_path); + let url_fragment = get_url_fragment(url); if path.exists() { if !opt_silent { eprintln!("{}", &url); @@ -328,6 +240,7 @@ pub fn retrieve_asset( &media_type, &fs::read(&fs_file_path).unwrap(), &fs_file_path, + &url_fragment, ); Ok((data_url, url.to_string())) } else { @@ -375,7 +288,8 @@ pub fn retrieve_asset( } else { media_type }; - let data_url = data_to_data_url(&media_type, &data, url); + let url_fragment = get_url_fragment(url); + let data_url = data_to_data_url(&media_type, &data, url, &url_fragment); // Add to cache cache.insert(new_cache_key, data_url.clone()); Ok((data_url, res_url))