diff --git a/src/css.rs b/src/css.rs index 560c163..1abe8cd 100644 --- a/src/css.rs +++ b/src/css.rs @@ -2,9 +2,8 @@ use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token}; use reqwest::blocking::Client; use std::collections::HashMap; -use crate::utils::{ - data_to_data_url, get_url_fragment, is_http_url, resolve_url, retrieve_asset, url_with_fragment, -}; +use crate::url::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, url_with_fragment}; +use crate::utils::retrieve_asset; const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[ // Universal diff --git a/src/html.rs b/src/html.rs index 13a5f01..e2dead3 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1,9 +1,3 @@ -use crate::css::embed_css; -use crate::js::attr_is_event_handler; -use crate::utils::{ - data_to_data_url, get_url_fragment, is_http_url, resolve_url, retrieve_asset, url_has_protocol, - url_with_fragment, -}; use base64; use html5ever::interface::QualName; use html5ever::parse_document; @@ -17,6 +11,14 @@ use sha2::{Digest, Sha256, Sha384, Sha512}; use std::collections::HashMap; use std::default::Default; +use crate::css::embed_css; +use crate::js::attr_is_event_handler; +use crate::url::{ + data_to_data_url, get_url_fragment, is_http_url, resolve_url, url_has_protocol, + url_with_fragment, +}; +use crate::utils::retrieve_asset; + struct SrcSetItem<'a> { path: &'a str, descriptor: &'a str, diff --git a/src/lib.rs b/src/lib.rs index a9034d5..024a8c2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ mod macros; pub mod css; pub mod html; pub mod js; +pub mod url; pub mod utils; #[cfg(test)] diff --git a/src/main.rs b/src/main.rs index 330f1cd..0a72c14 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,4 @@ use chrono::prelude::*; -use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets}; -use monolith::utils::{data_url_to_data, is_data_url, is_file_url, is_http_url, retrieve_asset}; use reqwest::blocking::Client; use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT}; use reqwest::Url; @@ -12,6 +10,10 @@ use std::path::Path; use std::process; use std::time::Duration; +use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets}; +use monolith::url::{data_url_to_data, is_data_url, is_file_url, is_http_url}; +use monolith::utils::retrieve_asset; + mod args; mod macros; diff --git a/src/tests/utils/clean_url.rs b/src/tests/url/clean_url.rs similarity index 81% rename from src/tests/utils/clean_url.rs rename to src/tests/url/clean_url.rs index 9e3b9f7..4c1de79 100644 --- a/src/tests/utils/clean_url.rs +++ b/src/tests/url/clean_url.rs @@ -7,12 +7,12 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn removes_fragment() { assert_eq!( - utils::clean_url("https://somewhere.com/font.eot#iefix"), + url::clean_url("https://somewhere.com/font.eot#iefix"), "https://somewhere.com/font.eot" ); } @@ -20,7 +20,7 @@ mod passing { #[test] fn removes_empty_fragment() { assert_eq!( - utils::clean_url("https://somewhere.com/font.eot#"), + url::clean_url("https://somewhere.com/font.eot#"), "https://somewhere.com/font.eot" ); } @@ -28,7 +28,7 @@ mod passing { #[test] fn removes_empty_query_and_empty_fragment() { assert_eq!( - utils::clean_url("https://somewhere.com/font.eot?#"), + url::clean_url("https://somewhere.com/font.eot?#"), "https://somewhere.com/font.eot" ); } @@ -36,7 +36,7 @@ mod passing { #[test] fn removes_empty_query_amp_and_empty_fragment() { assert_eq!( - utils::clean_url("https://somewhere.com/font.eot?a=b&#"), + url::clean_url("https://somewhere.com/font.eot?a=b&#"), "https://somewhere.com/font.eot?a=b" ); } @@ -44,7 +44,7 @@ mod passing { #[test] fn keeps_credentials() { assert_eq!( - utils::clean_url("https://cookie:monster@gibson.internet/"), + url::clean_url("https://cookie:monster@gibson.internet/"), "https://cookie:monster@gibson.internet/" ); } diff --git a/src/tests/utils/data_to_data_url.rs b/src/tests/url/data_to_data_url.rs similarity index 88% rename from src/tests/utils/data_to_data_url.rs rename to src/tests/url/data_to_data_url.rs index 2bd63c1..f10e4a8 100644 --- a/src/tests/utils/data_to_data_url.rs +++ b/src/tests/url/data_to_data_url.rs @@ -7,13 +7,13 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn encode_string_with_specific_media_type() { let mime = "application/javascript"; let data = "var word = 'hello';\nalert(word);\n"; - let data_url = utils::data_to_data_url(mime, data.as_bytes(), ""); + let data_url = url::data_to_data_url(mime, data.as_bytes(), ""); assert_eq!( &data_url, @@ -24,7 +24,7 @@ mod passing { #[test] fn encode_append_fragment() { let data = "\n"; - let data_url = utils::data_to_data_url("image/svg+xml", data.as_bytes(), ""); + let data_url = url::data_to_data_url("image/svg+xml", data.as_bytes(), ""); assert_eq!(&data_url, "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K"); } diff --git a/src/tests/utils/data_url_to_data.rs b/src/tests/url/data_url_to_data.rs similarity index 82% rename from src/tests/utils/data_url_to_data.rs rename to src/tests/url/data_url_to_data.rs index 7d8b766..2ad5437 100644 --- a/src/tests/utils/data_url_to_data.rs +++ b/src/tests/url/data_url_to_data.rs @@ -7,11 +7,11 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn parse_text_html_base64() { - let (media_type, data) = utils::data_url_to_data("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg=="); + let (media_type, data) = url::data_url_to_data("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg=="); assert_eq!(media_type, "text/html"); assert_eq!( @@ -22,7 +22,7 @@ mod passing { #[test] fn parse_text_html_utf8() { - let (media_type, data) = utils::data_url_to_data( + let (media_type, data) = url::data_url_to_data( "data:text/html;utf8,Work expands so as to fill the time available for its completion", ); @@ -35,7 +35,7 @@ mod passing { #[test] fn parse_text_html_plaintext() { - let (media_type, data) = utils::data_url_to_data( + let (media_type, data) = url::data_url_to_data( "data:text/html,Work expands so as to fill the time available for its completion", ); @@ -48,7 +48,7 @@ mod passing { #[test] fn parse_text_html_charset_utf_8_between_two_whitespaces() { - let (media_type, data) = utils::data_url_to_data(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion "); + let (media_type, data) = url::data_url_to_data(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion "); assert_eq!(media_type, "text/html"); assert_eq!( @@ -60,7 +60,7 @@ mod passing { #[test] fn parse_text_css_url_encoded() { let (media_type, data) = - utils::data_url_to_data("data:text/css,div{background-color:%23000}"); + url::data_url_to_data("data:text/css,div{background-color:%23000}"); assert_eq!(media_type, "text/css"); assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}"); @@ -68,7 +68,7 @@ mod passing { #[test] fn parse_no_media_type_base64() { - let (media_type, data) = utils::data_url_to_data("data:;base64,dGVzdA=="); + let (media_type, data) = url::data_url_to_data("data:;base64,dGVzdA=="); assert_eq!(media_type, ""); assert_eq!(String::from_utf8_lossy(&data), "test"); @@ -76,7 +76,7 @@ mod passing { #[test] fn parse_no_media_type_no_encoding() { - let (media_type, data) = utils::data_url_to_data("data:;,test%20test"); + let (media_type, data) = url::data_url_to_data("data:;,test%20test"); assert_eq!(media_type, ""); assert_eq!(String::from_utf8_lossy(&data), "test test"); @@ -92,11 +92,11 @@ mod passing { #[cfg(test)] mod failing { - use crate::utils; + use crate::url; #[test] fn just_word_data() { - let (media_type, data) = utils::data_url_to_data("data"); + let (media_type, data) = url::data_url_to_data("data"); assert_eq!(media_type, ""); assert_eq!(String::from_utf8_lossy(&data), ""); diff --git a/src/tests/utils/decode_url.rs b/src/tests/url/decode_url.rs similarity index 90% rename from src/tests/utils/decode_url.rs rename to src/tests/url/decode_url.rs index f436605..5cec664 100644 --- a/src/tests/utils/decode_url.rs +++ b/src/tests/url/decode_url.rs @@ -7,12 +7,12 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn decode_unicode_characters() { assert_eq!( - utils::decode_url(str!( + url::decode_url(str!( "%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5" )), "検ヒム解塗ゃッ = サ" @@ -22,7 +22,7 @@ mod passing { #[test] fn decode_file_url() { assert_eq!( - utils::decode_url(str!("file:///tmp/space%20here/test%231.html")), + url::decode_url(str!("file:///tmp/space%20here/test%231.html")), "file:///tmp/space here/test#1.html" ); } @@ -30,7 +30,7 @@ mod passing { #[test] fn plus_sign() { assert_eq!( - utils::decode_url(str!( + url::decode_url(str!( "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic" )), "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic" diff --git a/src/tests/utils/file_url_to_fs_path.rs b/src/tests/url/file_url_to_fs_path.rs similarity index 79% rename from src/tests/utils/file_url_to_fs_path.rs rename to src/tests/url/file_url_to_fs_path.rs index 437f6bd..6194e3f 100644 --- a/src/tests/utils/file_url_to_fs_path.rs +++ b/src/tests/url/file_url_to_fs_path.rs @@ -7,18 +7,18 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn remove_protocl_and_fragment() { if cfg!(windows) { assert_eq!( - utils::file_url_to_fs_path("file:///C:/documents/some-path/some-file.svg#fragment"), + url::file_url_to_fs_path("file:///C:/documents/some-path/some-file.svg#fragment"), "C:\\documents\\some-path\\some-file.svg" ); } else { assert_eq!( - utils::file_url_to_fs_path("file:///tmp/some-path/some-file.svg#fragment"), + url::file_url_to_fs_path("file:///tmp/some-path/some-file.svg#fragment"), "/tmp/some-path/some-file.svg" ); } @@ -28,12 +28,12 @@ mod passing { fn decodes_urls() { if cfg!(windows) { assert_eq!( - utils::file_url_to_fs_path("file:///C:/Documents%20and%20Settings/some-file.html"), + url::file_url_to_fs_path("file:///C:/Documents%20and%20Settings/some-file.html"), "C:\\Documents and Settings\\some-file.html" ); } else { assert_eq!( - utils::file_url_to_fs_path("file:///home/user/My%20Documents"), + url::file_url_to_fs_path("file:///home/user/My%20Documents"), "/home/user/My Documents" ); } diff --git a/src/tests/utils/get_url_fragment.rs b/src/tests/url/get_url_fragment.rs similarity index 90% rename from src/tests/utils/get_url_fragment.rs rename to src/tests/url/get_url_fragment.rs index 94cb8a6..9d65a21 100644 --- a/src/tests/utils/get_url_fragment.rs +++ b/src/tests/url/get_url_fragment.rs @@ -7,12 +7,12 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn data_url() { assert_eq!( - utils::get_url_fragment( + url::get_url_fragment( "data:image/svg+xml;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h#test" ), "test" @@ -21,6 +21,6 @@ mod passing { #[test] fn https_empty() { - assert_eq!(utils::get_url_fragment("https://kernel.org#"), ""); + assert_eq!(url::get_url_fragment("https://kernel.org#"), ""); } } diff --git a/src/tests/utils/is_data_url.rs b/src/tests/url/is_data_url.rs similarity index 88% rename from src/tests/utils/is_data_url.rs rename to src/tests/url/is_data_url.rs index 92c896d..efd059c 100644 --- a/src/tests/utils/is_data_url.rs +++ b/src/tests/url/is_data_url.rs @@ -7,18 +7,18 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn data_url_text_html() { - assert!(utils::is_data_url( + assert!(url::is_data_url( "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h" )); } #[test] fn data_url_no_media_type() { - assert!(utils::is_data_url( + assert!(url::is_data_url( "data:;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h" )); } @@ -33,20 +33,20 @@ mod passing { #[cfg(test)] mod failing { - use crate::utils; + use crate::url; #[test] fn https_url() { - assert!(!utils::is_data_url("https://kernel.org")); + assert!(!url::is_data_url("https://kernel.org")); } #[test] fn no_protocol_url() { - assert!(!utils::is_data_url("//kernel.org")); + assert!(!url::is_data_url("//kernel.org")); } #[test] fn empty_string() { - assert!(!utils::is_data_url("")); + assert!(!url::is_data_url("")); } } diff --git a/src/tests/utils/is_file_url.rs b/src/tests/url/is_file_url.rs similarity index 84% rename from src/tests/utils/is_file_url.rs rename to src/tests/url/is_file_url.rs index 5e6e360..927b793 100644 --- a/src/tests/utils/is_file_url.rs +++ b/src/tests/url/is_file_url.rs @@ -7,32 +7,32 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn unix_file_url() { - assert!(utils::is_file_url( + assert!(url::is_file_url( "file:///home/user/Websites/my-website/index.html" )); } #[test] fn windows_file_url() { - assert!(utils::is_file_url( + assert!(url::is_file_url( "file:///C:/Documents%20and%20Settings/user/Websites/my-website/assets/images/logo.png" )); } #[test] fn unix_url_with_backslashes() { - assert!(utils::is_file_url( + assert!(url::is_file_url( "file:\\\\\\home\\user\\Websites\\my-website\\index.html" )); } #[test] fn windows_file_url_with_backslashes() { - assert!(utils::is_file_url( + assert!(url::is_file_url( "file:\\\\\\C:\\Documents%20and%20Settings\\user\\Websites\\my-website\\assets\\images\\logo.png" )); } @@ -47,37 +47,37 @@ mod passing { #[cfg(test)] mod failing { - use crate::utils; + use crate::url; #[test] fn url_with_no_protocl() { - assert!(!utils::is_file_url("//kernel.org")); + assert!(!url::is_file_url("//kernel.org")); } #[test] fn dot_slash_filename() { - assert!(!utils::is_file_url("./index.html")); + assert!(!url::is_file_url("./index.html")); } #[test] fn just_filename() { - assert!(!utils::is_file_url("some-local-page.htm")); + assert!(!url::is_file_url("some-local-page.htm")); } #[test] fn https_ip_port_url() { - assert!(!utils::is_file_url("https://1.2.3.4:80/www/index.html")); + assert!(!url::is_file_url("https://1.2.3.4:80/www/index.html")); } #[test] fn data_url() { - assert!(!utils::is_file_url( + assert!(!url::is_file_url( "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h" )); } #[test] fn just_word_file() { - assert!(!utils::is_file_url("file")); + assert!(!url::is_file_url("file")); } } diff --git a/src/tests/utils/is_http_url.rs b/src/tests/url/is_http_url.rs similarity index 81% rename from src/tests/utils/is_http_url.rs rename to src/tests/url/is_http_url.rs index 981accc..622d340 100644 --- a/src/tests/utils/is_http_url.rs +++ b/src/tests/url/is_http_url.rs @@ -7,21 +7,21 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn http_url() { - assert!(utils::is_http_url("http://kernel.org")); + assert!(url::is_http_url("http://kernel.org")); } #[test] fn https_url() { - assert!(utils::is_http_url("https://www.rust-lang.org/")); + assert!(url::is_http_url("https://www.rust-lang.org/")); } #[test] fn http_url_with_backslashes() { - assert!(utils::is_http_url("http:\\\\freebsd.org\\")); + assert!(url::is_http_url("http:\\\\freebsd.org\\")); } } @@ -34,31 +34,31 @@ mod passing { #[cfg(test)] mod failing { - use crate::utils; + use crate::url; #[test] fn url_with_no_protocol() { - assert!(!utils::is_http_url("//kernel.org")); + assert!(!url::is_http_url("//kernel.org")); } #[test] fn dot_slash_filename() { - assert!(!utils::is_http_url("./index.html")); + assert!(!url::is_http_url("./index.html")); } #[test] fn just_filename() { - assert!(!utils::is_http_url("some-local-page.htm")); + assert!(!url::is_http_url("some-local-page.htm")); } #[test] fn https_ip_port_url() { - assert!(!utils::is_http_url("ftp://1.2.3.4/www/index.html")); + assert!(!url::is_http_url("ftp://1.2.3.4/www/index.html")); } #[test] fn data_url() { - assert!(!utils::is_http_url( + assert!(!url::is_http_url( "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h" )); } diff --git a/src/tests/url/mod.rs b/src/tests/url/mod.rs new file mode 100644 index 0000000..226c388 --- /dev/null +++ b/src/tests/url/mod.rs @@ -0,0 +1,12 @@ +mod clean_url; +mod data_to_data_url; +mod data_url_to_data; +mod decode_url; +mod file_url_to_fs_path; +mod get_url_fragment; +mod is_data_url; +mod is_file_url; +mod is_http_url; +mod resolve_url; +mod url_has_protocol; +mod url_with_fragment; diff --git a/src/tests/utils/resolve_url.rs b/src/tests/url/resolve_url.rs similarity index 88% rename from src/tests/utils/resolve_url.rs rename to src/tests/url/resolve_url.rs index c3b1797..d584a32 100644 --- a/src/tests/utils/resolve_url.rs +++ b/src/tests/url/resolve_url.rs @@ -7,13 +7,13 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; use url::ParseError; #[test] fn from_https_to_level_up_relative() -> Result<(), ParseError> { let resolved_url = - utils::resolve_url("https://www.kernel.org", "../category/signatures.html")?; + url::resolve_url("https://www.kernel.org", "../category/signatures.html")?; assert_eq!( resolved_url.as_str(), @@ -25,7 +25,7 @@ mod passing { #[test] fn from_just_filename_to_full_https_url() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "saved_page.htm", "https://www.kernel.org/category/signatures.html", )?; @@ -40,7 +40,7 @@ mod passing { #[test] fn from_https_url_to_url_with_no_protocol() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "https://www.kernel.org", "//www.kernel.org/theme/images/logos/tux.png", )?; @@ -56,7 +56,7 @@ mod passing { #[test] fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "https://www.kernel.org", "//another-host.org/theme/images/logos/tux.png", )?; @@ -71,7 +71,7 @@ mod passing { #[test] fn from_https_url_to_relative_root_path() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "https://www.kernel.org/category/signatures.html", "/theme/images/logos/tux.png", )?; @@ -86,7 +86,7 @@ mod passing { #[test] fn from_https_to_just_filename() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "https://www.w3schools.com/html/html_iframe.asp", "default.asp", )?; @@ -101,7 +101,7 @@ mod passing { #[test] fn from_data_url_to_https() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", "https://www.kernel.org/category/signatures.html", )?; @@ -116,7 +116,7 @@ mod passing { #[test] fn from_data_url_to_data_url() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K", )?; @@ -131,7 +131,7 @@ mod passing { #[test] fn from_file_url_to_relative_path() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "file:///home/user/Websites/my-website/index.html", "assets/images/logo.png", ) @@ -147,7 +147,7 @@ mod passing { #[test] fn from_file_url_to_relative_path_with_backslashes() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "file:\\\\\\home\\user\\Websites\\my-website\\index.html", "assets\\images\\logo.png", ) @@ -163,7 +163,7 @@ mod passing { #[test] fn from_data_url_to_file_url() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", "file:///etc/passwd", ) @@ -176,7 +176,7 @@ mod passing { #[test] fn preserve_fragment() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "http://doesnt-matter.local/", "css/fonts/fontmarvelous.svg#fontmarvelous", ) @@ -193,9 +193,9 @@ mod passing { #[test] fn resolve_from_file_url_to_file_url() -> Result<(), ParseError> { let resolved_url = if cfg!(windows) { - utils::resolve_url("file:///c:/index.html", "file:///c:/image.png").unwrap_or(str!()) + url::resolve_url("file:///c:/index.html", "file:///c:/image.png").unwrap_or(str!()) } else { - utils::resolve_url("file:///tmp/index.html", "file:///tmp/image.png").unwrap_or(str!()) + url::resolve_url("file:///tmp/index.html", "file:///tmp/image.png").unwrap_or(str!()) }; assert_eq!( @@ -220,12 +220,12 @@ mod passing { #[cfg(test)] mod failing { - use crate::utils; + use crate::url; use url::ParseError; #[test] fn from_data_url_to_url_with_no_protocol() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", "//www.w3schools.com/html/html_iframe.asp", ) diff --git a/src/tests/utils/url_has_protocol.rs b/src/tests/url/url_has_protocol.rs similarity index 78% rename from src/tests/utils/url_has_protocol.rs rename to src/tests/url/url_has_protocol.rs index 3c03619..1111b4a 100644 --- a/src/tests/utils/url_has_protocol.rs +++ b/src/tests/url/url_has_protocol.rs @@ -7,50 +7,50 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn mailto() { - assert!(utils::url_has_protocol( + assert!(url::url_has_protocol( "mailto:somebody@somewhere.com?subject=hello" )); } #[test] fn tel() { - assert!(utils::url_has_protocol("tel:5551234567")); + assert!(url::url_has_protocol("tel:5551234567")); } #[test] fn ftp_no_slashes() { - assert!(utils::url_has_protocol("ftp:some-ftp-server.com")); + assert!(url::url_has_protocol("ftp:some-ftp-server.com")); } #[test] fn ftp_with_credentials() { - assert!(utils::url_has_protocol( + assert!(url::url_has_protocol( "ftp://user:password@some-ftp-server.com" )); } #[test] fn javascript() { - assert!(utils::url_has_protocol("javascript:void(0)")); + assert!(url::url_has_protocol("javascript:void(0)")); } #[test] fn http() { - assert!(utils::url_has_protocol("http://news.ycombinator.com")); + assert!(url::url_has_protocol("http://news.ycombinator.com")); } #[test] fn https() { - assert!(utils::url_has_protocol("https://github.com")); + assert!(url::url_has_protocol("https://github.com")); } #[test] fn mailto_uppercase() { - assert!(utils::url_has_protocol( + assert!(url::url_has_protocol( "MAILTO:somebody@somewhere.com?subject=hello" )); } @@ -69,23 +69,23 @@ mod failing { #[test] fn url_with_no_protocol() { - assert!(!utils::url_has_protocol( + assert!(!url::url_has_protocol( "//some-hostname.com/some-file.html" )); } #[test] fn relative_path() { - assert!(!utils::url_has_protocol("some-hostname.com/some-file.html")); + assert!(!url::url_has_protocol("some-hostname.com/some-file.html")); } #[test] fn relative_to_root_path() { - assert!(!utils::url_has_protocol("/some-file.html")); + assert!(!url::url_has_protocol("/some-file.html")); } #[test] fn empty_string() { - assert!(!utils::url_has_protocol("")); + assert!(!url::url_has_protocol("")); } } diff --git a/src/tests/utils/url_with_fragment.rs b/src/tests/url/url_with_fragment.rs similarity index 87% rename from src/tests/utils/url_with_fragment.rs rename to src/tests/url/url_with_fragment.rs index 50a51f9..955acf3 100644 --- a/src/tests/utils/url_with_fragment.rs +++ b/src/tests/url/url_with_fragment.rs @@ -7,13 +7,13 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn url_with_fragment_url() { let url = "https://localhost.localdomain/path/"; let fragment = "test"; - let assembled_url = utils::url_with_fragment(url, fragment); + let assembled_url = url::url_with_fragment(url, fragment); assert_eq!(&assembled_url, "https://localhost.localdomain/path/#test"); } @@ -21,7 +21,7 @@ mod passing { fn url_with_fragment_empty_url() { let url = "https://localhost.localdomain/path/"; let fragment = ""; - let assembled_url = utils::url_with_fragment(url, fragment); + let assembled_url = url::url_with_fragment(url, fragment); assert_eq!(&assembled_url, "https://localhost.localdomain/path/"); } @@ -30,7 +30,7 @@ mod passing { fn url_with_fragment_data_url() { let url = "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K"; let fragment = "fragment"; - let assembled_url = utils::url_with_fragment(url, fragment); + let assembled_url = url::url_with_fragment(url, fragment); assert_eq!( &assembled_url, diff --git a/src/tests/utils/mod.rs b/src/tests/utils/mod.rs index 90378a7..8d13447 100644 --- a/src/tests/utils/mod.rs +++ b/src/tests/utils/mod.rs @@ -1,14 +1,2 @@ -mod clean_url; -mod data_to_data_url; -mod data_url_to_data; -mod decode_url; mod detect_media_type; -mod file_url_to_fs_path; -mod get_url_fragment; -mod is_data_url; -mod is_file_url; -mod is_http_url; -mod resolve_url; mod retrieve_asset; -mod url_has_protocol; -mod url_with_fragment; diff --git a/src/tests/utils/retrieve_asset.rs b/src/tests/utils/retrieve_asset.rs index b04c63e..3c3d1e0 100644 --- a/src/tests/utils/retrieve_asset.rs +++ b/src/tests/utils/retrieve_asset.rs @@ -7,11 +7,13 @@ #[cfg(test)] mod passing { - use crate::utils; use reqwest::blocking::Client; use std::collections::HashMap; use std::env; + use crate::url; + use crate::utils; + #[test] fn read_data_url() { let cache = &mut HashMap::new(); @@ -28,12 +30,12 @@ mod passing { ) .unwrap(); assert_eq!( - utils::data_to_data_url(&media_type, &data, &final_url), - utils::data_to_data_url("text/html", "target".as_bytes(), "") + url::data_to_data_url(&media_type, &data, &final_url), + url::data_to_data_url("text/html", "target".as_bytes(), "") ); assert_eq!( final_url, - utils::data_to_data_url("text/html", "target".as_bytes(), "") + url::data_to_data_url("text/html", "target".as_bytes(), "") ); assert_eq!(&media_type, "text/html"); } @@ -63,7 +65,7 @@ mod passing { false, ) .unwrap(); - assert_eq!(utils::data_to_data_url("application/javascript", &data, &final_url), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg=="); + assert_eq!(url::data_to_data_url("application/javascript", &data, &final_url), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg=="); assert_eq!( &final_url, &format!( diff --git a/src/url.rs b/src/url.rs new file mode 100644 index 0000000..e493ce1 --- /dev/null +++ b/src/url.rs @@ -0,0 +1,168 @@ +use base64; +use url::{form_urlencoded, ParseError, Url}; + +use crate::utils::detect_media_type; + +pub fn clean_url>(input: T) -> String { + let mut url = Url::parse(input.as_ref()).unwrap(); + + // Clear fragment + url.set_fragment(None); + + // Get rid of stray question mark + if url.query() == Some("") { + url.set_query(None); + } + + // Remove empty trailing ampersand(s) + let mut result: String = url.to_string(); + while result.ends_with("&") { + result.pop(); + } + + result +} + +pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str) -> String { + let media_type: String = if media_type.is_empty() { + detect_media_type(data, &url) + } else { + media_type.to_string() + }; + + format!("data:{};base64,{}", media_type, base64::encode(data)) +} + +pub fn data_url_to_data>(url: T) -> (String, Vec) { + let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap()); + let path: String = parsed_url.path().to_string(); + let comma_loc: usize = path.find(',').unwrap_or(path.len()); + + let meta_data: String = path.chars().take(comma_loc).collect(); + let raw_data: String = path.chars().skip(comma_loc + 1).collect(); + + let text: String = decode_url(raw_data); + + let meta_data_items: Vec<&str> = meta_data.split(';').collect(); + let mut media_type: String = str!(); + let mut encoding: &str = ""; + + let mut i: i8 = 0; + for item in &meta_data_items { + if i == 0 { + media_type = str!(item); + } else { + if item.eq_ignore_ascii_case("base64") + || item.eq_ignore_ascii_case("utf8") + || item.eq_ignore_ascii_case("charset=UTF-8") + { + encoding = item; + } + } + + i = i + 1; + } + + let data: Vec = if encoding.eq_ignore_ascii_case("base64") { + base64::decode(&text).unwrap_or(vec![]) + } else { + text.as_bytes().to_vec() + }; + + (media_type, data) +} + +pub fn decode_url(input: String) -> String { + let input: String = input.replace("+", "%2B"); + + form_urlencoded::parse(input.as_bytes()) + .map(|(key, val)| { + [ + key.to_string(), + if val.to_string().len() == 0 { + str!() + } else { + str!('=') + }, + val.to_string(), + ] + .concat() + }) + .collect() +} + +pub fn file_url_to_fs_path(url: &str) -> String { + if !is_file_url(url) { + return str!(); + } + + let cutoff_l = if cfg!(windows) { 8 } else { 7 }; + let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string()); + let url_fragment = get_url_fragment(url); + if url_fragment != "" { + let max_len = fs_file_path.len() - 1 - url_fragment.len(); + fs_file_path = fs_file_path[0..max_len].to_string(); + } + + if cfg!(windows) { + fs_file_path = fs_file_path.replace("/", "\\"); + } + + // File paths should not be %-encoded + decode_url(fs_file_path) +} + +pub fn get_url_fragment>(url: T) -> String { + if Url::parse(url.as_ref()).unwrap().fragment() == None { + str!() + } else { + str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap()) + } +} + +pub fn is_data_url>(url: T) -> bool { + Url::parse(url.as_ref()) + .and_then(|u| Ok(u.scheme() == "data")) + .unwrap_or(false) +} + +pub fn is_file_url>(url: T) -> bool { + Url::parse(url.as_ref()) + .and_then(|u| Ok(u.scheme() == "file")) + .unwrap_or(false) +} + +pub fn is_http_url>(url: T) -> bool { + Url::parse(url.as_ref()) + .and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https")) + .unwrap_or(false) +} + +pub fn resolve_url, U: AsRef>(from: T, to: U) -> Result { + let result = if is_http_url(to.as_ref()) { + to.as_ref().to_string() + } else { + Url::parse(from.as_ref())? + .join(to.as_ref())? + .as_ref() + .to_string() + }; + Ok(result) +} + +pub fn url_has_protocol>(url: T) -> bool { + Url::parse(url.as_ref()) + .and_then(|u| Ok(u.scheme().len() > 0)) + .unwrap_or(false) +} + +pub fn url_with_fragment(url: &str, fragment: &str) -> String { + let mut result = str!(&url); + + if !fragment.is_empty() { + result += "#"; + result += fragment; + } + + result +} diff --git a/src/utils.rs b/src/utils.rs index a628c04..de0ca45 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,10 +1,10 @@ -use base64; use reqwest::blocking::Client; use reqwest::header::CONTENT_TYPE; use std::collections::HashMap; use std::fs; use std::path::Path; -use url::{form_urlencoded, ParseError, Url}; + +use crate::url::{clean_url, data_url_to_data, file_url_to_fs_path, is_data_url, is_file_url}; const MAGIC: [[&[u8]; 2]; 18] = [ // Image @@ -38,16 +38,6 @@ const PLAINTEXT_MEDIA_TYPES: &[&str] = &[ "text/plain", ]; -pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str) -> String { - let media_type: String = if media_type.is_empty() { - detect_media_type(data, &url) - } else { - media_type.to_string() - }; - - format!("data:{};base64,{}", media_type, base64::encode(data)) -} - pub fn detect_media_type(data: &[u8], url: &str) -> String { for item in MAGIC.iter() { if data.starts_with(item[0]) { @@ -62,153 +52,10 @@ pub fn detect_media_type(data: &[u8], url: &str) -> String { str!() } -pub fn url_has_protocol>(url: T) -> bool { - Url::parse(url.as_ref()) - .and_then(|u| Ok(u.scheme().len() > 0)) - .unwrap_or(false) -} - -pub fn is_data_url>(url: T) -> bool { - Url::parse(url.as_ref()) - .and_then(|u| Ok(u.scheme() == "data")) - .unwrap_or(false) -} - -pub fn is_file_url>(url: T) -> bool { - Url::parse(url.as_ref()) - .and_then(|u| Ok(u.scheme() == "file")) - .unwrap_or(false) -} - -pub fn is_http_url>(url: T) -> bool { - Url::parse(url.as_ref()) - .and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https")) - .unwrap_or(false) -} - pub fn is_plaintext_media_type(media_type: &str) -> bool { PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str()) } -pub fn resolve_url, U: AsRef>(from: T, to: U) -> Result { - let result = if is_http_url(to.as_ref()) { - to.as_ref().to_string() - } else { - Url::parse(from.as_ref())? - .join(to.as_ref())? - .as_ref() - .to_string() - }; - Ok(result) -} - -pub fn get_url_fragment>(url: T) -> String { - if Url::parse(url.as_ref()).unwrap().fragment() == None { - str!() - } else { - str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap()) - } -} - -pub fn clean_url>(input: T) -> String { - let mut url = Url::parse(input.as_ref()).unwrap(); - - // Clear fragment - url.set_fragment(None); - - // Get rid of stray question mark - if url.query() == Some("") { - url.set_query(None); - } - - // Remove empty trailing ampersand(s) - let mut result: String = url.to_string(); - while result.ends_with("&") { - result.pop(); - } - - result -} - -pub fn data_url_to_data>(url: T) -> (String, Vec) { - let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap()); - let path: String = parsed_url.path().to_string(); - let comma_loc: usize = path.find(',').unwrap_or(path.len()); - - let meta_data: String = path.chars().take(comma_loc).collect(); - let raw_data: String = path.chars().skip(comma_loc + 1).collect(); - - let text: String = decode_url(raw_data); - - let meta_data_items: Vec<&str> = meta_data.split(';').collect(); - let mut media_type: String = str!(); - let mut encoding: &str = ""; - - let mut i: i8 = 0; - for item in &meta_data_items { - if i == 0 { - media_type = str!(item); - } else { - if item.eq_ignore_ascii_case("base64") - || item.eq_ignore_ascii_case("utf8") - || item.eq_ignore_ascii_case("charset=UTF-8") - { - encoding = item; - } - } - - i = i + 1; - } - - let data: Vec = if encoding.eq_ignore_ascii_case("base64") { - base64::decode(&text).unwrap_or(vec![]) - } else { - text.as_bytes().to_vec() - }; - - (media_type, data) -} - -pub fn decode_url(input: String) -> String { - let input: String = input.replace("+", "%2B"); - - form_urlencoded::parse(input.as_bytes()) - .map(|(key, val)| { - [ - key.to_string(), - if val.to_string().len() == 0 { - str!() - } else { - str!('=') - }, - val.to_string(), - ] - .concat() - }) - .collect() -} - -pub fn file_url_to_fs_path(url: &str) -> String { - if !is_file_url(url) { - return str!(); - } - - let cutoff_l = if cfg!(windows) { 8 } else { 7 }; - let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string()); - let url_fragment = get_url_fragment(url); - if url_fragment != "" { - let max_len = fs_file_path.len() - 1 - url_fragment.len(); - fs_file_path = fs_file_path[0..max_len].to_string(); - } - - if cfg!(windows) { - fs_file_path = fs_file_path.replace("/", "\\"); - } - - // File paths should not be %-encoded - decode_url(fs_file_path) -} - pub fn retrieve_asset( cache: &mut HashMap>, client: &Client, @@ -291,14 +138,3 @@ pub fn retrieve_asset( } } } - -pub fn url_with_fragment(url: &str, fragment: &str) -> String { - let mut result = str!(&url); - - if !fragment.is_empty() { - result += "#"; - result += fragment; - } - - result -}