diff --git a/src/css.rs b/src/css.rs index 3357a56..1abe8cd 100644 --- a/src/css.rs +++ b/src/css.rs @@ -2,7 +2,8 @@ use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token}; use reqwest::blocking::Client; use std::collections::HashMap; -use crate::utils::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, retrieve_asset}; +use crate::url::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, url_with_fragment}; +use crate::utils::retrieve_asset; const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[ // Universal @@ -173,32 +174,34 @@ pub fn process_css<'a>( let import_url_fragment = get_url_fragment(import_full_url.clone()); match retrieve_asset(cache, client, &parent_url, &import_full_url, opt_silent) { Ok((import_contents, import_final_url, _import_media_type)) => { - result.push_str( - enquote( - data_to_data_url( - "text/css", - embed_css( - cache, - client, - &import_final_url, - &String::from_utf8_lossy(&import_contents), - opt_no_fonts, - opt_no_images, - opt_silent, - ) - .as_bytes(), - &import_final_url, - &import_url_fragment, - ), - false, + let import_data_url = data_to_data_url( + "text/css", + embed_css( + cache, + client, + &import_final_url, + &String::from_utf8_lossy(&import_contents), + opt_no_fonts, + opt_no_images, + opt_silent, ) - .as_str(), + .as_bytes(), + &import_final_url, + ); + let assembled_url: String = url_with_fragment( + import_data_url.as_str(), + import_url_fragment.as_str(), ); + result.push_str(enquote(assembled_url, false).as_str()); } Err(_) => { // Keep remote reference if unable to retrieve the asset if is_http_url(import_full_url.clone()) { - result.push_str(enquote(import_full_url, false).as_str()); + let assembled_url: String = url_with_fragment( + import_full_url.as_str(), + import_url_fragment.as_str(), + ); + result.push_str(enquote(assembled_url, false).as_str()); } } } @@ -222,18 +225,19 @@ pub fn process_css<'a>( opt_silent, ) { Ok((data, final_url, media_type)) => { - let data_url = data_to_data_url( - &media_type, - &data, - &final_url, - &url_fragment, - ); - result.push_str(enquote(data_url, false).as_str()); + let data_url = data_to_data_url(&media_type, &data, &final_url); + let assembled_url: String = + url_with_fragment(data_url.as_str(), url_fragment.as_str()); + result.push_str(enquote(assembled_url, false).as_str()); } Err(_) => { // Keep remote reference if unable to retrieve the asset if is_http_url(resolved_url.clone()) { - result.push_str(enquote(resolved_url, false).as_str()); + let assembled_url: String = url_with_fragment( + resolved_url.as_str(), + url_fragment.as_str(), + ); + result.push_str(enquote(assembled_url, false).as_str()); } } } @@ -320,14 +324,17 @@ pub fn process_css<'a>( ) .as_bytes(), &final_url, - &url_fragment, ); - result.push_str(enquote(data_url, false).as_str()); + let assembled_url: String = + url_with_fragment(data_url.as_str(), url_fragment.as_str()); + result.push_str(enquote(assembled_url, false).as_str()); } Err(_) => { // Keep remote reference if unable to retrieve the asset if is_http_url(full_url.clone()) { - result.push_str(enquote(full_url, false).as_str()); + let assembled_url: String = + url_with_fragment(full_url.as_str(), url_fragment.as_str()); + result.push_str(enquote(assembled_url, false).as_str()); } } } @@ -339,14 +346,17 @@ pub fn process_css<'a>( let url_fragment = get_url_fragment(full_url.clone()); match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) { Ok((data, final_url, media_type)) => { - let data_url = - data_to_data_url(&media_type, &data, &final_url, &url_fragment); - result.push_str(enquote(data_url, false).as_str()); + let data_url = data_to_data_url(&media_type, &data, &final_url); + let assembled_url: String = + url_with_fragment(data_url.as_str(), url_fragment.as_str()); + result.push_str(enquote(assembled_url, false).as_str()); } Err(_) => { // Keep remote reference if unable to retrieve the asset if is_http_url(full_url.clone()) { - result.push_str(enquote(full_url, false).as_str()); + let assembled_url: String = + url_with_fragment(full_url.as_str(), url_fragment.as_str()); + result.push_str(enquote(assembled_url, false).as_str()); } } } diff --git a/src/html.rs b/src/html.rs index 8590655..e2dead3 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1,8 +1,3 @@ -use crate::css::embed_css; -use crate::js::attr_is_event_handler; -use crate::utils::{ - data_to_data_url, get_url_fragment, is_http_url, resolve_url, retrieve_asset, url_has_protocol, -}; use base64; use html5ever::interface::QualName; use html5ever::parse_document; @@ -16,6 +11,14 @@ use sha2::{Digest, Sha256, Sha384, Sha512}; use std::collections::HashMap; use std::default::Default; +use crate::css::embed_css; +use crate::js::attr_is_event_handler; +use crate::url::{ + data_to_data_url, get_url_fragment, is_http_url, resolve_url, url_has_protocol, + url_with_fragment, +}; +use crate::utils::retrieve_asset; + struct SrcSetItem<'a> { path: &'a str, descriptor: &'a str, @@ -91,19 +94,19 @@ pub fn embed_srcset( let image_url_fragment = get_url_fragment(image_full_url.clone()); match retrieve_asset(cache, client, &parent_url, &image_full_url, opt_silent) { Ok((image_data, image_final_url, image_media_type)) => { - let image_data_url = data_to_data_url( - &image_media_type, - &image_data, - &image_final_url, - &image_url_fragment, - ); + let image_data_url = + data_to_data_url(&image_media_type, &image_data, &image_final_url); // Append retreved asset as a data URL - result.push_str(image_data_url.as_ref()); + let assembled_url: String = + url_with_fragment(image_data_url.as_str(), image_url_fragment.as_str()); + result.push_str(assembled_url.as_ref()); } Err(_) => { // Keep remote reference if unable to retrieve the asset if is_http_url(image_full_url.clone()) { - result.push_str(image_full_url.as_ref()); + let assembled_url: String = + url_with_fragment(image_full_url.as_str(), image_url_fragment.as_str()); + result.push_str(assembled_url.as_ref()); } else { // Avoid breaking the structure in case if not an HTTP(S) URL result.push_str(empty_image!()); @@ -246,33 +249,36 @@ pub fn walk_and_embed_assets( &link_href_media_type, &link_href_data, &link_href_final_url, - &link_href_url_fragment, ); // Add new data URL href attribute + let assembled_url: String = url_with_fragment( + link_href_data_url.as_str(), + link_href_url_fragment.as_str(), + ); attrs_mut.push(Attribute { name: QualName::new( None, ns!(), local_name!("href"), ), - value: Tendril::from_slice( - link_href_data_url.as_ref(), - ), + value: Tendril::from_slice(assembled_url.as_ref()), }); } } Err(_) => { // Keep remote reference if unable to retrieve the asset if is_http_url(link_href_full_url.clone()) { + let assembled_url: String = url_with_fragment( + link_href_full_url.as_str(), + link_href_url_fragment.as_str(), + ); attrs_mut.push(Attribute { name: QualName::new( None, ns!(), local_name!("href"), ), - value: Tendril::from_slice( - link_href_full_url.as_ref(), - ), + value: Tendril::from_slice(assembled_url.as_ref()), }); } } @@ -324,7 +330,6 @@ pub fn walk_and_embed_assets( "text/css", css.as_bytes(), &link_href_final_url, - "", ); // Add new data URL href attribute attrs_mut.push(Attribute { @@ -399,20 +404,27 @@ pub fn walk_and_embed_assets( &background_media_type, &background_data, &background_final_url, - &background_url_fragment, ); // Add new data URL background attribute + let assembled_url: String = url_with_fragment( + background_data_url.as_str(), + background_url_fragment.as_str(), + ); attrs_mut.push(Attribute { name: QualName::new(None, ns!(), local_name!("background")), - value: Tendril::from_slice(background_data_url.as_ref()), + value: Tendril::from_slice(assembled_url.as_ref()), }); } Err(_) => { // Keep remote reference if unable to retrieve the asset if is_http_url(background_full_url.clone()) { + let assembled_url: String = url_with_fragment( + background_full_url.as_str(), + background_url_fragment.as_str(), + ); attrs_mut.push(Attribute { name: QualName::new(None, ns!(), local_name!("background")), - value: Tendril::from_slice(background_full_url.as_ref()), + value: Tendril::from_slice(assembled_url.as_ref()), }); } } @@ -469,19 +481,26 @@ pub fn walk_and_embed_assets( &img_media_type, &img_data, &img_final_url, - &img_url_fragment, + ); + let assembled_url: String = url_with_fragment( + img_data_url.as_str(), + img_url_fragment.as_str(), ); attrs_mut.push(Attribute { name: QualName::new(None, ns!(), local_name!("src")), - value: Tendril::from_slice(img_data_url.as_ref()), + value: Tendril::from_slice(assembled_url.as_ref()), }); } Err(_) => { // Keep remote reference if unable to retrieve the asset if is_http_url(img_full_url.clone()) { + let assembled_url: String = url_with_fragment( + img_full_url.as_str(), + img_url_fragment.as_str(), + ); attrs_mut.push(Attribute { name: QualName::new(None, ns!(), local_name!("src")), - value: Tendril::from_slice(img_full_url.as_ref()), + value: Tendril::from_slice(assembled_url.as_ref()), }); } } @@ -563,22 +582,27 @@ pub fn walk_and_embed_assets( &input_image_media_type, &input_image_data, &input_image_final_url, - &input_image_url_fragment, ); // Add data URL src attribute + let assembled_url: String = url_with_fragment( + input_image_data_url.as_str(), + input_image_url_fragment.as_str(), + ); attrs_mut.push(Attribute { name: QualName::new(None, ns!(), local_name!("src")), - value: Tendril::from_slice(input_image_data_url.as_ref()), + value: Tendril::from_slice(assembled_url.as_ref()), }); } Err(_) => { // Keep remote reference if unable to retrieve the asset if is_http_url(input_image_full_url.clone()) { + let assembled_url: String = url_with_fragment( + input_image_full_url.as_str(), + input_image_url_fragment.as_str(), + ); attrs_mut.push(Attribute { name: QualName::new(None, ns!(), local_name!("src")), - value: Tendril::from_slice( - input_image_full_url.as_ref(), - ), + value: Tendril::from_slice(assembled_url.as_ref()), }); } } @@ -610,20 +634,27 @@ pub fn walk_and_embed_assets( &image_media_type, &image_data, &image_final_url, - &image_url_fragment, ); // Add new data URL href attribute + let assembled_url: String = url_with_fragment( + image_data_url.as_str(), + image_url_fragment.as_str(), + ); attrs_mut.push(Attribute { name: QualName::new(None, ns!(), local_name!("href")), - value: Tendril::from_slice(image_data_url.as_ref()), + value: Tendril::from_slice(assembled_url.as_ref()), }); } Err(_) => { // Keep remote reference if unable to retrieve the asset if is_http_url(image_full_url.clone()) { + let assembled_url: String = url_with_fragment( + image_full_url.as_str(), + image_url_fragment.as_str(), + ); attrs_mut.push(Attribute { name: QualName::new(None, ns!(), local_name!("href")), - value: Tendril::from_slice(image_full_url.as_ref()), + value: Tendril::from_slice(assembled_url.as_ref()), }); } } @@ -661,21 +692,23 @@ pub fn walk_and_embed_assets( &srcset_media_type, &srcset_data, &srcset_final_url, - &srcset_url_fragment, ); attr.value.clear(); - attr.value.push_slice(srcset_data_url.as_str()); + let assembled_url: String = url_with_fragment( + srcset_data_url.as_str(), + srcset_url_fragment.as_str(), + ); + attr.value.push_slice(assembled_url.as_str()); } Err(_) => { // Keep remote reference if unable to retrieve the asset if is_http_url(srcset_full_url.clone()) { attr.value.clear(); - attr.value.push_slice(srcset_full_url.as_str()); - if !srcset_url_fragment.is_empty() { - attr.value.push_slice("#"); - attr.value - .push_slice(srcset_url_fragment.as_str()); - } + let assembled_url: String = url_with_fragment( + srcset_full_url.as_str(), + srcset_url_fragment.as_str(), + ); + attr.value.push_slice(assembled_url.as_str()); } } } @@ -739,7 +772,6 @@ pub fn walk_and_embed_assets( "application/javascript", &script_data, &script_final_url, - "", ); // Add new data URL src attribute attrs_mut.push(Attribute { @@ -844,16 +876,23 @@ pub fn walk_and_embed_assets( &frame_media_type, &frame_data, &frame_final_url, - &frame_url_fragment, ); attr.value.clear(); - attr.value.push_slice(frame_data_url.as_str()); + let assembled_url: String = url_with_fragment( + frame_data_url.as_str(), + frame_url_fragment.as_str(), + ); + attr.value.push_slice(assembled_url.as_str()); } Err(_) => { // Keep remote reference if unable to retrieve the asset if is_http_url(frame_full_url.clone()) { attr.value.clear(); - attr.value.push_slice(frame_full_url.as_str()); + let assembled_url: String = url_with_fragment( + frame_full_url.as_str(), + frame_url_fragment.as_str(), + ); + attr.value.push_slice(assembled_url.as_str()); } } } @@ -896,16 +935,23 @@ pub fn walk_and_embed_assets( &video_poster_media_type, &video_poster_data, &video_poster_final_url, - &video_poster_url_fragment, ); attr.value.clear(); - attr.value.push_slice(video_poster_data_url.as_str()); + let assembled_url: String = url_with_fragment( + video_poster_data_url.as_str(), + video_poster_url_fragment.as_str(), + ); + attr.value.push_slice(assembled_url.as_str()); } Err(_) => { // Keep remote reference if unable to retrieve the asset if is_http_url(video_poster_full_url.clone()) { attr.value.clear(); - attr.value.push_slice(video_poster_full_url.as_str()); + let assembled_url: String = url_with_fragment( + video_poster_full_url.as_str(), + video_poster_url_fragment.as_str(), + ); + attr.value.push_slice(assembled_url.as_str()); } } } diff --git a/src/lib.rs b/src/lib.rs index a9034d5..024a8c2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ mod macros; pub mod css; pub mod html; pub mod js; +pub mod url; pub mod utils; #[cfg(test)] diff --git a/src/main.rs b/src/main.rs index 330f1cd..0a72c14 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,4 @@ use chrono::prelude::*; -use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets}; -use monolith::utils::{data_url_to_data, is_data_url, is_file_url, is_http_url, retrieve_asset}; use reqwest::blocking::Client; use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT}; use reqwest::Url; @@ -12,6 +10,10 @@ use std::path::Path; use std::process; use std::time::Duration; +use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets}; +use monolith::url::{data_url_to_data, is_data_url, is_file_url, is_http_url}; +use monolith::utils::retrieve_asset; + mod args; mod macros; diff --git a/src/tests/utils/clean_url.rs b/src/tests/url/clean_url.rs similarity index 81% rename from src/tests/utils/clean_url.rs rename to src/tests/url/clean_url.rs index 9e3b9f7..4c1de79 100644 --- a/src/tests/utils/clean_url.rs +++ b/src/tests/url/clean_url.rs @@ -7,12 +7,12 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn removes_fragment() { assert_eq!( - utils::clean_url("https://somewhere.com/font.eot#iefix"), + url::clean_url("https://somewhere.com/font.eot#iefix"), "https://somewhere.com/font.eot" ); } @@ -20,7 +20,7 @@ mod passing { #[test] fn removes_empty_fragment() { assert_eq!( - utils::clean_url("https://somewhere.com/font.eot#"), + url::clean_url("https://somewhere.com/font.eot#"), "https://somewhere.com/font.eot" ); } @@ -28,7 +28,7 @@ mod passing { #[test] fn removes_empty_query_and_empty_fragment() { assert_eq!( - utils::clean_url("https://somewhere.com/font.eot?#"), + url::clean_url("https://somewhere.com/font.eot?#"), "https://somewhere.com/font.eot" ); } @@ -36,7 +36,7 @@ mod passing { #[test] fn removes_empty_query_amp_and_empty_fragment() { assert_eq!( - utils::clean_url("https://somewhere.com/font.eot?a=b&#"), + url::clean_url("https://somewhere.com/font.eot?a=b&#"), "https://somewhere.com/font.eot?a=b" ); } @@ -44,7 +44,7 @@ mod passing { #[test] fn keeps_credentials() { assert_eq!( - utils::clean_url("https://cookie:monster@gibson.internet/"), + url::clean_url("https://cookie:monster@gibson.internet/"), "https://cookie:monster@gibson.internet/" ); } diff --git a/src/tests/utils/data_to_data_url.rs b/src/tests/url/data_to_data_url.rs similarity index 83% rename from src/tests/utils/data_to_data_url.rs rename to src/tests/url/data_to_data_url.rs index 9b4b4f6..f10e4a8 100644 --- a/src/tests/utils/data_to_data_url.rs +++ b/src/tests/url/data_to_data_url.rs @@ -7,13 +7,13 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn encode_string_with_specific_media_type() { let mime = "application/javascript"; let data = "var word = 'hello';\nalert(word);\n"; - let data_url = utils::data_to_data_url(mime, data.as_bytes(), "", ""); + let data_url = url::data_to_data_url(mime, data.as_bytes(), ""); assert_eq!( &data_url, @@ -24,8 +24,8 @@ mod passing { #[test] fn encode_append_fragment() { let data = "\n"; - let data_url = utils::data_to_data_url("text/css", data.as_bytes(), "", "fragment"); + let data_url = url::data_to_data_url("image/svg+xml", data.as_bytes(), ""); - assert_eq!(&data_url, "data:text/css;base64,PHN2Zz48L3N2Zz4K#fragment"); + assert_eq!(&data_url, "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K"); } } diff --git a/src/tests/utils/data_url_to_data.rs b/src/tests/url/data_url_to_data.rs similarity index 82% rename from src/tests/utils/data_url_to_data.rs rename to src/tests/url/data_url_to_data.rs index 7d8b766..2ad5437 100644 --- a/src/tests/utils/data_url_to_data.rs +++ b/src/tests/url/data_url_to_data.rs @@ -7,11 +7,11 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn parse_text_html_base64() { - let (media_type, data) = utils::data_url_to_data("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg=="); + let (media_type, data) = url::data_url_to_data("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg=="); assert_eq!(media_type, "text/html"); assert_eq!( @@ -22,7 +22,7 @@ mod passing { #[test] fn parse_text_html_utf8() { - let (media_type, data) = utils::data_url_to_data( + let (media_type, data) = url::data_url_to_data( "data:text/html;utf8,Work expands so as to fill the time available for its completion", ); @@ -35,7 +35,7 @@ mod passing { #[test] fn parse_text_html_plaintext() { - let (media_type, data) = utils::data_url_to_data( + let (media_type, data) = url::data_url_to_data( "data:text/html,Work expands so as to fill the time available for its completion", ); @@ -48,7 +48,7 @@ mod passing { #[test] fn parse_text_html_charset_utf_8_between_two_whitespaces() { - let (media_type, data) = utils::data_url_to_data(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion "); + let (media_type, data) = url::data_url_to_data(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion "); assert_eq!(media_type, "text/html"); assert_eq!( @@ -60,7 +60,7 @@ mod passing { #[test] fn parse_text_css_url_encoded() { let (media_type, data) = - utils::data_url_to_data("data:text/css,div{background-color:%23000}"); + url::data_url_to_data("data:text/css,div{background-color:%23000}"); assert_eq!(media_type, "text/css"); assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}"); @@ -68,7 +68,7 @@ mod passing { #[test] fn parse_no_media_type_base64() { - let (media_type, data) = utils::data_url_to_data("data:;base64,dGVzdA=="); + let (media_type, data) = url::data_url_to_data("data:;base64,dGVzdA=="); assert_eq!(media_type, ""); assert_eq!(String::from_utf8_lossy(&data), "test"); @@ -76,7 +76,7 @@ mod passing { #[test] fn parse_no_media_type_no_encoding() { - let (media_type, data) = utils::data_url_to_data("data:;,test%20test"); + let (media_type, data) = url::data_url_to_data("data:;,test%20test"); assert_eq!(media_type, ""); assert_eq!(String::from_utf8_lossy(&data), "test test"); @@ -92,11 +92,11 @@ mod passing { #[cfg(test)] mod failing { - use crate::utils; + use crate::url; #[test] fn just_word_data() { - let (media_type, data) = utils::data_url_to_data("data"); + let (media_type, data) = url::data_url_to_data("data"); assert_eq!(media_type, ""); assert_eq!(String::from_utf8_lossy(&data), ""); diff --git a/src/tests/utils/decode_url.rs b/src/tests/url/decode_url.rs similarity index 90% rename from src/tests/utils/decode_url.rs rename to src/tests/url/decode_url.rs index f436605..5cec664 100644 --- a/src/tests/utils/decode_url.rs +++ b/src/tests/url/decode_url.rs @@ -7,12 +7,12 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn decode_unicode_characters() { assert_eq!( - utils::decode_url(str!( + url::decode_url(str!( "%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5" )), "検ヒム解塗ゃッ = サ" @@ -22,7 +22,7 @@ mod passing { #[test] fn decode_file_url() { assert_eq!( - utils::decode_url(str!("file:///tmp/space%20here/test%231.html")), + url::decode_url(str!("file:///tmp/space%20here/test%231.html")), "file:///tmp/space here/test#1.html" ); } @@ -30,7 +30,7 @@ mod passing { #[test] fn plus_sign() { assert_eq!( - utils::decode_url(str!( + url::decode_url(str!( "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic" )), "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic" diff --git a/src/tests/utils/file_url_to_fs_path.rs b/src/tests/url/file_url_to_fs_path.rs similarity index 79% rename from src/tests/utils/file_url_to_fs_path.rs rename to src/tests/url/file_url_to_fs_path.rs index 437f6bd..6194e3f 100644 --- a/src/tests/utils/file_url_to_fs_path.rs +++ b/src/tests/url/file_url_to_fs_path.rs @@ -7,18 +7,18 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn remove_protocl_and_fragment() { if cfg!(windows) { assert_eq!( - utils::file_url_to_fs_path("file:///C:/documents/some-path/some-file.svg#fragment"), + url::file_url_to_fs_path("file:///C:/documents/some-path/some-file.svg#fragment"), "C:\\documents\\some-path\\some-file.svg" ); } else { assert_eq!( - utils::file_url_to_fs_path("file:///tmp/some-path/some-file.svg#fragment"), + url::file_url_to_fs_path("file:///tmp/some-path/some-file.svg#fragment"), "/tmp/some-path/some-file.svg" ); } @@ -28,12 +28,12 @@ mod passing { fn decodes_urls() { if cfg!(windows) { assert_eq!( - utils::file_url_to_fs_path("file:///C:/Documents%20and%20Settings/some-file.html"), + url::file_url_to_fs_path("file:///C:/Documents%20and%20Settings/some-file.html"), "C:\\Documents and Settings\\some-file.html" ); } else { assert_eq!( - utils::file_url_to_fs_path("file:///home/user/My%20Documents"), + url::file_url_to_fs_path("file:///home/user/My%20Documents"), "/home/user/My Documents" ); } diff --git a/src/tests/utils/get_url_fragment.rs b/src/tests/url/get_url_fragment.rs similarity index 90% rename from src/tests/utils/get_url_fragment.rs rename to src/tests/url/get_url_fragment.rs index 94cb8a6..9d65a21 100644 --- a/src/tests/utils/get_url_fragment.rs +++ b/src/tests/url/get_url_fragment.rs @@ -7,12 +7,12 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn data_url() { assert_eq!( - utils::get_url_fragment( + url::get_url_fragment( "data:image/svg+xml;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h#test" ), "test" @@ -21,6 +21,6 @@ mod passing { #[test] fn https_empty() { - assert_eq!(utils::get_url_fragment("https://kernel.org#"), ""); + assert_eq!(url::get_url_fragment("https://kernel.org#"), ""); } } diff --git a/src/tests/utils/is_data_url.rs b/src/tests/url/is_data_url.rs similarity index 88% rename from src/tests/utils/is_data_url.rs rename to src/tests/url/is_data_url.rs index 92c896d..efd059c 100644 --- a/src/tests/utils/is_data_url.rs +++ b/src/tests/url/is_data_url.rs @@ -7,18 +7,18 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn data_url_text_html() { - assert!(utils::is_data_url( + assert!(url::is_data_url( "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h" )); } #[test] fn data_url_no_media_type() { - assert!(utils::is_data_url( + assert!(url::is_data_url( "data:;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h" )); } @@ -33,20 +33,20 @@ mod passing { #[cfg(test)] mod failing { - use crate::utils; + use crate::url; #[test] fn https_url() { - assert!(!utils::is_data_url("https://kernel.org")); + assert!(!url::is_data_url("https://kernel.org")); } #[test] fn no_protocol_url() { - assert!(!utils::is_data_url("//kernel.org")); + assert!(!url::is_data_url("//kernel.org")); } #[test] fn empty_string() { - assert!(!utils::is_data_url("")); + assert!(!url::is_data_url("")); } } diff --git a/src/tests/utils/is_file_url.rs b/src/tests/url/is_file_url.rs similarity index 84% rename from src/tests/utils/is_file_url.rs rename to src/tests/url/is_file_url.rs index 5e6e360..927b793 100644 --- a/src/tests/utils/is_file_url.rs +++ b/src/tests/url/is_file_url.rs @@ -7,32 +7,32 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn unix_file_url() { - assert!(utils::is_file_url( + assert!(url::is_file_url( "file:///home/user/Websites/my-website/index.html" )); } #[test] fn windows_file_url() { - assert!(utils::is_file_url( + assert!(url::is_file_url( "file:///C:/Documents%20and%20Settings/user/Websites/my-website/assets/images/logo.png" )); } #[test] fn unix_url_with_backslashes() { - assert!(utils::is_file_url( + assert!(url::is_file_url( "file:\\\\\\home\\user\\Websites\\my-website\\index.html" )); } #[test] fn windows_file_url_with_backslashes() { - assert!(utils::is_file_url( + assert!(url::is_file_url( "file:\\\\\\C:\\Documents%20and%20Settings\\user\\Websites\\my-website\\assets\\images\\logo.png" )); } @@ -47,37 +47,37 @@ mod passing { #[cfg(test)] mod failing { - use crate::utils; + use crate::url; #[test] fn url_with_no_protocl() { - assert!(!utils::is_file_url("//kernel.org")); + assert!(!url::is_file_url("//kernel.org")); } #[test] fn dot_slash_filename() { - assert!(!utils::is_file_url("./index.html")); + assert!(!url::is_file_url("./index.html")); } #[test] fn just_filename() { - assert!(!utils::is_file_url("some-local-page.htm")); + assert!(!url::is_file_url("some-local-page.htm")); } #[test] fn https_ip_port_url() { - assert!(!utils::is_file_url("https://1.2.3.4:80/www/index.html")); + assert!(!url::is_file_url("https://1.2.3.4:80/www/index.html")); } #[test] fn data_url() { - assert!(!utils::is_file_url( + assert!(!url::is_file_url( "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h" )); } #[test] fn just_word_file() { - assert!(!utils::is_file_url("file")); + assert!(!url::is_file_url("file")); } } diff --git a/src/tests/utils/is_http_url.rs b/src/tests/url/is_http_url.rs similarity index 81% rename from src/tests/utils/is_http_url.rs rename to src/tests/url/is_http_url.rs index 981accc..622d340 100644 --- a/src/tests/utils/is_http_url.rs +++ b/src/tests/url/is_http_url.rs @@ -7,21 +7,21 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn http_url() { - assert!(utils::is_http_url("http://kernel.org")); + assert!(url::is_http_url("http://kernel.org")); } #[test] fn https_url() { - assert!(utils::is_http_url("https://www.rust-lang.org/")); + assert!(url::is_http_url("https://www.rust-lang.org/")); } #[test] fn http_url_with_backslashes() { - assert!(utils::is_http_url("http:\\\\freebsd.org\\")); + assert!(url::is_http_url("http:\\\\freebsd.org\\")); } } @@ -34,31 +34,31 @@ mod passing { #[cfg(test)] mod failing { - use crate::utils; + use crate::url; #[test] fn url_with_no_protocol() { - assert!(!utils::is_http_url("//kernel.org")); + assert!(!url::is_http_url("//kernel.org")); } #[test] fn dot_slash_filename() { - assert!(!utils::is_http_url("./index.html")); + assert!(!url::is_http_url("./index.html")); } #[test] fn just_filename() { - assert!(!utils::is_http_url("some-local-page.htm")); + assert!(!url::is_http_url("some-local-page.htm")); } #[test] fn https_ip_port_url() { - assert!(!utils::is_http_url("ftp://1.2.3.4/www/index.html")); + assert!(!url::is_http_url("ftp://1.2.3.4/www/index.html")); } #[test] fn data_url() { - assert!(!utils::is_http_url( + assert!(!url::is_http_url( "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h" )); } diff --git a/src/tests/url/mod.rs b/src/tests/url/mod.rs new file mode 100644 index 0000000..226c388 --- /dev/null +++ b/src/tests/url/mod.rs @@ -0,0 +1,12 @@ +mod clean_url; +mod data_to_data_url; +mod data_url_to_data; +mod decode_url; +mod file_url_to_fs_path; +mod get_url_fragment; +mod is_data_url; +mod is_file_url; +mod is_http_url; +mod resolve_url; +mod url_has_protocol; +mod url_with_fragment; diff --git a/src/tests/utils/resolve_url.rs b/src/tests/url/resolve_url.rs similarity index 88% rename from src/tests/utils/resolve_url.rs rename to src/tests/url/resolve_url.rs index c3b1797..d584a32 100644 --- a/src/tests/utils/resolve_url.rs +++ b/src/tests/url/resolve_url.rs @@ -7,13 +7,13 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; use url::ParseError; #[test] fn from_https_to_level_up_relative() -> Result<(), ParseError> { let resolved_url = - utils::resolve_url("https://www.kernel.org", "../category/signatures.html")?; + url::resolve_url("https://www.kernel.org", "../category/signatures.html")?; assert_eq!( resolved_url.as_str(), @@ -25,7 +25,7 @@ mod passing { #[test] fn from_just_filename_to_full_https_url() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "saved_page.htm", "https://www.kernel.org/category/signatures.html", )?; @@ -40,7 +40,7 @@ mod passing { #[test] fn from_https_url_to_url_with_no_protocol() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "https://www.kernel.org", "//www.kernel.org/theme/images/logos/tux.png", )?; @@ -56,7 +56,7 @@ mod passing { #[test] fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "https://www.kernel.org", "//another-host.org/theme/images/logos/tux.png", )?; @@ -71,7 +71,7 @@ mod passing { #[test] fn from_https_url_to_relative_root_path() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "https://www.kernel.org/category/signatures.html", "/theme/images/logos/tux.png", )?; @@ -86,7 +86,7 @@ mod passing { #[test] fn from_https_to_just_filename() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "https://www.w3schools.com/html/html_iframe.asp", "default.asp", )?; @@ -101,7 +101,7 @@ mod passing { #[test] fn from_data_url_to_https() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", "https://www.kernel.org/category/signatures.html", )?; @@ -116,7 +116,7 @@ mod passing { #[test] fn from_data_url_to_data_url() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K", )?; @@ -131,7 +131,7 @@ mod passing { #[test] fn from_file_url_to_relative_path() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "file:///home/user/Websites/my-website/index.html", "assets/images/logo.png", ) @@ -147,7 +147,7 @@ mod passing { #[test] fn from_file_url_to_relative_path_with_backslashes() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "file:\\\\\\home\\user\\Websites\\my-website\\index.html", "assets\\images\\logo.png", ) @@ -163,7 +163,7 @@ mod passing { #[test] fn from_data_url_to_file_url() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", "file:///etc/passwd", ) @@ -176,7 +176,7 @@ mod passing { #[test] fn preserve_fragment() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "http://doesnt-matter.local/", "css/fonts/fontmarvelous.svg#fontmarvelous", ) @@ -193,9 +193,9 @@ mod passing { #[test] fn resolve_from_file_url_to_file_url() -> Result<(), ParseError> { let resolved_url = if cfg!(windows) { - utils::resolve_url("file:///c:/index.html", "file:///c:/image.png").unwrap_or(str!()) + url::resolve_url("file:///c:/index.html", "file:///c:/image.png").unwrap_or(str!()) } else { - utils::resolve_url("file:///tmp/index.html", "file:///tmp/image.png").unwrap_or(str!()) + url::resolve_url("file:///tmp/index.html", "file:///tmp/image.png").unwrap_or(str!()) }; assert_eq!( @@ -220,12 +220,12 @@ mod passing { #[cfg(test)] mod failing { - use crate::utils; + use crate::url; use url::ParseError; #[test] fn from_data_url_to_url_with_no_protocol() -> Result<(), ParseError> { - let resolved_url = utils::resolve_url( + let resolved_url = url::resolve_url( "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h", "//www.w3schools.com/html/html_iframe.asp", ) diff --git a/src/tests/utils/url_has_protocol.rs b/src/tests/url/url_has_protocol.rs similarity index 78% rename from src/tests/utils/url_has_protocol.rs rename to src/tests/url/url_has_protocol.rs index 3c03619..1111b4a 100644 --- a/src/tests/utils/url_has_protocol.rs +++ b/src/tests/url/url_has_protocol.rs @@ -7,50 +7,50 @@ #[cfg(test)] mod passing { - use crate::utils; + use crate::url; #[test] fn mailto() { - assert!(utils::url_has_protocol( + assert!(url::url_has_protocol( "mailto:somebody@somewhere.com?subject=hello" )); } #[test] fn tel() { - assert!(utils::url_has_protocol("tel:5551234567")); + assert!(url::url_has_protocol("tel:5551234567")); } #[test] fn ftp_no_slashes() { - assert!(utils::url_has_protocol("ftp:some-ftp-server.com")); + assert!(url::url_has_protocol("ftp:some-ftp-server.com")); } #[test] fn ftp_with_credentials() { - assert!(utils::url_has_protocol( + assert!(url::url_has_protocol( "ftp://user:password@some-ftp-server.com" )); } #[test] fn javascript() { - assert!(utils::url_has_protocol("javascript:void(0)")); + assert!(url::url_has_protocol("javascript:void(0)")); } #[test] fn http() { - assert!(utils::url_has_protocol("http://news.ycombinator.com")); + assert!(url::url_has_protocol("http://news.ycombinator.com")); } #[test] fn https() { - assert!(utils::url_has_protocol("https://github.com")); + assert!(url::url_has_protocol("https://github.com")); } #[test] fn mailto_uppercase() { - assert!(utils::url_has_protocol( + assert!(url::url_has_protocol( "MAILTO:somebody@somewhere.com?subject=hello" )); } @@ -69,23 +69,23 @@ mod failing { #[test] fn url_with_no_protocol() { - assert!(!utils::url_has_protocol( + assert!(!url::url_has_protocol( "//some-hostname.com/some-file.html" )); } #[test] fn relative_path() { - assert!(!utils::url_has_protocol("some-hostname.com/some-file.html")); + assert!(!url::url_has_protocol("some-hostname.com/some-file.html")); } #[test] fn relative_to_root_path() { - assert!(!utils::url_has_protocol("/some-file.html")); + assert!(!url::url_has_protocol("/some-file.html")); } #[test] fn empty_string() { - assert!(!utils::url_has_protocol("")); + assert!(!url::url_has_protocol("")); } } diff --git a/src/tests/url/url_with_fragment.rs b/src/tests/url/url_with_fragment.rs new file mode 100644 index 0000000..955acf3 --- /dev/null +++ b/src/tests/url/url_with_fragment.rs @@ -0,0 +1,40 @@ +// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ +// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ +// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ +// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ +// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ +// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ + +#[cfg(test)] +mod passing { + use crate::url; + + #[test] + fn url_with_fragment_url() { + let url = "https://localhost.localdomain/path/"; + let fragment = "test"; + let assembled_url = url::url_with_fragment(url, fragment); + + assert_eq!(&assembled_url, "https://localhost.localdomain/path/#test"); + } + #[test] + fn url_with_fragment_empty_url() { + let url = "https://localhost.localdomain/path/"; + let fragment = ""; + let assembled_url = url::url_with_fragment(url, fragment); + + assert_eq!(&assembled_url, "https://localhost.localdomain/path/"); + } + + #[test] + fn url_with_fragment_data_url() { + let url = "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K"; + let fragment = "fragment"; + let assembled_url = url::url_with_fragment(url, fragment); + + assert_eq!( + &assembled_url, + "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K#fragment" + ); + } +} diff --git a/src/tests/utils/mod.rs b/src/tests/utils/mod.rs index 8fc7e47..8d13447 100644 --- a/src/tests/utils/mod.rs +++ b/src/tests/utils/mod.rs @@ -1,13 +1,2 @@ -mod clean_url; -mod data_to_data_url; -mod data_url_to_data; -mod decode_url; mod detect_media_type; -mod file_url_to_fs_path; -mod get_url_fragment; -mod is_data_url; -mod is_file_url; -mod is_http_url; -mod resolve_url; mod retrieve_asset; -mod url_has_protocol; diff --git a/src/tests/utils/retrieve_asset.rs b/src/tests/utils/retrieve_asset.rs index 4aeeb5b..3c3d1e0 100644 --- a/src/tests/utils/retrieve_asset.rs +++ b/src/tests/utils/retrieve_asset.rs @@ -7,11 +7,13 @@ #[cfg(test)] mod passing { - use crate::utils; use reqwest::blocking::Client; use std::collections::HashMap; use std::env; + use crate::url; + use crate::utils; + #[test] fn read_data_url() { let cache = &mut HashMap::new(); @@ -28,12 +30,12 @@ mod passing { ) .unwrap(); assert_eq!( - utils::data_to_data_url(&media_type, &data, &final_url, ""), - utils::data_to_data_url("text/html", "target".as_bytes(), "", "") + url::data_to_data_url(&media_type, &data, &final_url), + url::data_to_data_url("text/html", "target".as_bytes(), "") ); assert_eq!( final_url, - utils::data_to_data_url("text/html", "target".as_bytes(), "", "") + url::data_to_data_url("text/html", "target".as_bytes(), "") ); assert_eq!(&media_type, "text/html"); } @@ -63,7 +65,7 @@ mod passing { false, ) .unwrap(); - assert_eq!(utils::data_to_data_url("application/javascript", &data, &final_url, ""), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg=="); + assert_eq!(url::data_to_data_url("application/javascript", &data, &final_url), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg=="); assert_eq!( &final_url, &format!( diff --git a/src/url.rs b/src/url.rs new file mode 100644 index 0000000..e493ce1 --- /dev/null +++ b/src/url.rs @@ -0,0 +1,168 @@ +use base64; +use url::{form_urlencoded, ParseError, Url}; + +use crate::utils::detect_media_type; + +pub fn clean_url>(input: T) -> String { + let mut url = Url::parse(input.as_ref()).unwrap(); + + // Clear fragment + url.set_fragment(None); + + // Get rid of stray question mark + if url.query() == Some("") { + url.set_query(None); + } + + // Remove empty trailing ampersand(s) + let mut result: String = url.to_string(); + while result.ends_with("&") { + result.pop(); + } + + result +} + +pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str) -> String { + let media_type: String = if media_type.is_empty() { + detect_media_type(data, &url) + } else { + media_type.to_string() + }; + + format!("data:{};base64,{}", media_type, base64::encode(data)) +} + +pub fn data_url_to_data>(url: T) -> (String, Vec) { + let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap()); + let path: String = parsed_url.path().to_string(); + let comma_loc: usize = path.find(',').unwrap_or(path.len()); + + let meta_data: String = path.chars().take(comma_loc).collect(); + let raw_data: String = path.chars().skip(comma_loc + 1).collect(); + + let text: String = decode_url(raw_data); + + let meta_data_items: Vec<&str> = meta_data.split(';').collect(); + let mut media_type: String = str!(); + let mut encoding: &str = ""; + + let mut i: i8 = 0; + for item in &meta_data_items { + if i == 0 { + media_type = str!(item); + } else { + if item.eq_ignore_ascii_case("base64") + || item.eq_ignore_ascii_case("utf8") + || item.eq_ignore_ascii_case("charset=UTF-8") + { + encoding = item; + } + } + + i = i + 1; + } + + let data: Vec = if encoding.eq_ignore_ascii_case("base64") { + base64::decode(&text).unwrap_or(vec![]) + } else { + text.as_bytes().to_vec() + }; + + (media_type, data) +} + +pub fn decode_url(input: String) -> String { + let input: String = input.replace("+", "%2B"); + + form_urlencoded::parse(input.as_bytes()) + .map(|(key, val)| { + [ + key.to_string(), + if val.to_string().len() == 0 { + str!() + } else { + str!('=') + }, + val.to_string(), + ] + .concat() + }) + .collect() +} + +pub fn file_url_to_fs_path(url: &str) -> String { + if !is_file_url(url) { + return str!(); + } + + let cutoff_l = if cfg!(windows) { 8 } else { 7 }; + let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string()); + let url_fragment = get_url_fragment(url); + if url_fragment != "" { + let max_len = fs_file_path.len() - 1 - url_fragment.len(); + fs_file_path = fs_file_path[0..max_len].to_string(); + } + + if cfg!(windows) { + fs_file_path = fs_file_path.replace("/", "\\"); + } + + // File paths should not be %-encoded + decode_url(fs_file_path) +} + +pub fn get_url_fragment>(url: T) -> String { + if Url::parse(url.as_ref()).unwrap().fragment() == None { + str!() + } else { + str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap()) + } +} + +pub fn is_data_url>(url: T) -> bool { + Url::parse(url.as_ref()) + .and_then(|u| Ok(u.scheme() == "data")) + .unwrap_or(false) +} + +pub fn is_file_url>(url: T) -> bool { + Url::parse(url.as_ref()) + .and_then(|u| Ok(u.scheme() == "file")) + .unwrap_or(false) +} + +pub fn is_http_url>(url: T) -> bool { + Url::parse(url.as_ref()) + .and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https")) + .unwrap_or(false) +} + +pub fn resolve_url, U: AsRef>(from: T, to: U) -> Result { + let result = if is_http_url(to.as_ref()) { + to.as_ref().to_string() + } else { + Url::parse(from.as_ref())? + .join(to.as_ref())? + .as_ref() + .to_string() + }; + Ok(result) +} + +pub fn url_has_protocol>(url: T) -> bool { + Url::parse(url.as_ref()) + .and_then(|u| Ok(u.scheme().len() > 0)) + .unwrap_or(false) +} + +pub fn url_with_fragment(url: &str, fragment: &str) -> String { + let mut result = str!(&url); + + if !fragment.is_empty() { + result += "#"; + result += fragment; + } + + result +} diff --git a/src/utils.rs b/src/utils.rs index 11aecb2..de0ca45 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,10 +1,10 @@ -use base64; use reqwest::blocking::Client; use reqwest::header::CONTENT_TYPE; use std::collections::HashMap; use std::fs; use std::path::Path; -use url::{form_urlencoded, ParseError, Url}; + +use crate::url::{clean_url, data_url_to_data, file_url_to_fs_path, is_data_url, is_file_url}; const MAGIC: [[&[u8]; 2]; 18] = [ // Image @@ -38,26 +38,6 @@ const PLAINTEXT_MEDIA_TYPES: &[&str] = &[ "text/plain", ]; -pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String { - let media_type: String = if media_type.is_empty() { - detect_media_type(data, &url) - } else { - media_type.to_string() - }; - let hash: String = if fragment != "" { - format!("#{}", fragment) - } else { - str!() - }; - - format!( - "data:{};base64,{}{}", - media_type, - base64::encode(data), - hash - ) -} - pub fn detect_media_type(data: &[u8], url: &str) -> String { for item in MAGIC.iter() { if data.starts_with(item[0]) { @@ -72,153 +52,10 @@ pub fn detect_media_type(data: &[u8], url: &str) -> String { str!() } -pub fn url_has_protocol>(url: T) -> bool { - Url::parse(url.as_ref()) - .and_then(|u| Ok(u.scheme().len() > 0)) - .unwrap_or(false) -} - -pub fn is_data_url>(url: T) -> bool { - Url::parse(url.as_ref()) - .and_then(|u| Ok(u.scheme() == "data")) - .unwrap_or(false) -} - -pub fn is_file_url>(url: T) -> bool { - Url::parse(url.as_ref()) - .and_then(|u| Ok(u.scheme() == "file")) - .unwrap_or(false) -} - -pub fn is_http_url>(url: T) -> bool { - Url::parse(url.as_ref()) - .and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https")) - .unwrap_or(false) -} - pub fn is_plaintext_media_type(media_type: &str) -> bool { PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str()) } -pub fn resolve_url, U: AsRef>(from: T, to: U) -> Result { - let result = if is_http_url(to.as_ref()) { - to.as_ref().to_string() - } else { - Url::parse(from.as_ref())? - .join(to.as_ref())? - .as_ref() - .to_string() - }; - Ok(result) -} - -pub fn get_url_fragment>(url: T) -> String { - if Url::parse(url.as_ref()).unwrap().fragment() == None { - str!() - } else { - str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap()) - } -} - -pub fn clean_url>(input: T) -> String { - let mut url = Url::parse(input.as_ref()).unwrap(); - - // Clear fragment - url.set_fragment(None); - - // Get rid of stray question mark - if url.query() == Some("") { - url.set_query(None); - } - - // Remove empty trailing ampersand(s) - let mut result: String = url.to_string(); - while result.ends_with("&") { - result.pop(); - } - - result -} - -pub fn data_url_to_data>(url: T) -> (String, Vec) { - let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap()); - let path: String = parsed_url.path().to_string(); - let comma_loc: usize = path.find(',').unwrap_or(path.len()); - - let meta_data: String = path.chars().take(comma_loc).collect(); - let raw_data: String = path.chars().skip(comma_loc + 1).collect(); - - let text: String = decode_url(raw_data); - - let meta_data_items: Vec<&str> = meta_data.split(';').collect(); - let mut media_type: String = str!(); - let mut encoding: &str = ""; - - let mut i: i8 = 0; - for item in &meta_data_items { - if i == 0 { - media_type = str!(item); - } else { - if item.eq_ignore_ascii_case("base64") - || item.eq_ignore_ascii_case("utf8") - || item.eq_ignore_ascii_case("charset=UTF-8") - { - encoding = item; - } - } - - i = i + 1; - } - - let data: Vec = if encoding.eq_ignore_ascii_case("base64") { - base64::decode(&text).unwrap_or(vec![]) - } else { - text.as_bytes().to_vec() - }; - - (media_type, data) -} - -pub fn decode_url(input: String) -> String { - let input: String = input.replace("+", "%2B"); - - form_urlencoded::parse(input.as_bytes()) - .map(|(key, val)| { - [ - key.to_string(), - if val.to_string().len() == 0 { - str!() - } else { - str!('=') - }, - val.to_string(), - ] - .concat() - }) - .collect() -} - -pub fn file_url_to_fs_path(url: &str) -> String { - if !is_file_url(url) { - return str!(); - } - - let cutoff_l = if cfg!(windows) { 8 } else { 7 }; - let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string()); - let url_fragment = get_url_fragment(url); - if url_fragment != "" { - let max_len = fs_file_path.len() - 1 - url_fragment.len(); - fs_file_path = fs_file_path[0..max_len].to_string(); - } - - if cfg!(windows) { - fs_file_path = fs_file_path.replace("/", "\\"); - } - - // File paths should not be %-encoded - decode_url(fs_file_path) -} - pub fn retrieve_asset( cache: &mut HashMap>, client: &Client,