monolith/src/utils.rs

use base64;
use reqwest::blocking::Client;
use reqwest::header::CONTENT_TYPE;
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use url::{form_urlencoded, ParseError, Url};

// Table of known file signatures ("magic numbers") and the media type each one maps to.
//
// Every entry has the form `[signature, media type]`; both halves are byte strings.
// Entries are grouped by media family (image, audio, video).
//
// NOTE(review): several signatures contain `.` characters (e.g. `RIFF....WEBPVP8 `),
// which appear to stand in for bytes that differ from file to file (such as a
// chunk size). Whether those positions behave as wildcards depends entirely on
// the code that matches data against this table — confirm against the lookup.
const MAGIC: [[&[u8]; 2]; 18] = [
    // Image
    [b"GIF87a", b"image/gif"],
    [b"GIF89a", b"image/gif"],
    [b"\xFF\xD8\xFF", b"image/jpeg"],
    [b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"],
    [b"<svg ", b"image/svg+xml"],
    [b"RIFF....WEBPVP8 ", b"image/webp"],
    [b"\x00\x00\x01\x00", b"image/x-icon"],
    // Audio
    [b"ID3", b"audio/mpeg"],
    [b"\xFF\x0E", b"audio/mpeg"],
    [b"\xFF\x0F", b"audio/mpeg"],
    [b"OggS", b"audio/ogg"],
    [b"RIFF....WAVEfmt ", b"audio/wav"],
    [b"fLaC", b"audio/x-flac"],
    // Video
    [b"RIFF....AVI LIST", b"video/avi"],
    [b"....ftyp", b"video/mp4"],
    [b"\x00\x00\x01\x0B", b"video/mpeg"],
    [b"....moov", b"video/quicktime"],
    [b"\x1A\x45\xDF\xA3", b"video/webm"],
];

/// Encodes `data` as a base64 `data:` URL.
///
/// * `media_type` - media type to embed; when empty, it is detected from
///   `data` and `url` via `detect_media_type`.
/// * `data` - raw bytes to encode.
/// * `url` - location the data came from, used only for media type detection.
/// * `fragment` - optional fragment; when non-empty it is appended as `#fragment`.
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String {
    // Only sniff the payload when the caller did not name a media type
    let resolved_type: String = match media_type {
        "" => detect_media_type(data, url),
        given => given.to_string(),
    };

    let mut data_url = format!("data:{};base64,{}", resolved_type, base64::encode(data));

    if !fragment.is_empty() {
        data_url.push('#');
        data_url.push_str(fragment);
    }

    data_url
}

pub fn detect_media_type(data: &[u8], url: &str) -> String {
    for item in MAGIC.iter() {
        if data.starts_with(item[0]) {
            return String::from_utf8(item[1].to_vec()).unwrap();
        }
    }

    if url.to_lowercase().ends_with(".svg") {
        return str!("image/svg+xml");
    }

    str!()
}

/// Returns `true` when `url` parses as an absolute URL with a non-empty scheme.
///
/// Input that cannot be parsed yields `false`.
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
    match Url::parse(url.as_ref()) {
        Ok(parsed) => !parsed.scheme().is_empty(),
        Err(_) => false,
    }
}

/// Returns `true` when `url` parses successfully and its scheme is `data`.
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
    if let Ok(parsed) = Url::parse(url.as_ref()) {
        parsed.scheme() == "data"
    } else {
        false
    }
}

/// Returns `true` when `url` parses successfully and its scheme is `file`.
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
    Url::parse(url.as_ref())
        .map(|parsed| parsed.scheme() == "file")
        .unwrap_or(false)
}

/// Returns `true` when `url` parses successfully and its scheme is `http` or `https`.
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
    let parsed = match Url::parse(url.as_ref()) {
        Ok(parsed) => parsed,
        Err(_) => return false,
    };

    match parsed.scheme() {
        "http" | "https" => true,
        _ => false,
    }
}

/// Resolves `to` against the base URL `from`.
///
/// When `to` is already an absolute HTTP(S) URL it is returned unchanged;
/// otherwise it is joined onto `from`.
///
/// # Errors
///
/// Returns a `ParseError` when `from` cannot be parsed or the join fails.
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
    let target: &str = to.as_ref();

    // Absolute web addresses need no base
    if is_http_url(target) {
        return Ok(target.to_string());
    }

    let base = Url::parse(from.as_ref())?;
    let joined = base.join(target)?;

    Ok(joined.as_str().to_string())
}

pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
    if Url::parse(url.as_ref()).unwrap().fragment() == None {
        str!()
    } else {
        str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap())
    }
}

/// Normalises `url` by removing its fragment and a dangling `?` (empty query).
///
/// # Panics
///
/// Panics when `url` cannot be parsed.
pub fn clean_url<T: AsRef<str>>(url: T) -> String {
    let mut parsed = Url::parse(url.as_ref()).unwrap();

    // The fragment never takes part in the cleaned form
    parsed.set_fragment(None);

    // A query that is present but empty is just a stray question mark
    let has_empty_query = parsed.query().map_or(false, |query| query.is_empty());
    if has_empty_query {
        parsed.set_query(None);
    }

    parsed.as_str().to_owned()
}

/// Extracts the HTML text carried by a `text/html` data URL.
///
/// The part of the URL path before the first comma is treated as
/// `;`-separated metadata and the part after it as the payload. The payload is
/// passed through `decode_url`, and additionally base64-decoded when the last
/// `base64`/`utf8` metadata item is `base64`.
///
/// Returns an empty string when the URL cannot be parsed, contains no comma,
/// does not start its metadata with `text/html` (case-insensitive), or holds a
/// base64 payload that is not valid base64-encoded UTF-8.
pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
    let parsed_url = match Url::parse(url.as_ref()) {
        Ok(parsed) => parsed,
        Err(_) => return String::new(),
    };

    let path: &str = parsed_url.path();
    let comma_loc: usize = match path.find(',') {
        Some(loc) => loc,
        None => return String::new(),
    };

    // `find` yields a byte offset, so split by slicing rather than by counting chars
    let meta_data: &str = &path[..comma_loc];
    let raw_data: &str = &path[comma_loc + 1..];

    // The media type is only recognised in the leading metadata position
    let mut meta_data_items = meta_data.split(';');
    let is_html = meta_data_items
        .next()
        .map_or(false, |first| first.eq_ignore_ascii_case("text/html"));
    if !is_html {
        return String::new();
    }

    // When several encodings are listed, the last one takes effect
    let is_base64 = meta_data_items
        .filter(|item| item.eq_ignore_ascii_case("base64") || item.eq_ignore_ascii_case("utf8"))
        .last()
        .map_or(false, |encoding| encoding.eq_ignore_ascii_case("base64"));

    let data: String = decode_url(raw_data.to_string());

    if is_base64 {
        base64::decode(&data)
            .ok()
            .and_then(|bytes| String::from_utf8(bytes).ok())
            .unwrap_or_default()
    } else {
        data
    }
}

/// Percent-decodes `input`.
///
/// Every `%XX` sequence with two hexadecimal digits is replaced by the byte it
/// encodes; all other characters — including `+`, `&` and `=` — are kept
/// verbatim, so file paths and base64 payloads survive unchanged. Malformed or
/// truncated escapes (such as `%zz` or a trailing `%`) are left as they are.
/// Decoded bytes that do not form valid UTF-8 are replaced with U+FFFD.
pub fn decode_url(input: String) -> String {
    // Numeric value of a single ASCII hex digit
    fn hex_value(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    let bytes: &[u8] = input.as_bytes();
    let mut decoded: Vec<u8> = Vec::with_capacity(bytes.len());
    let mut pos: usize = 0;

    while pos < bytes.len() {
        let escaped: Option<u8> = if bytes[pos] == b'%' {
            let high = bytes.get(pos + 1).copied().and_then(hex_value);
            let low = bytes.get(pos + 2).copied().and_then(hex_value);
            match (high, low) {
                (Some(high), Some(low)) => Some((high << 4) | low),
                _ => None,
            }
        } else {
            None
        };

        match escaped {
            Some(byte) => {
                decoded.push(byte);
                pos += 3;
            }
            None => {
                decoded.push(bytes[pos]);
                pos += 1;
            }
        }
    }

    String::from_utf8_lossy(&decoded).into_owned()
}

/// Converts a `file:` URL into a file system path.
///
/// The fragment (everything from the first `#`) is discarded, the scheme
/// prefix is cut off (`file://`, or `file:///` on Windows), and the remainder
/// is decoded with `decode_url`. On Windows forward slashes are turned into
/// backslashes.
///
/// Returns an empty string when `url` is not a `file:` URL or is too short to
/// contain a path after the scheme prefix.
pub fn file_url_to_fs_path(url: &str) -> String {
    if !is_file_url(url) {
        return String::new();
    }

    // Cut the fragment off while the text is still encoded, so escapes inside
    // the fragment cannot shift the cut position
    let without_fragment: &str = url.split('#').next().unwrap_or(url);

    let cutoff_l = if cfg!(windows) { 8 } else { 7 };
    // Checked slicing: inputs shorter than the prefix yield an empty path
    let encoded_path: &str = without_fragment.get(cutoff_l..).unwrap_or("");

    let fs_file_path: String = decode_url(encoded_path.to_string());

    if cfg!(windows) {
        fs_file_path.replace("/", "\\")
    } else {
        fs_file_path
    }
}

/// Fetches the asset at `url` and returns `(content, final_url)`.
///
/// * `cache` - previously downloaded assets keyed by cleaned URL; consulted and
///   filled for network requests only.
/// * `client` - HTTP client used for network requests.
/// * `parent_url` - URL of the document that references the asset.
/// * `url` - location of the asset.
/// * `as_data_url` - when `true` the content is returned as a base64 data URL,
///   otherwise as text.
/// * `media_type` - media type for the data URL; when empty, the response's
///   `Content-Type` header is used if present.
/// * `opt_silent` - suppresses progress output on stderr.
///
/// Behaviour by kind of `url`:
/// * empty: returns two empty strings;
/// * data URL: returned as is, for both content and final URL;
/// * file URL: read from disk, but only when `parent_url` is a file URL too
///   (otherwise two empty strings are returned); a missing or unreadable file
///   yields empty content;
/// * anything else: served from `cache` or requested through `client`, in
///   which case the final URL is the one reported by the response.
///
/// # Errors
///
/// Returns the underlying `reqwest::Error` when sending the request or reading
/// the response body fails.
///
/// # Panics
///
/// Panics if a non-data, non-file `url` (or the response URL) cannot be parsed
/// by `clean_url`.
pub fn retrieve_asset(
    cache: &mut HashMap<String, String>,
    client: &Client,
    parent_url: &str,
    url: &str,
    as_data_url: bool,
    media_type: &str,
    opt_silent: bool,
) -> Result<(String, String), reqwest::Error> {
    if url.is_empty() {
        return Ok((String::new(), String::new()));
    }

    if is_data_url(url) {
        return Ok((url.to_string(), url.to_string()));
    }

    if is_file_url(url) {
        // Check if parent_url is also file:///
        // (if not, then we don't embed the asset)
        if !is_file_url(parent_url) {
            return Ok((String::new(), String::new()));
        }

        let fs_file_path: String = file_url_to_fs_path(url);
        let path = Path::new(&fs_file_path);
        if !path.exists() {
            return Ok((String::new(), url.to_string()));
        }

        if !opt_silent {
            eprintln!("{}", &url);
        }

        // A file that exists but cannot be read (a directory, missing
        // permissions, non-UTF-8 text) is treated like a missing one
        let content: Option<String> = if as_data_url {
            fs::read(path).ok().map(|bytes| {
                let url_fragment = get_url_fragment(url);
                data_to_data_url(media_type, &bytes, &fs_file_path, &url_fragment)
            })
        } else {
            fs::read_to_string(path).ok()
        };

        return Ok((content.unwrap_or_default(), url.to_string()));
    }

    let cache_key = clean_url(url);

    if let Some(cached) = cache.get(&cache_key) {
        // URL is in cache
        if !opt_silent {
            eprintln!("{} (from cache)", &url);
        }
        return Ok((cached.to_string(), url.to_string()));
    }

    // URL not in cache, we request it
    let mut response = client.get(url).send()?;
    let res_url = response.url().to_string();

    if !opt_silent {
        if url == res_url {
            eprintln!("{}", &url);
        } else {
            eprintln!("{} -> {}", &url, &res_url);
        }
    }

    // The cache entry is stored under the URL the response finally came from
    let new_cache_key = clean_url(&res_url);

    if as_data_url {
        // Convert response into a byte array
        let mut data: Vec<u8> = vec![];
        response.copy_to(&mut data)?;

        // Attempt to obtain media type by reading the Content-Type header
        let media_type = if media_type.is_empty() {
            response
                .headers()
                .get(CONTENT_TYPE)
                .and_then(|header| header.to_str().ok())
                .unwrap_or(media_type)
        } else {
            media_type
        };
        let url_fragment = get_url_fragment(url);
        let data_url = data_to_data_url(media_type, &data, url, &url_fragment);
        // Add to cache
        cache.insert(new_cache_key, data_url.clone());
        Ok((data_url, res_url))
    } else {
        // Propagate body read/decoding failures instead of panicking
        let content = response.text()?;
        // Add to cache
        cache.insert(new_cache_key, content.clone());
        Ok((content, res_url))
    }
}
add support for working with local assets 4 years ago			`use base64;`
upgrade reqwest to v0.10.0 This will improve build time and binary size as follows: * Before - Compile targets: 220 - Build time: `cargo build --release 1264.95s user 39.72s system 335% cpu 6:29.14 total` - Binary size: 6578568 bytes * After - Compile targets: 170 - Build time: `cargo build --release 1130.64s user 32.15s system 359% cpu 5:23.69 total` - Binary size: 6107088 bytes * Differences - Compile targets: 1.29x smaller - Build time: 1.23x faster - Binary size: 1.07x smaller 4 years ago			`use reqwest::blocking::Client;`
add support for working with local assets 4 years ago			`use reqwest::header::CONTENT_TYPE;`
Cleaned up some overcomplicated code 5 years ago			`use std::collections::HashMap;`
add support for working with local assets 4 years ago			`use std::fs;`
			`use std::path::Path;`
add black box tests 4 years ago			`use url::{form_urlencoded, ParseError, Url};`
Improve code structure 5 years ago
improve SVG media type detection 4 years ago			`const MAGIC: [[&[u8]; 2]; 18] = [`
Get rid of mime-sniffer dependency 5 years ago			`// Image`
			`[b"GIF87a", b"image/gif"],`
			`[b"GIF89a", b"image/gif"],`
			`[b"\xFF\xD8\xFF", b"image/jpeg"],`
			`[b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"],`
			`[b"<svg ", b"image/svg+xml"],`
			`[b"RIFF....WEBPVP8 ", b"image/webp"],`
			`[b"\x00\x00\x01\x00", b"image/x-icon"],`
			`// Audio`
			`[b"ID3", b"audio/mpeg"],`
			`[b"\xFF\x0E", b"audio/mpeg"],`
			`[b"\xFF\x0F", b"audio/mpeg"],`
			`[b"OggS", b"audio/ogg"],`
			`[b"RIFF....WAVEfmt ", b"audio/wav"],`
			`[b"fLaC", b"audio/x-flac"],`
			`// Video`
			`[b"RIFF....AVI LIST", b"video/avi"],`
			`[b"....ftyp", b"video/mp4"],`
			`[b"\x00\x00\x01\x0B", b"video/mpeg"],`
			`[b"....moov", b"video/quicktime"],`
			`[b"\x1A\x45\xDF\xA3", b"video/webm"],`
			`];`
Rewrite program in Rust 5 years ago
implement full CSS parsing 4 years ago			`pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String {`
			`let media_type: String = if media_type.is_empty() {`
improve SVG media type detection 4 years ago			`detect_media_type(data, &url)`
Rewrite program in Rust 5 years ago			`} else {`
improve SVG media type detection 4 years ago			`media_type.to_string()`
cargo clippy 5 years ago			`};`
implement full CSS parsing 4 years ago			`let hash: String = if fragment != "" {`
			`format!("#{}", fragment)`
			`} else {`
			`str!()`
			`};`

			`format!(`
			`"data:{};base64,{}{}",`
			`media_type,`
			`base64::encode(data),`
			`hash`
			`)`
Rewrite program in Rust 5 years ago			`}`

improve SVG media type detection 4 years ago			`pub fn detect_media_type(data: &[u8], url: &str) -> String {`
Add CSP isolation, no CSS, and no iframe options 5 years ago			`for item in MAGIC.iter() {`
Get rid of mime-sniffer dependency 5 years ago			`if data.starts_with(item[0]) {`
refactor utils functions 5 years ago			`return String::from_utf8(item[1].to_vec()).unwrap();`
Get rid of mime-sniffer dependency 5 years ago			`}`
			`}`
improve SVG media type detection 4 years ago
			`if url.to_lowercase().ends_with(".svg") {`
			`return str!("image/svg+xml");`
			`}`

add support for data URL targets 4 years ago			`str!()`
Rewrite program in Rust 5 years ago			`}`

refactor utils functions 5 years ago			`pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {`
add support for data URL targets 4 years ago			`Url::parse(url.as_ref())`
			`.and_then(\|u\| Ok(u.scheme().len() > 0))`
			`.unwrap_or(false)`
Improve code structure 5 years ago			`}`
Rewrite program in Rust 5 years ago
add support for data URL targets 4 years ago			`pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {`
			`Url::parse(url.as_ref())`
			`.and_then(\|u\| Ok(u.scheme() == "data"))`
			`.unwrap_or(false)`
Improve code structure 5 years ago			`}`
Get rid of mime-sniffer dependency 5 years ago
add support for working with local assets 4 years ago			`pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {`
			`Url::parse(url.as_ref())`
			`.and_then(\|u\| Ok(u.scheme() == "file"))`
			`.unwrap_or(false)`
			`}`

add support for data URL targets 4 years ago			`pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {`
			`Url::parse(url.as_ref())`
			`.and_then(\|u\| Ok(u.scheme() == "http" \|\| u.scheme() == "https"))`
			`.unwrap_or(false)`
Improve code structure 5 years ago			`}`

refactor utils functions 5 years ago			`pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {`
correct is_valid_url to is_http_url 4 years ago			`let result = if is_http_url(to.as_ref()) {`
refactor utils functions 5 years ago			`to.as_ref().to_string()`
Improve code structure 5 years ago			`} else {`
refactor utils functions 5 years ago			`Url::parse(from.as_ref())?`
			`.join(to.as_ref())?`
			`.as_ref()`
			`.to_string()`
Improve code structure 5 years ago			`};`
			`Ok(result)`
Rewrite program in Rust 5 years ago			`}`
Added loading of the links given as url(...) in css files 5 years ago
implement full CSS parsing 4 years ago			`pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {`
			`if Url::parse(url.as_ref()).unwrap().fragment() == None {`
			`str!()`
Added support for <style> tags 5 years ago			`} else {`
implement full CSS parsing 4 years ago			`str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap())`
Added support for <style> tags 5 years ago			`}`
Fixed formatting 5 years ago			`}`
use clean URLs as hashmap keys 5 years ago
			`pub fn clean_url<T: AsRef<str>>(url: T) -> String {`
			`let mut result = Url::parse(url.as_ref()).unwrap();`
improve SVG media type detection 4 years ago
use clean URLs as hashmap keys 5 years ago			`// Clear fragment`
			`result.set_fragment(None);`
improve SVG media type detection 4 years ago
use clean URLs as hashmap keys 5 years ago			`// Get rid of stray question mark`
			`if result.query() == Some("") {`
			`result.set_query(None);`
			`}`
			`result.to_string()`
			`}`
add support for data URL targets 4 years ago
			`pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {`
			`let parsed_url = Url::parse(url.as_ref()).unwrap_or(Url::parse("http://[::1]").unwrap());`
add black box tests 4 years ago			`let path: String = parsed_url.path().to_string();`
			`let comma_loc: usize = path.find(',').unwrap_or(path.len());`

			`if comma_loc == path.len() {`
			`return str!();`
			`}`

			`let meta_data: String = path.chars().take(comma_loc).collect();`
			`let raw_data: String = path.chars().skip(comma_loc + 1).collect();`

add support for working with local assets 4 years ago			`let data: String = decode_url(raw_data);`
add black box tests 4 years ago
			`let meta_data_items: Vec<&str> = meta_data.split(';').collect();`
improve SVG media type detection 4 years ago			`let mut media_type: &str = "";`
add black box tests 4 years ago			`let mut encoding: &str = "";`

			`let mut i: i8 = 0;`
			`for item in &meta_data_items {`
			`if i == 0 {`
			`if item.eq_ignore_ascii_case("text/html") {`
improve SVG media type detection 4 years ago			`media_type = item;`
add black box tests 4 years ago			`continue;`
add support for data URL targets 4 years ago			`}`
add black box tests 4 years ago			`}`

			`if item.eq_ignore_ascii_case("base64") \|\| item.eq_ignore_ascii_case("utf8") {`
			`encoding = item;`
			`}`

			`i = i + 1;`
			`}`

improve SVG media type detection 4 years ago			`if media_type.eq_ignore_ascii_case("text/html") {`
add black box tests 4 years ago			`if encoding.eq_ignore_ascii_case("base64") {`
add support for working with local assets 4 years ago			`String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())`
add support for data URL targets 4 years ago			`} else {`
add black box tests 4 years ago			`data`
add support for data URL targets 4 years ago			`}`
			`} else {`
			`str!()`
			`}`
			`}`
add support for working with local assets 4 years ago
			`pub fn decode_url(input: String) -> String {`
			`form_urlencoded::parse(input.as_bytes())`
			`.map(\|(key, val)\| {`
			`[`
			`key.to_string(),`
			`if val.to_string().len() == 0 {`
			`str!()`
			`} else {`
			`str!('=')`
			`},`
			`val.to_string(),`
			`]`
			`.concat()`
			`})`
			`.collect()`
			`}`

implement full CSS parsing 4 years ago			`pub fn file_url_to_fs_path(url: &str) -> String {`
			`if !is_file_url(url) {`
			`return str!();`
			`}`

			`let cutoff_l = if cfg!(windows) { 8 } else { 7 };`
			`let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string());`
			`let url_fragment = get_url_fragment(url);`
			`if url_fragment != "" {`
			`let max_len = fs_file_path.len() - 1 - url_fragment.len();`
			`fs_file_path = fs_file_path[0..max_len].to_string();`
			`}`

			`if cfg!(windows) {`
			`fs_file_path = fs_file_path.replace("/", "\\");`
			`}`

			`fs_file_path`
			`}`

add support for working with local assets 4 years ago			`pub fn retrieve_asset(`
			`cache: &mut HashMap<String, String>,`
			`client: &Client,`
			`parent_url: &str,`
			`url: &str,`
			`as_data_url: bool,`
improve SVG media type detection 4 years ago			`media_type: &str,`
add support for working with local assets 4 years ago			`opt_silent: bool,`
			`) -> Result<(String, String), reqwest::Error> {`
			`if url.len() == 0 {`
			`return Ok((str!(), str!()));`
			`}`

			`let cache_key = clean_url(&url);`

			`if is_data_url(&url) {`
			`Ok((url.to_string(), url.to_string()))`
			`} else if is_file_url(&url) {`
			`// Check if parent_url is also file:///`
implement full CSS parsing 4 years ago			`// (if not, then we don't embed the asset)`
add support for working with local assets 4 years ago			`if !is_file_url(&parent_url) {`
			`return Ok((str!(), str!()));`
			`}`

implement full CSS parsing 4 years ago			`let fs_file_path: String = file_url_to_fs_path(url);`
add support for working with local assets 4 years ago			`let path = Path::new(&fs_file_path);`
implement full CSS parsing 4 years ago			`let url_fragment = get_url_fragment(url);`
add support for working with local assets 4 years ago			`if path.exists() {`
			`if !opt_silent {`
			`eprintln!("{}", &url);`
			`}`

			`if as_data_url {`
improve SVG media type detection 4 years ago			`let data_url: String = data_to_data_url(`
			`&media_type,`
			`&fs::read(&fs_file_path).unwrap(),`
			`&fs_file_path,`
implement full CSS parsing 4 years ago			`&url_fragment,`
improve SVG media type detection 4 years ago			`);`
add support for working with local assets 4 years ago			`Ok((data_url, url.to_string()))`
			`} else {`
			`let data: String = fs::read_to_string(&fs_file_path).expect(url);`
			`Ok((data, url.to_string()))`
			`}`
			`} else {`
			`Ok((str!(), url.to_string()))`
			`}`
			`} else {`
			`if cache.contains_key(&cache_key) {`
			`// URL is in cache`
			`if !opt_silent {`
			`eprintln!("{} (from cache)", &url);`
			`}`
			`let data = cache.get(&cache_key).unwrap();`
			`Ok((data.to_string(), url.to_string()))`
			`} else {`
			`// URL not in cache, we request it`
			`let mut response = client.get(url).send()?;`
			`let res_url = response.url().to_string();`

			`if !opt_silent {`
			`if url == res_url {`
			`eprintln!("{}", &url);`
			`} else {`
			`eprintln!("{} -> {}", &url, &res_url);`
			`}`
			`}`

			`let new_cache_key = clean_url(&res_url);`

			`if as_data_url {`
			`// Convert response into a byte array`
			`let mut data: Vec<u8> = vec![];`
			`response.copy_to(&mut data)?;`

improve SVG media type detection 4 years ago			`// Attempt to obtain media type by reading the Content-Type header`
			`let media_type = if media_type == "" {`
add support for working with local assets 4 years ago			`response`
			`.headers()`
			`.get(CONTENT_TYPE)`
			`.and_then(\|header\| header.to_str().ok())`
improve SVG media type detection 4 years ago			`.unwrap_or(&media_type)`
add support for working with local assets 4 years ago			`} else {`
improve SVG media type detection 4 years ago			`media_type`
add support for working with local assets 4 years ago			`};`
implement full CSS parsing 4 years ago			`let url_fragment = get_url_fragment(url);`
			`let data_url = data_to_data_url(&media_type, &data, url, &url_fragment);`
add support for working with local assets 4 years ago			`// Add to cache`
			`cache.insert(new_cache_key, data_url.clone());`
			`Ok((data_url, res_url))`
			`} else {`
			`let content = response.text().unwrap();`
			`// Add to cache`
			`cache.insert(new_cache_key, content.clone());`
			`Ok((content, res_url))`
			`}`
			`}`
			`}`
			`}`