improve SVG media type detection

pull/138/head
Sunshine 4 years ago
parent f27d5fa23e
commit be25784297
No known key found for this signature in database
GPG Key ID: B80CA68703CD8AB1

@ -473,7 +473,7 @@ pub fn walk_and_embed_assets(
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
let iframe_data_url = data_to_data_url("text/html", &buf);
let iframe_data_url = data_to_data_url("text/html", &buf, "");
attr.value.clear();
attr.value.push_slice(iframe_data_url.as_str());
}

@ -11,7 +11,7 @@ use crate::utils;
fn passing_encode_string_with_specific_media_type() {
let mime = "application/javascript";
let data = "var word = 'hello';\nalert(word);\n";
let data_url = utils::data_to_data_url(mime, data.as_bytes());
let data_url = utils::data_to_data_url(mime, data.as_bytes(), "");
assert_eq!(
&data_url,

@ -8,40 +8,130 @@ use crate::utils;
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_image_media_types() {
assert_eq!(utils::detect_media_type(b"GIF87a"), "image/gif");
assert_eq!(utils::detect_media_type(b"GIF89a"), "image/gif");
assert_eq!(utils::detect_media_type(b"\xFF\xD8\xFF"), "image/jpeg");
fn passing_image_gif87() {
assert_eq!(utils::detect_media_type(b"GIF87a", ""), "image/gif");
}
#[test]
fn passing_image_gif89() {
assert_eq!(utils::detect_media_type(b"GIF89a", ""), "image/gif");
}
#[test]
fn passing_image_jpeg() {
assert_eq!(utils::detect_media_type(b"\xFF\xD8\xFF", ""), "image/jpeg");
}
#[test]
fn passing_image_png() {
assert_eq!(
utils::detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A"),
utils::detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A", ""),
"image/png"
);
assert_eq!(utils::detect_media_type(b"<?xml "), "image/svg+xml");
assert_eq!(utils::detect_media_type(b"<svg "), "image/svg+xml");
assert_eq!(utils::detect_media_type(b"RIFF....WEBPVP8 "), "image/webp");
}
#[test]
fn passing_image_svg() {
assert_eq!(utils::detect_media_type(b"<svg ", ""), "image/svg+xml");
}
#[test]
fn passing_image_webp() {
assert_eq!(
utils::detect_media_type(b"\x00\x00\x01\x00"),
utils::detect_media_type(b"RIFF....WEBPVP8 ", ""),
"image/webp"
);
}
#[test]
fn passing_image_icon() {
assert_eq!(
utils::detect_media_type(b"\x00\x00\x01\x00", ""),
"image/x-icon"
);
}
#[test]
fn passing_audio_media_types() {
assert_eq!(utils::detect_media_type(b"ID3"), "audio/mpeg");
assert_eq!(utils::detect_media_type(b"\xFF\x0E"), "audio/mpeg");
assert_eq!(utils::detect_media_type(b"\xFF\x0F"), "audio/mpeg");
assert_eq!(utils::detect_media_type(b"OggS"), "audio/ogg");
assert_eq!(utils::detect_media_type(b"RIFF....WAVEfmt "), "audio/wav");
assert_eq!(utils::detect_media_type(b"fLaC"), "audio/x-flac");
fn passing_image_svg_filename() {
assert_eq!(
utils::detect_media_type(b"<?xml ", "local-file.svg"),
"image/svg+xml"
);
}
#[test]
fn passing_image_svg_url_uppercase() {
assert_eq!(
utils::detect_media_type(b"", "https://some-site.com/images/local-file.SVG"),
"image/svg+xml"
);
}
#[test]
fn passing_audio_mpeg() {
assert_eq!(utils::detect_media_type(b"ID3", ""), "audio/mpeg");
}
#[test]
fn passing_audio_mpeg_2() {
assert_eq!(utils::detect_media_type(b"\xFF\x0E", ""), "audio/mpeg");
}
#[test]
fn passing_video_media_types() {
assert_eq!(utils::detect_media_type(b"RIFF....AVI LIST"), "video/avi");
assert_eq!(utils::detect_media_type(b"....ftyp"), "video/mp4");
assert_eq!(utils::detect_media_type(b"\x00\x00\x01\x0B"), "video/mpeg");
assert_eq!(utils::detect_media_type(b"....moov"), "video/quicktime");
assert_eq!(utils::detect_media_type(b"\x1A\x45\xDF\xA3"), "video/webm");
fn passing_audio_mpeg_3() {
assert_eq!(utils::detect_media_type(b"\xFF\x0F", ""), "audio/mpeg");
}
#[test]
fn passing_audio_ogg() {
assert_eq!(utils::detect_media_type(b"OggS", ""), "audio/ogg");
}
#[test]
fn passing_audio_wav() {
assert_eq!(
utils::detect_media_type(b"RIFF....WAVEfmt ", ""),
"audio/wav"
);
}
#[test]
fn passing_audio_flac() {
assert_eq!(utils::detect_media_type(b"fLaC", ""), "audio/x-flac");
}
#[test]
fn passing_video_avi() {
assert_eq!(
utils::detect_media_type(b"RIFF....AVI LIST", ""),
"video/avi"
);
}
#[test]
fn passing_video_mp4() {
assert_eq!(utils::detect_media_type(b"....ftyp", ""), "video/mp4");
}
#[test]
fn passing_video_mpeg() {
assert_eq!(
utils::detect_media_type(b"\x00\x00\x01\x0B", ""),
"video/mpeg"
);
}
#[test]
fn passing_video_quicktime() {
assert_eq!(utils::detect_media_type(b"....moov", ""), "video/quicktime");
}
#[test]
fn passing_video_webm() {
assert_eq!(
utils::detect_media_type(b"\x1A\x45\xDF\xA3", ""),
"video/webm"
);
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@ -53,5 +143,5 @@ fn passing_video_media_types() {
#[test]
fn failing_unknown_media_type() {
assert_eq!(utils::detect_media_type(b"abcdef0123456789"), "");
assert_eq!(utils::detect_media_type(b"abcdef0123456789", ""), "");
}

@ -111,6 +111,21 @@ fn passing_from_data_url_to_https() -> Result<(), ParseError> {
Ok(())
}
#[test]
fn passing_from_data_url_to_data_url() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K",
)?;
assert_eq!(
resolved_url.as_str(),
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K"
);
Ok(())
}
#[test]
fn passing_from_file_url_to_relative_path() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(

@ -42,13 +42,12 @@ lazy_static! {
static ref REGEX_CSS_URL: Regex = Regex::new(CSS_URL_REGEX_STR).unwrap();
}
const MAGIC: [[&[u8]; 2]; 19] = [
const MAGIC: [[&[u8]; 2]; 18] = [
// Image
[b"GIF87a", b"image/gif"],
[b"GIF89a", b"image/gif"],
[b"\xFF\xD8\xFF", b"image/jpeg"],
[b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"],
[b"<?xml ", b"image/svg+xml"],
[b"<svg ", b"image/svg+xml"],
[b"RIFF....WEBPVP8 ", b"image/webp"],
[b"\x00\x00\x01\x00", b"image/x-icon"],
@ -67,21 +66,26 @@ const MAGIC: [[&[u8]; 2]; 19] = [
[b"\x1A\x45\xDF\xA3", b"video/webm"],
];
pub fn data_to_data_url(mime: &str, data: &[u8]) -> String {
let media_type = if mime.is_empty() {
detect_media_type(data)
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str) -> String {
let media_type = if media_type.is_empty() {
detect_media_type(data, &url)
} else {
mime.to_string()
media_type.to_string()
};
format!("data:{};base64,{}", media_type, base64::encode(data))
}
pub fn detect_media_type(data: &[u8]) -> String {
pub fn detect_media_type(data: &[u8], url: &str) -> String {
for item in MAGIC.iter() {
if data.starts_with(item[0]) {
return String::from_utf8(item[1].to_vec()).unwrap();
}
}
if url.to_lowercase().ends_with(".svg") {
return str!("image/svg+xml");
}
str!()
}
@ -179,7 +183,7 @@ pub fn resolve_css_imports(
&parent_url,
&embedded_url,
true, // Format as data URL
"", // Unknown MIME type
"", // Unknown media type
opt_silent,
)
.map(|(a, _)| a)
@ -206,7 +210,7 @@ pub fn resolve_css_imports(
}
if as_data_url {
data_to_data_url("text/css", resolved_css.as_bytes())
data_to_data_url("text/css", resolved_css.as_bytes(), "")
} else {
resolved_css
}
@ -214,8 +218,10 @@ pub fn resolve_css_imports(
pub fn clean_url<T: AsRef<str>>(url: T) -> String {
let mut result = Url::parse(url.as_ref()).unwrap();
// Clear fragment
result.set_fragment(None);
// Get rid of stray question mark
if result.query() == Some("") {
result.set_query(None);
@ -238,14 +244,14 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
let data: String = decode_url(raw_data);
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
let mut mime_type: &str = "";
let mut media_type: &str = "";
let mut encoding: &str = "";
let mut i: i8 = 0;
for item in &meta_data_items {
if i == 0 {
if item.eq_ignore_ascii_case("text/html") {
mime_type = item;
media_type = item;
continue;
}
}
@ -257,7 +263,7 @@ pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
i = i + 1;
}
if mime_type.eq_ignore_ascii_case("text/html") {
if media_type.eq_ignore_ascii_case("text/html") {
if encoding.eq_ignore_ascii_case("base64") {
String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
} else {
@ -291,7 +297,7 @@ pub fn retrieve_asset(
parent_url: &str,
url: &str,
as_data_url: bool,
mime: &str,
media_type: &str,
opt_silent: bool,
) -> Result<(String, String), reqwest::Error> {
if url.len() == 0 {
@ -318,7 +324,11 @@ pub fn retrieve_asset(
}
if as_data_url {
let data_url: String = data_to_data_url(&mime, &fs::read(&fs_file_path).unwrap());
let data_url: String = data_to_data_url(
&media_type,
&fs::read(&fs_file_path).unwrap(),
&fs_file_path,
);
Ok((data_url, url.to_string()))
} else {
let data: String = fs::read_to_string(&fs_file_path).expect(url);
@ -355,17 +365,17 @@ pub fn retrieve_asset(
let mut data: Vec<u8> = vec![];
response.copy_to(&mut data)?;
// Attempt to obtain MIME type by reading the Content-Type header
let media_type = if mime == "" {
// Attempt to obtain media type by reading the Content-Type header
let media_type = if media_type == "" {
response
.headers()
.get(CONTENT_TYPE)
.and_then(|header| header.to_str().ok())
.unwrap_or(&mime)
.unwrap_or(&media_type)
} else {
mime
media_type
};
let data_url = data_to_data_url(&media_type, &data);
let data_url = data_to_data_url(&media_type, &data, url);
// Add to cache
cache.insert(new_cache_key, data_url.clone());
Ok((data_url, res_url))

Loading…
Cancel
Save