add more tests

pull/256/head
Sunshine 3 years ago
parent a308a20411
commit a6e891b3c5
No known key found for this signature in database
GPG Key ID: B80CA68703CD8AB1

@ -79,11 +79,11 @@ or
- `-j`: Exclude JavaScript - `-j`: Exclude JavaScript
- `-k`: Accept invalid X.509 (TLS) certificates - `-k`: Accept invalid X.509 (TLS) certificates
- `-M`: Don't add timestamp and URL information - `-M`: Don't add timestamp and URL information
- `-n`: Extract contents of NOSCRIPT tags - `-n`: Extract contents of NOSCRIPT elements
- `-o`: Write output to `file` - `-o`: Write output to `file`
- `-s`: Be quiet - `-s`: Be quiet
- `-t`: Adjust `network request timeout` - `-t`: Adjust `network request timeout`
- `-u`: Provide `custom User-Agent` - `-u`: Provide custom `User-Agent`
- `-v`: Exclude videos - `-v`: Exclude videos
--------------------------------------------------- ---------------------------------------------------
@ -99,19 +99,15 @@ Please open an issue if something is wrong, that helps make this project better.
--------------------------------------------------- ---------------------------------------------------
## Related projects ## Related projects
- `Monolith Chrome Extension`: https://github.com/rhysd/monolith-of-web - Monolith Chrome Extension: https://github.com/rhysd/monolith-of-web
- `Pagesaver`: https://github.com/distributed-mind/pagesaver - Pagesaver: https://github.com/distributed-mind/pagesaver
- `Personal WayBack Machine`: https://github.com/popey/pwbm - Personal WayBack Machine: https://github.com/popey/pwbm
- `Hako`: https://github.com/dmpop/hako - Hako: https://github.com/dmpop/hako
--------------------------------------------------- ---------------------------------------------------
## License ## License
<a href="https://creativecommons.org/publicdomain/zero/1.0/">
<img src="https://i.creativecommons.org/p/zero/1.0/88x31.png" alt="CC0-1.0" />
</a>
<br />
To the extent possible under law, the author(s) have dedicated all copyright related and neighboring rights to this software to the public domain worldwide. To the extent possible under law, the author(s) have dedicated all copyright related and neighboring rights to this software to the public domain worldwide.
This software is distributed without any warranty. This software is distributed without any warranty.

@ -474,8 +474,9 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String {
result = String::from_utf8(buf).unwrap(); result = String::from_utf8(buf).unwrap();
} }
// Unwrap NOSCRIPT elements
if options.unwrap_noscript { if options.unwrap_noscript {
let noscript_re = Regex::new(r"<(?P<c>/?noscript)>").unwrap(); let noscript_re = Regex::new(r"<(?P<c>/?noscript[^>]*)>").unwrap();
result = noscript_re.replace_all(&result, "<!--$c-->").to_string(); result = noscript_re.replace_all(&result, "<!--$c-->").to_string();
} }
@ -503,44 +504,39 @@ pub fn retrieve_and_embed_asset(
depth + 1, depth + 1,
) { ) {
Ok((data, final_url, mut media_type)) => { Ok((data, final_url, mut media_type)) => {
// Check integrity if it's a LINK or SCRIPT tag
let node_name: &str = get_node_name(&node).unwrap(); let node_name: &str = get_node_name(&node).unwrap();
let mut ok_to_include: bool = true;
// Check integrity if it's a LINK or SCRIPT element
let mut ok_to_include: bool = true;
if node_name == "link" || node_name == "script" { if node_name == "link" || node_name == "script" {
let node_integrity_attr_value: Option<String> = get_node_attr(node, "integrity");
// Check integrity // Check integrity
if let Some(node_integrity_attr_value) = node_integrity_attr_value { if let Some(node_integrity_attr_value) = get_node_attr(node, "integrity") {
if !node_integrity_attr_value.is_empty() { if !node_integrity_attr_value.is_empty() {
ok_to_include = check_integrity(&data, &node_integrity_attr_value); ok_to_include = check_integrity(&data, &node_integrity_attr_value);
} }
}
// Wipe integrity attribute // Wipe the integrity attribute
set_node_attr(node, "integrity", None); set_node_attr(node, "integrity", None);
}
} }
if ok_to_include { if ok_to_include {
if node_name == "link" { if node_name == "link" && determine_link_node_type(node) == "stylesheet" {
let link_type: &str = determine_link_node_type(node); // Stylesheet LINK elements require special treatment
// CSS LINK nodes requires special treatment let css: String = embed_css(
if link_type == "stylesheet" { cache,
let css: String = embed_css( client,
cache, &final_url,
client, &String::from_utf8_lossy(&data),
&final_url, options,
&String::from_utf8_lossy(&data), depth + 1,
options, );
depth + 1,
);
let css_data_url = create_data_url("text/css", css.as_bytes(), &final_url);
set_node_attr(&node, attr_name, Some(css_data_url.to_string()));
return; // Do not fall through // Create and embed data URL
} let css_data_url = create_data_url("text/css", css.as_bytes(), &final_url);
set_node_attr(&node, attr_name, Some(css_data_url.to_string()));
} else if node_name == "frame" || node_name == "iframe" { } else if node_name == "frame" || node_name == "iframe" {
// (I)FRAMEs are also quite different from conventional resources
let frame_dom = html_to_dom(&String::from_utf8_lossy(&data)); let frame_dom = html_to_dom(&String::from_utf8_lossy(&data));
walk_and_embed_assets( walk_and_embed_assets(
cache, cache,
@ -559,30 +555,38 @@ pub fn retrieve_and_embed_asset(
) )
.unwrap(); .unwrap();
// Create and embed data URL
let mut frame_data_url = create_data_url(&media_type, &frame_data, &final_url); let mut frame_data_url = create_data_url(&media_type, &frame_data, &final_url);
frame_data_url.set_fragment(resolved_url.fragment()); frame_data_url.set_fragment(resolved_url.fragment());
set_node_attr(node, attr_name, Some(frame_data_url.to_string())); set_node_attr(node, attr_name, Some(frame_data_url.to_string()));
} else {
// Every other type of element gets processed here
// Parse media type for SCRIPT elements
if node_name == "script" {
if let Some(_) = get_node_attr(node, "src") {
if let Some(script_node_type_attr_value) = get_node_attr(node, "type") {
media_type = script_node_type_attr_value.to_string();
} else {
// Fallback to default one if it's not specified
media_type = "application/javascript".to_string();
}
}
}
return; // Do not fall through // Create and embed data URL
} let mut data_url = create_data_url(&media_type, &data, &final_url);
data_url.set_fragment(resolved_url.fragment());
// Everything else set_node_attr(node, attr_name, Some(data_url.to_string()));
if node_name == "script" {
media_type = "application/javascript".to_string();
} }
let mut data_url = create_data_url(&media_type, &data, &final_url);
data_url.set_fragment(resolved_url.fragment());
set_node_attr(node, attr_name, Some(data_url.to_string()));
} }
} }
Err(_) => { Err(_) => {
if resolved_url.scheme() == "http" || resolved_url.scheme() == "https" { if resolved_url.scheme() == "http" || resolved_url.scheme() == "https" {
// Keep remote reference if unable to retrieve the asset // Keep remote references if unable to retrieve the asset
set_node_attr(node, attr_name, Some(resolved_url.to_string())); set_node_attr(node, attr_name, Some(resolved_url.to_string()));
} else { } else {
// Exclude non-remote URLs // Remove local references if they can't be successfully embedded as data URLs
set_node_attr(node, attr_name, None); set_node_attr(node, attr_name, None);
} }
} }
@ -645,7 +649,7 @@ pub fn walk_and_embed_assets(
let link_type: &str = determine_link_node_type(node); let link_type: &str = determine_link_node_type(node);
if link_type == "icon" { if link_type == "icon" {
// Find and resolve this LINK node's href attribute // Find and resolve LINK's href attribute
if let Some(link_attr_href_value) = get_node_attr(node, "href") { if let Some(link_attr_href_value) = get_node_attr(node, "href") {
if !options.no_images && !link_attr_href_value.is_empty() { if !options.no_images && !link_attr_href_value.is_empty() {
retrieve_and_embed_asset( retrieve_and_embed_asset(
@ -663,10 +667,12 @@ pub fn walk_and_embed_assets(
} }
} }
} else if link_type == "stylesheet" { } else if link_type == "stylesheet" {
// Find and resolve this LINK node's href attribute // Resolve LINK's href attribute
if let Some(link_attr_href_value) = get_node_attr(node, "href") { if let Some(link_attr_href_value) = get_node_attr(node, "href") {
if options.no_css { if options.no_css {
set_node_attr(node, "href", None); set_node_attr(node, "href", None);
// Wipe integrity attribute
set_node_attr(node, "integrity", None);
} else { } else {
if !link_attr_href_value.is_empty() { if !link_attr_href_value.is_empty() {
retrieve_and_embed_asset( retrieve_and_embed_asset(
@ -916,14 +922,15 @@ pub fn walk_and_embed_assets(
// Replace with empty JS call to preserve original behavior // Replace with empty JS call to preserve original behavior
set_node_attr(node, "href", Some(str!("javascript:;"))); set_node_attr(node, "href", Some(str!("javascript:;")));
} }
} else if anchor_attr_href_value.clone().starts_with('#')
|| is_url_and_has_protocol(&anchor_attr_href_value.clone())
{
// Don't touch mailto: links or hrefs which begin with a hash sign
} else { } else {
let href_full_url: Url = // Don't touch mailto: links or hrefs which begin with a hash sign
resolve_url(document_url, &anchor_attr_href_value); if !anchor_attr_href_value.clone().starts_with('#')
set_node_attr(node, "href", Some(href_full_url.to_string())); && !is_url_and_has_protocol(&anchor_attr_href_value.clone())
{
let href_full_url: Url =
resolve_url(document_url, &anchor_attr_href_value);
set_node_attr(node, "href", Some(href_full_url.to_string()));
}
} }
} }
} }
@ -937,6 +944,8 @@ pub fn walk_and_embed_assets(
// Remove src attribute // Remove src attribute
if script_attr_src != None { if script_attr_src != None {
set_node_attr(node, "src", None); set_node_attr(node, "src", None);
// Wipe integrity attribute
set_node_attr(node, "integrity", None);
} }
} else if !script_attr_src.clone().unwrap_or_default().is_empty() { } else if !script_attr_src.clone().unwrap_or_default().is_empty() {
retrieve_and_embed_asset( retrieve_and_embed_asset(
@ -1081,7 +1090,7 @@ pub fn walk_and_embed_assets(
); );
// Get rid of original contents // Get rid of original contents
noscript_contents.clear(); noscript_contents.clear();
// Insert HTML containing embedded assets back into NOSCRIPT node // Insert HTML containing embedded assets into NOSCRIPT node
if let Some(html) = if let Some(html) =
get_child_node_by_name(&noscript_contents_dom.document, "html") get_child_node_by_name(&noscript_contents_dom.document, "html")
{ {

@ -88,7 +88,7 @@ mod passing {
} }
#[test] #[test]
fn remove_existing_when_empty_provided() { fn set_existing_to_empty_when_empty_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd let out = cmd
.arg("-M") .arg("-M")

@ -11,24 +11,6 @@ mod passing {
use std::env; use std::env;
use std::process::Command; use std::process::Command;
// Runs the binary against a data URL that has no media type before the comma
// and checks that it fails: nothing on STDOUT, an error line on STDERR, and
// exit code 1.
#[test]
fn bad_input_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
// STDOUT should be empty (no document is produced for this input)
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
// STDERR should contain error description
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
"Unsupported data URL media type\n"
);
// The exit code should be 1
out.assert().code(1);
}
#[test] #[test]
fn isolate_data_url() { fn isolate_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
@ -192,6 +174,38 @@ mod passing {
// The exit code should be 0 // The exit code should be 0
out.assert().code(0); out.assert().code(0);
} }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use assert_cmd::prelude::*;
use std::env;
use std::process::Command;
// Runs the binary against a data URL that has no media type before the comma
// and checks that it fails: nothing on STDOUT, an error line on STDERR, and
// exit code 1.
#[test]
fn bad_input_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
// STDOUT should be empty (no document is produced for this input)
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
// STDERR should contain error description
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
"Unsupported data URL media type\n"
);
// The exit code should be 1
out.assert().code(1);
}
#[test] #[test]
fn security_disallow_local_assets_within_data_url_targets() { fn security_disallow_local_assets_within_data_url_targets() {

@ -130,7 +130,14 @@ mod passing {
// STDOUT should contain HTML with no CSS // STDOUT should contain HTML with no CSS
assert_eq!( assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(), std::str::from_utf8(&out.stdout).unwrap(),
"<html><head></head><body><!--noscript--><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"><!--/noscript-->\n</body></html>\n" "<html>\
<head></head>\
<body>\
<!--noscript-->\
<img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\">\
<!--/noscript-->\n\
</body>\
</html>\n"
); );
// STDERR should contain target HTML and embedded SVG files // STDERR should contain target HTML and embedded SVG files
@ -153,4 +160,27 @@ mod passing {
// The exit code should be 0 // The exit code should be 0
out.assert().code(0); out.assert().code(0);
} }
// Checks NOSCRIPT unwrapping when the opening tag carries attributes: the
// expected output has both the opening tag (attributes included) and the
// closing tag turned into HTML comments, with the inner text left in place.
#[test]
fn unwrap_noscript_contents_attr_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
// `-M` omits the timestamp/URL metadata, `-n` requests NOSCRIPT extraction
let out = cmd
.arg("-M")
.arg("-n")
.arg("data:text/html,<noscript class=\"\">test</noscript>")
.output()
.unwrap();
// STDOUT should contain unwrapped contents of NOSCRIPT element
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head><!--noscript class=\"\"-->test<!--/noscript--></head><body></body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
out.assert().code(0);
}
} }

@ -30,7 +30,14 @@ mod passing {
// STDOUT should contain newly added base URL // STDOUT should contain newly added base URL
assert_eq!( assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(), std::str::from_utf8(&out.stdout).unwrap(),
"<html><head>\n <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n </head>\n <body>\n © Some Company\n \n\n</body></html>\n" "<html>\
<head>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
</head>\n \
<body>\n \
© Some Company\n \
\n\n</body>\
</html>\n"
); );
// STDERR should contain only the target file // STDERR should contain only the target file

@ -87,10 +87,12 @@ mod passing {
#[test] #[test]
fn no_css() { fn no_css() {
let html = "<link rel=\"stylesheet\" href=\"main.css\">\ let html = "\
<link rel=\"alternate stylesheet\" href=\"main.css\">\ <link rel=\"stylesheet\" href=\"main.css\">\
<style>html{background-color: #000;}</style>\ <link rel=\"alternate stylesheet\" href=\"main.css\">\
<div style=\"display: none;\"></div>"; <style>html{background-color: #000;}</style>\
<div style=\"display: none;\"></div>\
";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html);
let url: Url = Url::parse("http://localhost").unwrap(); let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
@ -108,16 +110,18 @@ mod passing {
assert_eq!( assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(), buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\ "\
<head>\ <html>\
<link rel=\"stylesheet\">\ <head>\
<link rel=\"alternate stylesheet\">\ <link rel=\"stylesheet\">\
<style></style>\ <link rel=\"alternate stylesheet\">\
</head>\ <style></style>\
<body>\ </head>\
<div></div>\ <body>\
</body>\ <div></div>\
</html>" </body>\
</html>\
"
); );
} }
@ -203,7 +207,15 @@ mod passing {
assert_eq!( assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(), buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><frameset><frame src=\"\"></frameset></html>" "\
<html>\
<head>\
</head>\
<frameset>\
<frame src=\"\">\
</frameset>\
</html>\
"
); );
} }
@ -227,16 +239,25 @@ mod passing {
assert_eq!( assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(), buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><iframe src=\"\"></iframe></body></html>" "\
<html>\
<head></head>\
<body>\
<iframe src=\"\"></iframe>\
</body>\
</html>\
"
); );
} }
#[test] #[test]
fn no_js() { fn no_js() {
let html = "<div onClick=\"void(0)\">\ let html = "\
<script src=\"http://localhost/assets/some.js\"></script>\ <div onClick=\"void(0)\">\
<script>alert(1)</script>\ <script src=\"http://localhost/assets/some.js\"></script>\
</div>"; <script>alert(1)</script>\
</div>\
";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html);
let url: Url = Url::parse("http://localhost").unwrap(); let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
@ -254,52 +275,141 @@ mod passing {
assert_eq!( assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(), buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><script></script>\ "\
<script></script></div></body></html>" <html>\
<head></head>\
<body>\
<div>\
<script></script>\
<script></script>\
</div>\
</body>\
</html>\
"
); );
} }
// #[test] #[test]
// fn discards_integrity() { fn keeps_integrity_for_linked_assets() {
// let html = "<title>No integrity</title>\ let html = "<title>Has integrity</title>\
// <link integrity=\"sha384-...\" rel=\"something\"/>\ <link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\" />";
// <script integrity=\"sha384-...\" src=\"some.js\"></script>"; let dom = html::html_to_dom(&html);
// let dom = html::html_to_dom(&html); let url: Url = Url::parse("http://localhost").unwrap();
// let url: Url = Url::parse("http://localhost").unwrap(); let cache = &mut HashMap::new();
// let cache = &mut HashMap::new();
let mut options = Options::default();
// let mut options = Options::default(); options.silent = true;
// options.no_css = true;
// options.no_frames = true; let client = Client::new();
// options.no_js = true;
// options.no_images = true; html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
// options.silent = true;
let mut buf: Vec<u8> = Vec::new();
// let client = Client::new(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
// html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0); assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
// let mut buf: Vec<u8> = Vec::new(); "\
// serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); <html>\
<head>\
// assert_eq!( <title>Has integrity</title>\
// buf.iter().map(|&c| c as char).collect::<String>(), <link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\">\
// "<html>\ </head>\
// <head><title>No integrity</title><link rel=\"something\"><script></script></head>\ <body></body>\
// <body></body>\ </html>\
// </html>" "
// ); );
// } }
// With both CSS and JS excluded, a stylesheet LINK and a SCRIPT that carry an
// (empty) integrity attribute are expected to come out with neither their
// href/src nor their integrity attribute.
#[test]
fn discards_integrity_for_linked_assets_nojs_nocss() {
let html = "\
<title>No integrity</title>\
<link integrity=\"\" rel=\"stylesheet\" href=\"data:;\"/>\
<script integrity=\"\" src=\"some.js\"></script>\
";
let dom = html::html_to_dom(&html);
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
// Exclude CSS and JS; keep the run quiet
let mut options = Options::default();
options.no_css = true;
options.no_js = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
// Serialize the processed DOM so it can be compared as a string
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
// Only rel="stylesheet" survives on the LINK; the SCRIPT has no attributes left
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
<title>No integrity</title>\
<link rel=\"stylesheet\">\
<script></script>\
</head>\
<body></body>\
</html>\
"
);
}
// Processes a LINK with rel="something" and a SCRIPT, both carrying a
// non-empty integrity attribute, with CSS and JS excluded.
// Per the expected output below, the SCRIPT loses both src and integrity,
// while the LINK is left exactly as written (integrity and href kept).
// NOTE(review): the test name and the "No integrity" title suggest integrity
// is discarded everywhere, yet the assertion expects it to remain on the LINK
// -- presumably because only icon/stylesheet LINKs are rewritten; confirm and
// consider renaming.
#[test]
fn discards_integrity_for_embedded_assets() {
let html = "\
<title>No integrity</title>\
<link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\"/>\
<script integrity=\"sha384-456\" src=\"some.js\"></script>\
";
let dom = html::html_to_dom(&html);
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
// Exclude CSS and JS; keep the run quiet
let mut options = Options::default();
options.no_css = true;
options.no_js = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
// Serialize the processed DOM so it can be compared as a string
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
<title>No integrity</title>\
<link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\">\
<script></script>\
</head>\
<body>\
</body>\
</html>\
"
);
}
#[test] #[test]
fn removes_unwanted_meta_tags() { fn removes_unwanted_meta_tags() {
let html = "<html>\ let html = "\
<head>\ <html>\
<meta http-equiv=\"Refresh\" value=\"20\"/>\ <head>\
<meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\ <meta http-equiv=\"Refresh\" value=\"20\"/>\
</head>\ <meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
<body></body>\ </head>\
</html>"; <body>\
</body>\
</html>\
";
let dom = html::html_to_dom(&html); let dom = html::html_to_dom(&html);
let url: Url = Url::parse("http://localhost").unwrap(); let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new(); let cache = &mut HashMap::new();
@ -320,19 +430,22 @@ mod passing {
assert_eq!( assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(), buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\ "\
<html>\
<head>\ <head>\
<meta http-equiv=\"disabled by monolith (Refresh)\" value=\"20\">\ <meta http-equiv=\"disabled by monolith (Refresh)\" value=\"20\">\
<meta http-equiv=\"disabled by monolith (Location)\" value=\"https://freebsd.org\">\ <meta http-equiv=\"disabled by monolith (Location)\" value=\"https://freebsd.org\">\
</head>\ </head>\
<body></body>\ <body>\
</body>\
</html>" </html>"
); );
} }
#[test] #[test]
fn processes_noscript_tags() { fn processes_noscript_tags() {
let html = "<html>\ let html = "\
<html>\
<body>\ <body>\
<noscript>\ <noscript>\
<img src=\"image.png\" />\ <img src=\"image.png\" />\
@ -357,7 +470,8 @@ mod passing {
assert_eq!( assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(), buf.iter().map(|&c| c as char).collect::<String>(),
format!( format!(
"<html>\ "\
<html>\
<head>\ <head>\
</head>\ </head>\
<body>\ <body>\
@ -370,4 +484,34 @@ mod passing {
) )
); );
} }
// An inline SCRIPT (no src) with type="application/json" is expected to pass
// through asset embedding unchanged: same attributes, same JSON body.
#[test]
fn preserves_script_type_json() {
let html = "<script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>";
let dom = html::html_to_dom(&html);
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
// Default options apart from silencing output
let mut options = Options::default();
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
// Serialize the processed DOM so it can be compared as a string
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
// The SCRIPT ends up inside HEAD in the serialized document, otherwise untouched
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
<script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>\
</head>\
<body>\
</body>\
</html>"
);
}
} }

Loading…
Cancel
Save