make possible to unwrap NOSCRIPT nodes

pull/253/head
Sunshine 3 years ago
parent 6ecda080e8
commit 6f918f6c1c
No known key found for this signature in database
GPG Key ID: B80CA68703CD8AB1

@ -75,6 +75,7 @@ or
- `-j`: Exclude JavaScript
- `-k`: Accept invalid X.509 (TLS) certificates
- `-M`: Don't add timestamp and URL information
- `-n`: Extract contents of NOSCRIPT tags
- `-o`: Write output to `file`
- `-s`: Be quiet
- `-t`: Adjust `network request timeout`

@ -474,6 +474,11 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String {
result = String::from_utf8(buf).unwrap();
}
if options.unwrap_noscript {
let noscript_re = Regex::new(r"<(?P<c>/?noscript)>").unwrap();
result = noscript_re.replace_all(&result, "<!--$c-->").to_string();
}
result
}
@ -1060,11 +1065,11 @@ pub fn walk_and_embed_assets(
for child_node in node.children.borrow_mut().iter_mut() {
match child_node.data {
NodeData::Text { ref contents } => {
// Get contents of the NOSCRIPT node
// Get contents of NOSCRIPT node
let mut noscript_contents = contents.borrow_mut();
// Parse contents of the NOSCRIPT node
// Parse contents of NOSCRIPT node as DOM
let noscript_contents_dom: RcDom = html_to_dom(&noscript_contents);
// Embed assets within the NOSCRIPT node
// Embed assets of NOSCRIPT node contents
walk_and_embed_assets(
cache,
client,
@ -1075,7 +1080,7 @@ pub fn walk_and_embed_assets(
);
// Get rid of original contents
noscript_contents.clear();
// Insert HTML containing embedded assets into the NOSCRIPT node
// Insert HTML containing embedded assets back into NOSCRIPT node
if let Some(html) =
get_child_node_by_name(&noscript_contents_dom.document, "html")
{

@ -21,6 +21,7 @@ pub struct Options {
pub no_video: bool,
pub target: String,
pub no_color: bool,
pub unwrap_noscript: bool,
}
const ASCII: &'static str = " \
@ -55,6 +56,9 @@ impl Options {
.args_from_usage("-j, --no-js 'Removes JavaScript'")
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
.args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
.args_from_usage(
"-n, --unwrap-noscript 'Replaces NOSCRIPT elements with their contents'",
)
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
.args_from_usage("-s, --silent 'Suppresses verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
@ -100,6 +104,7 @@ impl Options {
} else {
options.user_agent = Some(DEFAULT_USER_AGENT.to_string());
}
options.unwrap_noscript = app.is_present("unwrap-noscript");
options.no_video = app.is_present("no-video");
options.no_color =

@ -202,12 +202,12 @@ mod passing {
format!(
"\
{file_url_html}\n \
{file_url_css}\n\
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap())
.unwrap()
.into_string(),
file_url_css = Url::from_file_path(fs::canonicalize(&path_svg).unwrap())
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap())
.unwrap()
.into_string(),
)

@ -2,4 +2,5 @@ mod base_url;
mod basic;
mod data_url;
mod local_files;
mod noscript;
mod unusual_encodings;

@ -0,0 +1,164 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::fs;
use std::path::Path;
use std::process::Command;
use url::Url;
#[test]
fn parse_noscript_contents() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let path_html: &Path = Path::new("src/tests/data/noscript/index.html");
let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg");
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
// STDOUT should contain HTML with no CSS
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head></head><body><noscript><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"></noscript>\n</body></html>\n"
);
// STDERR should contain target HTML and embedded SVG files
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file_url_html}\n \
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap())
.unwrap()
.into_string(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap())
.unwrap()
.into_string(),
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn unwrap_noscript_contents() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let path_html: &Path = Path::new("src/tests/data/noscript/index.html");
let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg");
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
// STDOUT should contain HTML with no CSS
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head></head><body><!--noscript--><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"><!--/noscript-->\n</body></html>\n"
);
// STDERR should contain target HTML and embedded SVG files
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file_url_html}\n \
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap())
.unwrap()
.into_string(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap())
.unwrap()
.into_string(),
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn unwrap_noscript_contents_nested() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let path_html: &Path = Path::new("src/tests/data/noscript/nested.html");
let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg");
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
// STDOUT should contain HTML with no CSS
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head></head><body><!--noscript--><h1>JS is not active</h1><!--noscript--><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"><!--/noscript--><!--/noscript-->\n</body></html>\n"
);
// STDERR should contain target HTML and embedded SVG files
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file_url_html}\n \
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap())
.unwrap()
.into_string(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap())
.unwrap()
.into_string(),
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn unwrap_noscript_contents_with_script() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let path_html: &Path = Path::new("src/tests/data/noscript/script.html");
let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg");
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
// STDOUT should contain HTML with no CSS
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head></head><body><!--noscript--><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"><!--/noscript-->\n</body></html>\n"
);
// STDERR should contain target HTML and embedded SVG files
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file_url_html}\n \
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap())
.unwrap()
.into_string(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap())
.unwrap()
.into_string(),
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
}

@ -0,0 +1,5 @@
<svg version="1.1" baseProfile="full" width="300" height="200" xmlns="http://www.w3.org/2000/svg">
<rect width="100%" height="100%" fill="red" />
<circle cx="150" cy="100" r="80" fill="green" />
<text x="150" y="125" font-size="60" text-anchor="middle" fill="white">SVG</text>
</svg>

After

Width:  |  Height:  |  Size: 296 B

@ -0,0 +1 @@
<body><noscript><img src="image.svg" /></noscript></body>

@ -0,0 +1 @@
<body><noscript><h1>JS is not active</h1><noscript><img src="image.svg" /></noscript></noscript></body>

@ -0,0 +1 @@
<body><noscript><script>alert(1);</script><img src="image.svg" /></noscript></body>
Loading…
Cancel
Save