automatically remove "Refresh" and "Location" META tags

pull/189/head
Sunshine 4 years ago
parent c3ca2ad1d5
commit 44cac65a83
No known key found for this signature in database
GPG Key ID: B80CA68703CD8AB1

@ -0,0 +1,19 @@
# 4. Reload and location `meta` nodes
Date: 2020-06-25
## Status
Accepted
## Context
HTML documents may contain `meta` tags capable of automatically refreshing the page or redirecting to another location.
## Decision
Since the resulting document is saved to disk and generally not intended to be served over the network, it only makes sense to remove `meta` nodes that have an `http-equiv` attribute equal to "Refresh" or "Location", in order to prevent them from forcing the page to reload or redirect to another location.
## Consequences
Monolith will ensure that saved documents do not contain `meta` tags capable of changing the location or reloading the page.

@ -170,6 +170,25 @@ pub fn walk_and_embed_assets(
let attrs_mut = &mut attrs.borrow_mut();
match name.local.as_ref() {
"meta" => {
// Neutralise `meta` nodes whose `http-equiv` attribute is "Refresh" or
// "Location" (compared ASCII case-insensitively): the document is saved
// to disk, so it must not reload itself or redirect to another location.
let is_unwanted_meta: bool = attrs_mut.iter().any(|attr| {
    let attr_name: &str = &attr.name.local;
    // Borrow the value instead of copying it into a `String`.
    let attr_value: &str = &attr.value;
    attr_name.eq_ignore_ascii_case("http-equiv")
        && (attr_value.eq_ignore_ascii_case("refresh")
            || attr_value.eq_ignore_ascii_case("location"))
});
if is_unwanted_meta {
    // Strip this node of all its attributes in a single step; the bare
    // `<meta>` element itself stays in the tree.
    attrs_mut.clear();
}
}
"link" => {
// Remove integrity attributes, keep value of the last one
let mut integrity: String = str!();

@ -211,15 +211,15 @@ mod passing {
buf.iter().map(|&c| c as char).collect::<String>(),
format!(
"<html>\
<head>\
<link rel=\"icon\">\
</head>\
<body>\
<div>\
<img src=\"{empty_image}\">\
</div>\
</body>\
</html>",
<head>\
<link rel=\"icon\">\
</head>\
<body>\
<div>\
<img src=\"{empty_image}\">\
</div>\
</body>\
</html>",
empty_image = empty_image!()
)
);
@ -341,8 +341,8 @@ mod passing {
#[test]
fn no_js() {
let html = "<div onClick=\"void(0)\">\
<script src=\"http://localhost/assets/some.js\"></script>\
<script>alert(1)</script>\
<script src=\"http://localhost/assets/some.js\"></script>\
<script>alert(1)</script>\
</div>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
@ -381,7 +381,7 @@ mod passing {
}
#[test]
fn with_no_integrity() {
fn discards_integrity() {
let html = "<title>No integrity</title>\
<link integrity=\"sha384-...\" rel=\"something\"/>\
<script integrity=\"sha384-...\" src=\"some.js\"></script>";
@ -415,8 +415,56 @@ mod passing {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head><title>No integrity</title><link rel=\"something\"><script></script></head>\
<head><title>No integrity</title><link rel=\"something\"><script></script></head>\
<body></body>\
</html>"
);
}
/// `meta` nodes whose `http-equiv` is "Refresh" or "Location" must come out
/// of `walk_and_embed_assets` stripped of every attribute, so the saved
/// document can neither reload itself nor redirect elsewhere.
#[test]
fn removes_unwanted_meta_tags() {
    // The payload of an `http-equiv` directive lives in `content`, so the
    // fixture uses that attribute to mirror real-world markup.
    let html = "<html>\
                    <head>\
                        <meta http-equiv=\"Refresh\" content=\"20\"/>\
                        <meta http-equiv=\"Location\" content=\"https://freebsd.org\"/>\
                    </head>\
                    <body></body>\
                </html>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();
    let client = Client::new();

    let opt_no_css: bool = true;
    let opt_no_fonts: bool = false;
    let opt_no_frames: bool = true;
    let opt_no_js: bool = true;
    let opt_no_images: bool = true;
    let opt_silent = true;

    html::walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_fonts,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_silent,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Both nodes survive as bare `<meta>` elements: only their attributes
    // are removed, not the nodes themselves.
    assert_eq!(
        buf.iter().map(|&c| c as char).collect::<String>(),
        "<html>\
            <head>\
                <meta>\
                <meta>\
            </head>\
            <body></body>\
        </html>"
    );
}

Loading…
Cancel
Save