From 88a230872ca2cd619cdd3817a36f30616148e7e6 Mon Sep 17 00:00:00 2001 From: Vincent Flyson Date: Sat, 21 Sep 2019 20:06:00 -0400 Subject: [PATCH] Add CSP isolation, no CSS, and no iframe options --- .travis.yml | 4 + Cargo.toml | 2 +- README.md | 6 +- appveyor.yml | 4 +- src/html.rs | 666 +++++++++++++++++++++++++++++++++++++++------------ src/http.rs | 17 +- src/main.rs | 53 ++-- src/utils.rs | 4 +- 8 files changed, 572 insertions(+), 184 deletions(-) diff --git a/.travis.yml b/.travis.yml index 819dabc..050d3dd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,6 +12,10 @@ rust: - beta - nightly +before_script: + - rustup component add rustfmt + script: - cargo build --verbose - cargo test --verbose + - cargo fmt --all -- --check diff --git a/Cargo.toml b/Cargo.toml index f30d76c..65c715b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "monolith" -version = "2.0.17" +version = "2.0.18" authors = [ "Sunshine ", "Mahdi Robatipoor ", diff --git a/README.md b/README.md index 52253b8..6c91fd1 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ [![Travis CI Build Status](https://travis-ci.org/Y2Z/monolith.svg?branch=master)](https://travis-ci.org/Y2Z/monolith) -[![AppVeyor Build status](https://ci.appveyor.com/api/projects/status/j1v1d96sw952b1ch?svg=true)](https://ci.appveyor.com/project/vflyson/monolith) +[![AppVeyor Build status](https://ci.appveyor.com/api/projects/status/ae7soyjih8jg2bv7/branch/master?svg=true)](https://ci.appveyor.com/project/snshn/monolith/branch/master) + # monolith @@ -21,7 +22,10 @@ If compared to saving websites with `wget -mpk`, this tool embeds all assets as $ monolith https://lyrics.github.io/db/p/portishead/dummy/roads/ > portishead-roads-lyrics.html ### Options + - `-c`: Ignore styles + - `-f`: Exclude iframes - `-i`: Remove images + - `-I`: Isolate document - `-j`: Exclude JavaScript - `-k`: Accept invalid X.509 (TLS) certificates - `-s`: Silent mode diff --git a/appveyor.yml b/appveyor.yml index 394334e..2b618fa 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -114,6 +114,7 @@ install: - if defined MINGW_PATH set PATH=%PATH%;%MINGW_PATH% - rustc -vV - cargo -vV + - rustup component add rustfmt ## Build Script ## @@ -125,4 +126,5 @@ build: false #directly or perform other testing commands. Rust will automatically be placed in the PATH # environment variable. test_script: -- cargo test --verbose %cargoflags% + - cargo test --verbose %cargoflags% + - cargo fmt --all -- --check diff --git a/src/html.rs b/src/html.rs index b3be66d..3f9f73e 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1,23 +1,25 @@ +use html5ever::interface::QualName; use html5ever::parse_document; use html5ever::rcdom::{Handle, NodeData, RcDom}; use html5ever::serialize::{serialize, SerializeOpts}; -use html5ever::tendril::TendrilSink; +use html5ever::tendril::{format_tendril, TendrilSink}; +use html5ever::tree_builder::{Attribute, TreeSink}; +use html5ever::{local_name, namespace_url, ns}; use http::{is_valid_url, resolve_url, retrieve_asset}; use regex::Regex; use std::default::Default; -use std::io; use utils::data_to_dataurl; lazy_static! { static ref EMPTY_STRING: String = String::new(); static ref HAS_PROTOCOL: Regex = Regex::new(r"^[a-z0-9]+:").unwrap(); - static ref ICON_VALUES: Regex = Regex::new( - r"^icon|shortcut icon|mask-icon|apple-touch-icon|fluid-icon$" - ).unwrap(); + static ref ICON_VALUES: Regex = + Regex::new(r"^icon|shortcut icon|mask-icon|apple-touch-icon|fluid-icon$").unwrap(); } -const TRANSPARENT_PIXEL: &str = "data:image/png;base64,\ -iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII="; +const TRANSPARENT_PIXEL: &str = + "data:image/png;base64,\ + iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII="; const JS_DOM_EVENT_ATTRS: [&str; 21] = [ // Input @@ -47,53 +49,46 @@ const JS_DOM_EVENT_ATTRS: [&str; 21] = [ "onresize", ]; -fn get_parent_node_name(node: &Handle) -> String { +fn get_parent_node(node: &Handle) -> Handle { let parent = node.parent.take().clone(); - let parent_node = parent.and_then(|node| node.upgrade()).unwrap(); - - match &parent_node.data { - NodeData::Document => { EMPTY_STRING.clone() } - NodeData::Doctype { .. } => { EMPTY_STRING.clone() } - NodeData::Text { .. } => { EMPTY_STRING.clone() } - NodeData::Comment { .. } => { EMPTY_STRING.clone() } - NodeData::Element { ref name, attrs: _, .. } => { - name.local.as_ref().to_string() - } - NodeData::ProcessingInstruction { .. } => unreachable!() + parent.and_then(|node| node.upgrade()).unwrap() +} + +fn get_node_name(node: &Handle) -> String { + match &node.data { + NodeData::Element { ref name, .. } => name.local.as_ref().to_string(), + _ => EMPTY_STRING.clone(), } } pub fn walk_and_embed_assets( url: &str, node: &Handle, + opt_no_css: bool, opt_no_js: bool, opt_no_images: bool, opt_user_agent: &str, opt_silent: bool, opt_insecure: bool, + opt_no_frames: bool, ) { match node.data { NodeData::Document => { // Dig deeper for child in node.children.borrow().iter() { walk_and_embed_assets( - &url, child, - opt_no_js, - opt_no_images, - opt_user_agent, - opt_silent, - opt_insecure, - ); + &url, + child, + opt_no_css, + opt_no_js, + opt_no_images, + opt_user_agent, + opt_silent, + opt_insecure, + opt_no_frames, + ); } } - NodeData::Doctype { .. } => {} - NodeData::Text { .. } => {} - NodeData::Comment { .. } => { - // Note: in case of opt_no_js being set to true, there's no need to worry about - // getting rid of comments that may contain scripts, e.g.