From 9b40dbbf271c9e5d768dca410174784ac544088c Mon Sep 17 00:00:00 2001 From: Sunshine Date: Wed, 22 Apr 2020 03:37:02 -0400 Subject: [PATCH] add option to exclude web fonts --- README.md | 1 + src/args.rs | 3 ++ src/css.rs | 25 +++++++-- src/html.rs | 19 ++++--- src/main.rs | 3 +- src/tests/css/embed_css.rs | 72 +++++++++++++++++++++++-- src/tests/html/walk_and_embed_assets.rs | 40 ++++++++++---- 7 files changed, 138 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 3dccbd9..aba5353 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,7 @@ The guide can be found [here](docs/containers.md) ## Options - `-c`: Ignore styles - `-f`: Exclude frames and iframes + - `-F`: Omit web fonts - `-i`: Remove images - `-I`: Isolate the document - `-j`: Exclude JavaScript diff --git a/src/args.rs b/src/args.rs index 589e094..9136271 100644 --- a/src/args.rs +++ b/src/args.rs @@ -4,6 +4,7 @@ use clap::{App, Arg}; pub struct AppArgs { pub target: String, pub no_css: bool, + pub no_fonts: bool, pub no_frames: bool, pub no_images: bool, pub no_js: bool, @@ -35,6 +36,7 @@ impl AppArgs { // .args_from_usage("-a, --include-audio 'Removes audio sources'") .args_from_usage("-c, --no-css 'Removes CSS'") .args_from_usage("-f, --no-frames 'Removes frames and iframes'") + .args_from_usage("-F, --no-fonts 'Removes fonts'") .args_from_usage("-i, --no-images 'Removes images'") .args_from_usage("-I, --isolate 'Cuts off document from the Internet'") .args_from_usage("-j, --no-js 'Removes JavaScript'") @@ -52,6 +54,7 @@ impl AppArgs { .expect("please set target") .to_string(); app_args.no_css = app.is_present("no-css"); + app_args.no_fonts = app.is_present("no-fonts"); app_args.no_frames = app.is_present("no-frames"); app_args.no_images = app.is_present("no-images"); app_args.no_js = app.is_present("no-js"); diff --git a/src/css.rs b/src/css.rs index cce992d..ed84ecc 100644 --- a/src/css.rs +++ b/src/css.rs @@ -61,6 +61,7 @@ pub fn process_css<'a>( rule_name: &str, prop_name: &str, func_name: &str, + opt_no_fonts: bool, opt_no_images: bool, opt_silent: bool, ) -> Result> { @@ -89,6 +90,10 @@ pub fn process_css<'a>( Token::Colon => result.push_str(":"), Token::Comma => result.push_str(","), Token::ParenthesisBlock | Token::SquareBracketBlock | Token::CurlyBracketBlock => { + if opt_no_fonts && curr_rule == "font-face" { + continue; + } + let closure: &str; if token == &Token::ParenthesisBlock { result.push_str("("); @@ -111,6 +116,7 @@ pub fn process_css<'a>( rule_name, curr_prop.as_str(), func_name, + opt_no_fonts, opt_no_images, opt_silent, ) @@ -133,12 +139,18 @@ pub fn process_css<'a>( Token::WhiteSpace(ref value) => { result.push_str(value); } + // div... Token::Ident(ref value) => { + curr_rule = str!(); curr_prop = str!(value); result.push_str(&escape(value)); } + // @import, @font-face, @charset, @media... Token::AtKeyword(ref value) => { curr_rule = str!(value); + if opt_no_fonts && curr_rule == "font-face" { + continue; + } result.push_str("@"); result.push_str(value); } @@ -147,13 +159,10 @@ pub fn process_css<'a>( result.push_str(value); } Token::QuotedString(ref value) => { - let is_import: bool = curr_rule == "import"; - if is_import { + if curr_rule == "import" { // Reset current at-rule value curr_rule = str!(); - } - if is_import { // Skip empty import values if value.len() < 1 { result.push_str("''"); @@ -182,6 +191,7 @@ pub fn process_css<'a>( client, final_url.as_str(), &css, + opt_no_fonts, opt_no_images, opt_silent, ) @@ -254,12 +264,15 @@ pub fn process_css<'a>( result.push_str(str!(value).as_str()); result.push_str(str!(unit).as_str()); } + // #selector, #id... Token::IDHash(ref value) => { + curr_rule = str!(); result.push_str("#"); result.push_str(&escape(value)); } Token::UnquotedUrl(ref value) => { let is_import: bool = curr_rule == "import"; + if is_import { // Reset current at-rule value curr_rule = str!(); @@ -300,6 +313,7 @@ pub fn process_css<'a>( client, final_url.as_str(), &css, + opt_no_fonts, opt_no_images, opt_silent, ) @@ -347,6 +361,7 @@ pub fn process_css<'a>( curr_rule.as_str(), curr_prop.as_str(), function_name, + opt_no_fonts, opt_no_images, opt_silent, ) @@ -368,6 +383,7 @@ pub fn embed_css( client: &Client, parent_url: &str, css: &str, + opt_no_fonts: bool, opt_no_images: bool, opt_silent: bool, ) -> String { @@ -382,6 +398,7 @@ pub fn embed_css( "", "", "", + opt_no_fonts, opt_no_images, opt_silent, ) diff --git a/src/html.rs b/src/html.rs index 4fba459..7d9c032 100644 --- a/src/html.rs +++ b/src/html.rs @@ -42,10 +42,11 @@ pub fn walk_and_embed_assets( url: &str, node: &Handle, opt_no_css: bool, + opt_no_fonts: bool, + opt_no_frames: bool, opt_no_js: bool, opt_no_images: bool, opt_silent: bool, - opt_no_frames: bool, ) { match node.data { NodeData::Document => { @@ -57,10 +58,11 @@ pub fn walk_and_embed_assets( &url, child, opt_no_css, + opt_no_fonts, + opt_no_frames, opt_no_js, opt_no_images, opt_silent, - opt_no_frames, ); } } @@ -157,17 +159,18 @@ pub fn walk_and_embed_assets( ) { // On successful retrieval, traverse CSS Ok((css_data, final_url)) => { - let x: String = embed_css( + let css: String = embed_css( cache, client, &final_url, &css_data, + opt_no_fonts, opt_no_images, opt_silent, ); data_to_data_url( "text/css", - x.as_bytes(), + css.as_bytes(), &final_url, "", ) @@ -462,6 +465,7 @@ pub fn walk_and_embed_assets( client, &url, tendril.as_ref(), + opt_no_fonts, opt_no_images, opt_silent, ); @@ -519,10 +523,11 @@ pub fn walk_and_embed_assets( &frame_final_url, &dom.document, opt_no_css, + opt_no_fonts, + opt_no_frames, opt_no_js, opt_no_images, opt_silent, - opt_no_frames, ); let mut buf: Vec = Vec::new(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); @@ -590,6 +595,7 @@ pub fn walk_and_embed_assets( client, &url, attribute.value.as_ref(), + opt_no_fonts, opt_no_images, opt_silent, ); @@ -621,10 +627,11 @@ pub fn walk_and_embed_assets( &url, child, opt_no_css, + opt_no_fonts, + opt_no_frames, opt_no_js, opt_no_images, opt_silent, - opt_no_frames, ); } } diff --git a/src/main.rs b/src/main.rs index 8243431..b3af7cf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -139,10 +139,11 @@ fn main() { &base_url, &dom.document, app_args.no_css, + app_args.no_fonts, + app_args.no_frames, app_args.no_js, app_args.no_images, app_args.silent, - app_args.no_frames, ); let html: String = stringify_document( diff --git a/src/tests/css/embed_css.rs b/src/tests/css/embed_css.rs index cf81031..f3d036b 100644 --- a/src/tests/css/embed_css.rs +++ b/src/tests/css/embed_css.rs @@ -15,7 +15,10 @@ fn passing_empty_input() { let cache = &mut HashMap::new(); let client = Client::new(); - assert_eq!(css::embed_css(cache, &client, "", "", false, false,), ""); + assert_eq!( + css::embed_css(cache, &client, "", "", false, false, false,), + "" + ); } #[test] @@ -37,6 +40,7 @@ height: calc(100vh - 10pt)"; &client, "https://doesntmatter.local/", &STYLE, + false, true, true, ), @@ -67,7 +71,7 @@ line-height: -1; \ height: calc(100vh - 10pt)"; assert_eq!( - css::embed_css(cache, &client, "", &STYLE, true, true,), + css::embed_css(cache, &client, "", &STYLE, false, true, true,), format!( "/* border: none;*/\ background-image: url('{empty_image}'); \ @@ -95,7 +99,7 @@ fn passing_style_block() { html > body {}"; assert_eq!( - css::embed_css(cache, &client, "file:///", &CSS, false, true,), + css::embed_css(cache, &client, "file:///", &CSS, false, false, true,), CSS ); } @@ -135,7 +139,10 @@ fn passing_attribute_selectors() { } "; - assert_eq!(css::embed_css(cache, &client, "", &CSS, false, false,), CSS); + assert_eq!( + css::embed_css(cache, &client, "", &CSS, false, false, false,), + CSS + ); } #[test] @@ -158,6 +165,7 @@ fn passing_import_string() { "https://doesntmatter.local/", &CSS, false, + false, true, ), "\ @@ -192,6 +200,7 @@ body {\n \ "https://doesntmatter.local/", &CSS, false, + false, true, ), CSS @@ -218,6 +227,7 @@ div {\n \ "https://doesntmatter.local/", &CSS, false, + false, true, ), CSS @@ -246,8 +256,62 @@ fn passing_unusual_indents() { "https://doesntmatter.local/", &CSS, false, + false, true, ), CSS ); } + +#[test] +fn passing_exclude_fonts() { + let cache = &mut HashMap::new(); + let client = Client::new(); + + const CSS: &str = "\ +@font-face {\n \ + font-family: 'My Font';\n \ + src: url(my_font.woff);\n\ +}\n\ +\n\ +#identifier {\n \ + font-family: 'My Font' Arial\n\ +}\n\ +\n\ +@font-face {\n \ + font-family: 'My Font';\n \ + src: url(my_font.woff);\n\ +}\n\ +\n\ +div {\n \ + font-family: 'My Font' Verdana\n\ +}\n\ +"; + + const CSS_OUT: &str = " \ +\n\ +\n\ +#identifier {\n \ + font-family: 'My Font' Arial\n\ +}\n\ +\n \ +\n\ +\n\ +div {\n \ + font-family: 'My Font' Verdana\n\ +}\n\ +"; + + assert_eq!( + css::embed_css( + cache, + &client, + "https://doesntmatter.local/", + &CSS, + true, + false, + true, + ), + CSS_OUT + ); +} diff --git a/src/tests/html/walk_and_embed_assets.rs b/src/tests/html/walk_and_embed_assets.rs index 7dbd8be..b41beb2 100644 --- a/src/tests/html/walk_and_embed_assets.rs +++ b/src/tests/html/walk_and_embed_assets.rs @@ -19,6 +19,7 @@ fn passing_basic() { let url = "http://localhost"; let opt_no_css: bool = false; + let opt_no_fonts: bool = false; let opt_no_frames: bool = false; let opt_no_js: bool = false; let opt_no_images: bool = false; @@ -32,10 +33,11 @@ fn passing_basic() { &url, &dom.document, opt_no_css, + opt_no_fonts, + opt_no_frames, opt_no_js, opt_no_images, opt_silent, - opt_no_frames, ); let mut buf: Vec = Vec::new(); @@ -55,6 +57,7 @@ fn passing_ensure_no_recursive_iframe() { let cache = &mut HashMap::new(); let opt_no_css: bool = false; + let opt_no_fonts: bool = false; let opt_no_frames: bool = false; let opt_no_js: bool = false; let opt_no_images: bool = false; @@ -68,10 +71,11 @@ fn passing_ensure_no_recursive_iframe() { &url, &dom.document, opt_no_css, + opt_no_fonts, + opt_no_frames, opt_no_js, opt_no_images, opt_silent, - opt_no_frames, ); let mut buf: Vec = Vec::new(); @@ -91,6 +95,7 @@ fn passing_ensure_no_recursive_frame() { let cache = &mut HashMap::new(); let opt_no_css: bool = false; + let opt_no_fonts: bool = false; let opt_no_frames: bool = false; let opt_no_js: bool = false; let opt_no_images: bool = false; @@ -104,10 +109,11 @@ fn passing_ensure_no_recursive_frame() { &url, &dom.document, opt_no_css, + opt_no_fonts, + opt_no_frames, opt_no_js, opt_no_images, opt_silent, - opt_no_frames, ); let mut buf: Vec = Vec::new(); @@ -129,6 +135,7 @@ fn passing_no_css() { let cache = &mut HashMap::new(); let opt_no_css: bool = true; + let opt_no_fonts: bool = false; let opt_no_frames: bool = false; let opt_no_js: bool = false; let opt_no_images: bool = false; @@ -141,10 +148,11 @@ fn passing_no_css() { &url, &dom.document, opt_no_css, + opt_no_fonts, + opt_no_frames, opt_no_js, opt_no_images, opt_silent, - opt_no_frames, ); let mut buf: Vec = Vec::new(); @@ -173,6 +181,7 @@ fn passing_no_images() { let cache = &mut HashMap::new(); let opt_no_css: bool = false; + let opt_no_fonts: bool = false; let opt_no_frames: bool = false; let opt_no_js: bool = false; let opt_no_images: bool = true; @@ -186,10 +195,11 @@ fn passing_no_images() { &url, &dom.document, opt_no_css, + opt_no_fonts, + opt_no_frames, opt_no_js, opt_no_images, opt_silent, - opt_no_frames, ); let mut buf: Vec = Vec::new(); @@ -221,6 +231,7 @@ fn passing_no_body_background_images() { let cache = &mut HashMap::new(); let opt_no_css: bool = false; + let opt_no_fonts: bool = false; let opt_no_frames: bool = false; let opt_no_js: bool = false; let opt_no_images: bool = true; @@ -234,10 +245,11 @@ fn passing_no_body_background_images() { &url, &dom.document, opt_no_css, + opt_no_fonts, + opt_no_frames, opt_no_js, opt_no_images, opt_silent, - opt_no_frames, ); let mut buf: Vec = Vec::new(); @@ -257,6 +269,7 @@ fn passing_no_frames() { let cache = &mut HashMap::new(); let opt_no_css: bool = false; + let opt_no_fonts: bool = false; let opt_no_frames: bool = true; let opt_no_js: bool = false; let opt_no_images: bool = false; @@ -269,10 +282,11 @@ fn passing_no_frames() { &url, &dom.document, opt_no_css, + opt_no_fonts, + opt_no_frames, opt_no_js, opt_no_images, opt_silent, - opt_no_frames, ); let mut buf: Vec = Vec::new(); @@ -292,6 +306,7 @@ fn passing_no_iframes() { let cache = &mut HashMap::new(); let opt_no_css: bool = false; + let opt_no_fonts: bool = false; let opt_no_frames: bool = true; let opt_no_js: bool = false; let opt_no_images: bool = false; @@ -304,10 +319,11 @@ fn passing_no_iframes() { &url, &dom.document, opt_no_css, + opt_no_fonts, + opt_no_frames, opt_no_js, opt_no_images, opt_silent, - opt_no_frames, ); let mut buf: Vec = Vec::new(); @@ -330,6 +346,7 @@ fn passing_no_js() { let cache = &mut HashMap::new(); let opt_no_css: bool = false; + let opt_no_fonts: bool = false; let opt_no_frames: bool = false; let opt_no_js: bool = true; let opt_no_images: bool = false; @@ -343,10 +360,11 @@ fn passing_no_js() { &url, &dom.document, opt_no_css, + opt_no_fonts, + opt_no_frames, opt_no_js, opt_no_images, opt_silent, - opt_no_frames, ); let mut buf: Vec = Vec::new(); @@ -369,6 +387,7 @@ fn passing_with_no_integrity() { let cache = &mut HashMap::new(); let client = Client::new(); let opt_no_css: bool = true; + let opt_no_fonts: bool = false; let opt_no_frames: bool = true; let opt_no_js: bool = true; let opt_no_images: bool = true; @@ -380,10 +399,11 @@ fn passing_with_no_integrity() { &url, &dom.document, opt_no_css, + opt_no_fonts, + opt_no_frames, opt_no_js, opt_no_images, opt_silent, - opt_no_frames, ); let mut buf: Vec = Vec::new();