diff --git a/README.md b/README.md index f6c3e60..d349cef 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ If compared to saving websites with `wget -mpk`, this tool embeds all assets as ## Options - `-c`: Ignore styles - - `-f`: Exclude iframes + - `-f`: Exclude frames and iframes - `-i`: Remove images - `-I`: Isolate the document - `-j`: Exclude JavaScript diff --git a/src/args.rs b/src/args.rs index ca72efc..75a429c 100644 --- a/src/args.rs +++ b/src/args.rs @@ -34,7 +34,7 @@ impl AppArgs { ) // .args_from_usage("-a, --include-audio 'Removes audio sources'") .args_from_usage("-c, --no-css 'Removes CSS'") - .args_from_usage("-f, --no-frames 'Removes iframes'") + .args_from_usage("-f, --no-frames 'Removes frames and iframes'") .args_from_usage("-i, --no-images 'Removes images'") .args_from_usage("-I, --isolate 'Cuts off document from the Internet'") .args_from_usage("-j, --no-js 'Removes JavaScript'") diff --git a/src/html.rs b/src/html.rs index e6ded28..2711266 100644 --- a/src/html.rs +++ b/src/html.rs @@ -386,7 +386,7 @@ pub fn walk_and_embed_assets( } } } - "iframe" => { + "frame" | "iframe" => { for attr in attrs_mut.iter_mut() { if &attr.name.local == "src" { if opt_no_frames { @@ -395,15 +395,15 @@ pub fn walk_and_embed_assets( continue; } - let iframe_src = attr.value.trim(); + let frame_src = attr.value.trim(); - // Ignore iframes with empty source (they cause infinite loops) - if iframe_src.is_empty() { + // Ignore (i)frames with empty source — they cause infinite loops + if frame_src.is_empty() { continue; } - let src_full_url = resolve_url(&url, iframe_src).unwrap_or_default(); - let (iframe_data, iframe_final_url) = retrieve_asset( + let src_full_url = resolve_url(&url, frame_src).unwrap_or_default(); + let (frame_data, frame_final_url) = retrieve_asset( cache, client, &src_full_url, @@ -412,11 +412,11 @@ pub fn walk_and_embed_assets( opt_silent, ) .unwrap_or((str!(), src_full_url)); - let dom = html_to_dom(&iframe_data); + let dom = 
html_to_dom(&frame_data); walk_and_embed_assets( cache, client, - &iframe_final_url, + &frame_final_url, &dom.document, opt_no_css, opt_no_js, diff --git a/src/tests/html.rs b/src/tests/html.rs index 6ea6d76..00c5a31 100644 --- a/src/tests/html.rs +++ b/src/tests/html.rs @@ -132,6 +132,42 @@ fn test_walk_and_embed_assets_ensure_no_recursive_iframe() { ); } +#[test] +fn test_walk_and_embed_assets_ensure_no_recursive_frame() { + let html = ""; + let dom = html_to_dom(&html); + let url = "http://localhost"; + let cache = &mut HashMap::new(); + + let opt_no_css: bool = false; + let opt_no_frames: bool = false; + let opt_no_js: bool = false; + let opt_no_images: bool = false; + let opt_silent = true; + + let client = Client::new(); + + walk_and_embed_assets( + cache, + &client, + &url, + &dom.document, + opt_no_css, + opt_no_js, + opt_no_images, + opt_silent, + opt_no_frames, + ); + + let mut buf: Vec<u8> = Vec::new(); + serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); + + assert_eq!( + buf.iter().map(|&c| c as char).collect::<String>(), + "" + ); +} + #[test] fn test_walk_and_embed_assets_no_css() { let html = "\ @@ -227,6 +263,41 @@ fn test_walk_and_embed_assets_no_images() { #[test] fn test_walk_and_embed_assets_no_frames() { + let html = ""; + let dom = html_to_dom(&html); + let url = "http://localhost"; + let cache = &mut HashMap::new(); + + let opt_no_css: bool = false; + let opt_no_frames: bool = true; + let opt_no_js: bool = false; + let opt_no_images: bool = false; + let opt_silent = true; + let client = Client::new(); + + walk_and_embed_assets( + cache, + &client, + &url, + &dom.document, + opt_no_css, + opt_no_js, + opt_no_images, + opt_silent, + opt_no_frames, + ); + + let mut buf: Vec<u8> = Vec::new(); + serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); + + assert_eq!( + buf.iter().map(|&c| c as char).collect::<String>(), + "" + ); +} + +#[test] +fn test_walk_and_embed_assets_no_iframes() { let html = ""; let dom = html_to_dom(&html); let 
url = "http://localhost";