@ -474,8 +474,9 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String {
result = String ::from_utf8 ( buf ) . unwrap ( ) ;
result = String ::from_utf8 ( buf ) . unwrap ( ) ;
}
}
// Unwrap NOSCRIPT elements
if options . unwrap_noscript {
if options . unwrap_noscript {
let noscript_re = Regex ::new ( r"<(?P<c>/?noscript )>") . unwrap ( ) ;
let noscript_re = Regex ::new ( r"<(?P<c>/?noscript [^>]* )>") . unwrap ( ) ;
result = noscript_re . replace_all ( & result , "<!--$c-->" ) . to_string ( ) ;
result = noscript_re . replace_all ( & result , "<!--$c-->" ) . to_string ( ) ;
}
}
@ -503,44 +504,39 @@ pub fn retrieve_and_embed_asset(
depth + 1 ,
depth + 1 ,
) {
) {
Ok ( ( data , final_url , mut media_type ) ) = > {
Ok ( ( data , final_url , mut media_type ) ) = > {
// Check integrity if it's a LINK or SCRIPT tag
let node_name : & str = get_node_name ( & node ) . unwrap ( ) ;
let node_name : & str = get_node_name ( & node ) . unwrap ( ) ;
let mut ok_to_include : bool = true ;
// Check integrity if it's a LINK or SCRIPT element
let mut ok_to_include : bool = true ;
if node_name = = "link" | | node_name = = "script" {
if node_name = = "link" | | node_name = = "script" {
let node_integrity_attr_value : Option < String > = get_node_attr ( node , "integrity" ) ;
// Check integrity
// Check integrity
if let Some ( node_integrity_attr_value ) = node_integrity_attr_value {
if let Some ( node_integrity_attr_value ) = get_node_attr ( node , "integrity" ) {
if ! node_integrity_attr_value . is_empty ( ) {
if ! node_integrity_attr_value . is_empty ( ) {
ok_to_include = check_integrity ( & data , & node_integrity_attr_value ) ;
ok_to_include = check_integrity ( & data , & node_integrity_attr_value ) ;
}
}
}
// Wipe integrity attribute
// Wipe the integrity attribute
set_node_attr ( node , "integrity" , None ) ;
set_node_attr ( node , "integrity" , None ) ;
}
}
}
if ok_to_include {
if ok_to_include {
if node_name = = "link" {
if node_name = = "link" & & determine_link_node_type ( node ) = = "stylesheet" {
let link_type : & str = determine_link_node_type ( node ) ;
// Stylesheet LINK elements require special treatment
// CSS LINK nodes requires special treatment
let css : String = embed_css (
if link_type = = "stylesheet" {
cache ,
let css : String = embed_css (
client ,
cache ,
& final_url ,
client ,
& String ::from_utf8_lossy ( & data ) ,
& final_url ,
options ,
& String ::from_utf8_lossy ( & data ) ,
depth + 1 ,
options ,
) ;
depth + 1 ,
) ;
let css_data_url = create_data_url ( "text/css" , css . as_bytes ( ) , & final_url ) ;
set_node_attr ( & node , attr_name , Some ( css_data_url . to_string ( ) ) ) ;
return ; // Do not fall through
// Create and embed data URL
}
let css_data_url = create_data_url ( "text/css" , css . as_bytes ( ) , & final_url ) ;
set_node_attr ( & node , attr_name , Some ( css_data_url . to_string ( ) ) ) ;
} else if node_name = = "frame" | | node_name = = "iframe" {
} else if node_name = = "frame" | | node_name = = "iframe" {
// (I)FRAMEs are also quite different from conventional resources
let frame_dom = html_to_dom ( & String ::from_utf8_lossy ( & data ) ) ;
let frame_dom = html_to_dom ( & String ::from_utf8_lossy ( & data ) ) ;
walk_and_embed_assets (
walk_and_embed_assets (
cache ,
cache ,
@ -559,30 +555,38 @@ pub fn retrieve_and_embed_asset(
)
)
. unwrap ( ) ;
. unwrap ( ) ;
// Create and embed data URL
let mut frame_data_url = create_data_url ( & media_type , & frame_data , & final_url ) ;
let mut frame_data_url = create_data_url ( & media_type , & frame_data , & final_url ) ;
frame_data_url . set_fragment ( resolved_url . fragment ( ) ) ;
frame_data_url . set_fragment ( resolved_url . fragment ( ) ) ;
set_node_attr ( node , attr_name , Some ( frame_data_url . to_string ( ) ) ) ;
set_node_attr ( node , attr_name , Some ( frame_data_url . to_string ( ) ) ) ;
} else {
// Every other type of element gets processed here
// Parse media type for SCRIPT elements
if node_name = = "script" {
if let Some ( _ ) = get_node_attr ( node , "src" ) {
if let Some ( script_node_type_attr_value ) = get_node_attr ( node , "type" ) {
media_type = script_node_type_attr_value . to_string ( ) ;
} else {
// Fallback to default one if it's not specified
media_type = "application/javascript" . to_string ( ) ;
}
}
}
return ; // Do not fall through
// Create and embed data URL
}
let mut data_url = create_data_url ( & media_type , & data , & final_url ) ;
data_url . set_fragment ( resolved_url . fragment ( ) ) ;
// Everything else
set_node_attr ( node , attr_name , Some ( data_url . to_string ( ) ) ) ;
if node_name = = "script" {
media_type = "application/javascript" . to_string ( ) ;
}
}
let mut data_url = create_data_url ( & media_type , & data , & final_url ) ;
data_url . set_fragment ( resolved_url . fragment ( ) ) ;
set_node_attr ( node , attr_name , Some ( data_url . to_string ( ) ) ) ;
}
}
}
}
Err ( _ ) = > {
Err ( _ ) = > {
if resolved_url . scheme ( ) = = "http" | | resolved_url . scheme ( ) = = "https" {
if resolved_url . scheme ( ) = = "http" | | resolved_url . scheme ( ) = = "https" {
// Keep remote reference if unable to retrieve the asset
// Keep remote reference s if unable to retrieve the asset
set_node_attr ( node , attr_name , Some ( resolved_url . to_string ( ) ) ) ;
set_node_attr ( node , attr_name , Some ( resolved_url . to_string ( ) ) ) ;
} else {
} else {
// Exclude non-remote URLs
// Remove local references if they can't be successfully embedded as data URLs
set_node_attr ( node , attr_name , None ) ;
set_node_attr ( node , attr_name , None ) ;
}
}
}
}
@ -645,7 +649,7 @@ pub fn walk_and_embed_assets(
let link_type : & str = determine_link_node_type ( node ) ;
let link_type : & str = determine_link_node_type ( node ) ;
if link_type = = "icon" {
if link_type = = "icon" {
// Find and resolve this LINK node 's href attribute
// Find and resolve LINK's href attribute
if let Some ( link_attr_href_value ) = get_node_attr ( node , "href" ) {
if let Some ( link_attr_href_value ) = get_node_attr ( node , "href" ) {
if ! options . no_images & & ! link_attr_href_value . is_empty ( ) {
if ! options . no_images & & ! link_attr_href_value . is_empty ( ) {
retrieve_and_embed_asset (
retrieve_and_embed_asset (
@ -663,10 +667,12 @@ pub fn walk_and_embed_assets(
}
}
}
}
} else if link_type = = "stylesheet" {
} else if link_type = = "stylesheet" {
// Find and resolve this LINK node 's href attribute
// Resolve LINK 's href attribute
if let Some ( link_attr_href_value ) = get_node_attr ( node , "href" ) {
if let Some ( link_attr_href_value ) = get_node_attr ( node , "href" ) {
if options . no_css {
if options . no_css {
set_node_attr ( node , "href" , None ) ;
set_node_attr ( node , "href" , None ) ;
// Wipe integrity attribute
set_node_attr ( node , "integrity" , None ) ;
} else {
} else {
if ! link_attr_href_value . is_empty ( ) {
if ! link_attr_href_value . is_empty ( ) {
retrieve_and_embed_asset (
retrieve_and_embed_asset (
@ -916,14 +922,15 @@ pub fn walk_and_embed_assets(
// Replace with empty JS call to preserve original behavior
// Replace with empty JS call to preserve original behavior
set_node_attr ( node , "href" , Some ( str ! ( "javascript:;" ) ) ) ;
set_node_attr ( node , "href" , Some ( str ! ( "javascript:;" ) ) ) ;
}
}
} else if anchor_attr_href_value . clone ( ) . starts_with ( '#' )
| | is_url_and_has_protocol ( & anchor_attr_href_value . clone ( ) )
{
// Don't touch mailto: links or hrefs which begin with a hash sign
} else {
} else {
let href_full_url : Url =
// Don't touch mailto: links or hrefs which begin with a hash sign
resolve_url ( document_url , & anchor_attr_href_value ) ;
if ! anchor_attr_href_value . clone ( ) . starts_with ( '#' )
set_node_attr ( node , "href" , Some ( href_full_url . to_string ( ) ) ) ;
& & ! is_url_and_has_protocol ( & anchor_attr_href_value . clone ( ) )
{
let href_full_url : Url =
resolve_url ( document_url , & anchor_attr_href_value ) ;
set_node_attr ( node , "href" , Some ( href_full_url . to_string ( ) ) ) ;
}
}
}
}
}
}
}
@ -937,6 +944,8 @@ pub fn walk_and_embed_assets(
// Remove src attribute
// Remove src attribute
if script_attr_src ! = None {
if script_attr_src ! = None {
set_node_attr ( node , "src" , None ) ;
set_node_attr ( node , "src" , None ) ;
// Wipe integrity attribute
set_node_attr ( node , "integrity" , None ) ;
}
}
} else if ! script_attr_src . clone ( ) . unwrap_or_default ( ) . is_empty ( ) {
} else if ! script_attr_src . clone ( ) . unwrap_or_default ( ) . is_empty ( ) {
retrieve_and_embed_asset (
retrieve_and_embed_asset (
@ -1081,7 +1090,7 @@ pub fn walk_and_embed_assets(
) ;
) ;
// Get rid of original contents
// Get rid of original contents
noscript_contents . clear ( ) ;
noscript_contents . clear ( ) ;
// Insert HTML containing embedded assets back into NOSCRIPT node
// Insert HTML containing embedded assets into NOSCRIPT node
if let Some ( html ) =
if let Some ( html ) =
get_child_node_by_name ( & noscript_contents_dom . document , "html" )
get_child_node_by_name ( & noscript_contents_dom . document , "html" )
{
{