mirror of https://github.com/Y2Z/monolith
add support for wider range of charsets
parent
22a031af5d
commit
b8aa545e8c
@ -0,0 +1,9 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html;charset=GB2312"/>
|
||||
<title>近七成人减少线下需求 银行数字化转型提速--经济·科技--人民网 </title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>近七成人减少线下需求 银行数字化转型提速</h1>
|
||||
</body>
|
||||
</html>
|
@ -0,0 +1,72 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    use crate::html;

    /// A lone `<meta http-equiv="content-type">` tag is expected to yield the
    /// charset named in its `content` attribute.
    #[test]
    fn meta_content_type() {
        let markup = "<!doctype html>
<html>
<head>
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
</head>
<body>
</body>
</html>";
        let bytes = markup.as_bytes().to_vec();
        let dom = html::html_to_dom(&bytes, str!());
        let detected = html::get_charset(&dom.document);

        assert_eq!(detected, Some(str!("GB2312")));
    }

    /// A lone `<meta charset>` tag is expected to yield its attribute value.
    #[test]
    fn meta_charset() {
        let markup = "<!doctype html>
<html>
<head>
<meta charset=\"GB2312\" />
</head>
<body>
</body>
</html>";
        let bytes = markup.as_bytes().to_vec();
        let dom = html::html_to_dom(&bytes, str!());
        let detected = html::get_charset(&dom.document);

        assert_eq!(detected, Some(str!("GB2312")));
    }

    /// With two conflicting declarations, the one appearing first in the
    /// document is expected to win; here that is `<meta charset>`.
    #[test]
    fn multiple_conflicting_meta_charset_first() {
        let markup = "<!doctype html>
<html>
<head>
<meta charset=\"utf-8\" />
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
</head>
<body>
</body>
</html>";
        let bytes = markup.as_bytes().to_vec();
        let dom = html::html_to_dom(&bytes, str!());
        let detected = html::get_charset(&dom.document);

        assert_eq!(detected, Some(str!("utf-8")));
    }

    /// Same conflict as above with the order swapped: the `content-type`
    /// declaration comes first, so its charset is the expected result.
    #[test]
    fn multiple_conflicting_meta_content_type_first() {
        let markup = "<!doctype html>
<html>
<head>
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
<meta charset=\"utf-8\" />
</head>
<body>
</body>
</html>";
        let bytes = markup.as_bytes().to_vec();
        let dom = html::html_to_dom(&bytes, str!());
        let detected = html::get_charset(&dom.document);

        assert_eq!(detected, Some(str!("GB2312")));
    }
}
|
@ -1,3 +1,4 @@
|
||||
mod detect_media_type;
|
||||
mod indent;
|
||||
mod parse_content_type;
|
||||
mod retrieve_asset;
|
||||
|
@ -0,0 +1,86 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    use crate::utils;

    /// Feeds `input` to `utils::parse_content_type` and compares each element
    /// of the returned `(media_type, charset, is_base64)` tuple with the
    /// expected value.
    fn check(input: &str, want_media_type: &str, want_charset: &str, want_base64: bool) {
        let (media_type, charset, is_base64) = utils::parse_content_type(input);

        assert_eq!(media_type, want_media_type);
        assert_eq!(charset, want_charset);
        assert_eq!(is_base64, want_base64);
    }

    /// Media type followed by an explicit charset parameter.
    #[test]
    fn text_plain_utf8() {
        check("text/plain;charset=utf8", "text/plain", "utf8", false);
    }

    /// Whitespace around the media type and the parameter is expected to be
    /// ignored.
    #[test]
    fn text_plain_utf8_spaces() {
        check(" text/plain ; charset=utf8 ", "text/plain", "utf8", false);
    }

    /// An empty input is expected to fall back to `text/plain` / `US-ASCII`.
    #[test]
    fn empty() {
        check("", "text/plain", "US-ASCII", false);
    }

    /// A bare `;base64` flag sets the base64 marker and keeps the defaults.
    #[test]
    fn base64() {
        check(";base64", "text/plain", "US-ASCII", true);
    }

    /// Media type combined with the base64 flag and no charset.
    #[test]
    fn text_html_base64() {
        check("text/html;base64", "text/html", "US-ASCII", true);
    }

    /// Media type with no parameters at all.
    #[test]
    fn only_media_type() {
        check("text/html", "text/html", "US-ASCII", false);
    }

    /// Media type with a trailing separator and nothing after it.
    /// NOTE: despite the test's name, the separator exercised is a semicolon.
    #[test]
    fn only_media_type_colon() {
        check("text/html;", "text/html", "US-ASCII", false);
    }

    /// The charset is expected to be found when it precedes an unrelated
    /// parameter.
    #[test]
    fn media_type_gb2312_filename() {
        check(
            "text/html;charset=GB2312;filename=index.html",
            "text/html",
            "GB2312",
            false,
        );
    }

    /// The charset is expected to be found when it follows an unrelated
    /// parameter.
    #[test]
    fn media_type_filename_gb2312() {
        check(
            "text/html;filename=index.html;charset=GB2312",
            "text/html",
            "GB2312",
            false,
        );
    }
}
|
Loading…
Reference in New Issue