You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
272 lines
5.8 KiB
Go
272 lines
5.8 KiB
Go
package core
|
|
|
|
import (
|
|
"strings"
|
|
|
|
"github.com/grufwub/go-errors"
|
|
)
|
|
|
|
// HasAsciiControlBytes reports whether raw contains any ASCII control byte,
// i.e. a byte below 0x20 (space) or the DEL byte 0x7f.
func HasAsciiControlBytes(raw string) bool {
	for i := 0; i < len(raw); i++ {
		// DEL (0x7f) is the one control byte that sits above the printable
		// range, so it needs its own comparison. The previous check against
		// 0xf was redundant (already below ' ') and let DEL through.
		if raw[i] < ' ' || raw[i] == 0x7f {
			return true
		}
	}
	return false
}
|
|
|
|
// ParseScheme attempts to parse a scheme from a raw url
|
|
func ParseScheme(raw string) (string, string, error) {
|
|
// If first char is:
|
|
// - valid ascii (but non-scheme), return here no errors
|
|
// - end of scheme char, return bad request error
|
|
c := raw[0]
|
|
switch {
|
|
case ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'):
|
|
// All good, continue
|
|
case c == ':':
|
|
return "", "", ErrParsingScheme.Extend(raw)
|
|
default:
|
|
// Invalid scheme char (or scheme first-char) return
|
|
return "", raw, nil
|
|
}
|
|
|
|
// Iterate
|
|
for i := 1; i < len(raw); i++ {
|
|
c := raw[i]
|
|
switch {
|
|
case ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') ||
|
|
('0' <= c && c <= '9') || c == '+' || c == '-' || c == '.':
|
|
// Is valid ASCII, do nothing
|
|
case c == ':':
|
|
// Return the scheme (lowercase)
|
|
return strings.ToLower(raw[:i]), raw[i+1:], nil
|
|
default:
|
|
// Invalid char, return
|
|
return "", raw, nil
|
|
}
|
|
}
|
|
|
|
return "", raw, nil
|
|
}
|
|
|
|
// isHex reports whether b is an ASCII hexadecimal digit (0-9, a-f or A-F).
func isHex(b byte) bool {
	switch {
	case '0' <= b && b <= '9':
		return true
	case 'a' <= b && b <= 'f':
		return true
	case 'A' <= b && b <= 'F':
		return true
	}
	return false
}
|
|
|
|
// unHex converts an ASCII hexadecimal digit into its numeric value (0-15).
// Any byte that is not a hexadecimal digit yields 0.
func unHex(b byte) byte {
	if '0' <= b && b <= '9' {
		return b - '0'
	}
	if 'a' <= b && b <= 'f' {
		return 10 + (b - 'a')
	}
	if 'A' <= b && b <= 'F' {
		return 10 + (b - 'A')
	}
	return 0
}
|
|
|
|
// shouldEscape reports whether b must be percent-encoded. Only ASCII
// letters, digits and the marks '-', '_', '.', '~' are left unescaped;
// every other byte returns true.
func shouldEscape(b byte) bool {
	switch {
	// Alphanumerics are unreserved
	case 'a' <= b && b <= 'z', 'A' <= b && b <= 'Z', '0' <= b && b <= '9':
		return false

	// Remaining unreserved marks
	case b == '-', b == '_', b == '.', b == '~':
		return false
	}

	// Everything else gets escaped
	return true
}
|
|
|
|
func shouldHostEscape(b byte) bool {
|
|
switch b {
|
|
// Allowed host sub-delims +
|
|
// ':' for port +
|
|
// '[]' for ipv6 +
|
|
// '<>' only others we can allow (can't be % encoded)
|
|
case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
|
|
return false
|
|
|
|
// Check all-else
|
|
default:
|
|
return shouldEscape(b)
|
|
}
|
|
}
|
|
|
|
func shouldPathEscape(b byte) bool {
|
|
switch b {
|
|
// Reserved character in path.
|
|
// Bear in mind ;, ARE allowed in a URL path,
|
|
// but when converting from a filesystem-->URL path
|
|
// (how this will be used), it will need escaping.
|
|
case '?', ';', ',':
|
|
return true
|
|
|
|
// Allowed in path
|
|
case '$', '&', '+', '/', ':', '=', '@':
|
|
return false
|
|
|
|
// Check all-else
|
|
default:
|
|
return shouldEscape(b)
|
|
}
|
|
}
|
|
|
|
func unescape(raw string, count int) string {
|
|
var t strings.Builder
|
|
t.Grow(len(raw) - 2*count)
|
|
for i := 0; i < len(raw); i++ {
|
|
switch raw[i] {
|
|
// Replace % encoded char
|
|
case '%':
|
|
t.WriteByte(unHex(raw[i+1])<<4 | unHex(raw[i+2]))
|
|
i += 2
|
|
|
|
// Write as-is
|
|
default:
|
|
t.WriteByte(raw[i])
|
|
}
|
|
}
|
|
return t.String()
|
|
}
|
|
|
|
func unescapeHost(raw string) (string, error) {
|
|
// Count all the percent signs
|
|
count := 0
|
|
for i := 0; i < len(raw); {
|
|
switch raw[i] {
|
|
case '%':
|
|
// Increase count
|
|
count++
|
|
|
|
// If not a valid % encoded hex value, return with error
|
|
if i+2 >= len(raw) || !isHex(raw[i+1]) || !isHex(raw[i+2]) {
|
|
return "", ErrUnescapingHost.Extend(raw)
|
|
}
|
|
|
|
// In the host component % encoding can only be used
|
|
// for non-ASCII bytes. And rfc6874 introduces %25 for
|
|
// escaped percent sign in IPv6 literals
|
|
if unHex(raw[i+1]) < 8 && raw[i:i+3] != "%25" {
|
|
return "", ErrUnescapingHost.Extend(raw)
|
|
}
|
|
|
|
// Skip iteration past the
|
|
// hex we just confirmed
|
|
i += 3
|
|
default:
|
|
// If within ASCII range, and shoud be escaped, return error
|
|
if raw[i] < 0x80 && shouldHostEscape(raw[i]) {
|
|
return "", ErrUnescapingHost.Extend(raw)
|
|
}
|
|
|
|
// Iter
|
|
i++
|
|
}
|
|
}
|
|
|
|
// No encoding? return as-is. Else, escape
|
|
if count == 0 {
|
|
return raw, nil
|
|
}
|
|
return unescape(raw, count), nil
|
|
}
|
|
|
|
func unescapePath(raw string) (string, error) {
|
|
// Count all the percent signs
|
|
count := 0
|
|
length := len(raw)
|
|
for i := 0; i < length; {
|
|
switch raw[i] {
|
|
case '%':
|
|
// Increase count
|
|
count++
|
|
|
|
// If not a valid % encoded hex value, return with error
|
|
if i+2 >= length || !isHex(raw[i+1]) || !isHex(raw[i+2]) {
|
|
return "", ErrUnescapingPath.Extend(raw)
|
|
}
|
|
|
|
// Skip iteration past the
|
|
// hex we just confirmed
|
|
i += 3
|
|
default:
|
|
i++
|
|
}
|
|
}
|
|
|
|
// No encoding? return as-is. Else, escape
|
|
if count == 0 {
|
|
return raw, nil
|
|
}
|
|
return unescape(raw, count), nil
|
|
}
|
|
|
|
// EscapePath escapes a URL path
|
|
func EscapePath(path string) string {
|
|
const upperhex = "0123456789ABCDEF"
|
|
|
|
count := 0
|
|
for i := 0; i < len(path); i++ {
|
|
if shouldPathEscape(path[i]) {
|
|
count++
|
|
}
|
|
}
|
|
|
|
if count == 0 {
|
|
return path
|
|
}
|
|
|
|
sb := strings.Builder{}
|
|
sb.Grow(len(path) + 2*count)
|
|
for i := 0; i < len(path); i++ {
|
|
c := path[i]
|
|
if shouldPathEscape(c) {
|
|
sb.WriteByte('%')
|
|
sb.WriteByte(upperhex[c>>4])
|
|
sb.WriteByte(upperhex[c&15])
|
|
} else {
|
|
sb.WriteByte(c)
|
|
}
|
|
}
|
|
|
|
return sb.String()
|
|
}
|
|
|
|
// ParseEncodedHost parses encoded host info, safely returning unescape host and port
|
|
func ParseEncodedHost(raw string) (string, string, error) {
|
|
// Unescape the host info
|
|
raw, err := unescapeHost(raw)
|
|
if err != nil {
|
|
return "", "", err.(errors.Error).WrapWithin(ErrParsingHost)
|
|
}
|
|
|
|
// Split by last ':' and return
|
|
host, port := SplitByLast(raw, ":")
|
|
return host, port, nil
|
|
}
|
|
|
|
// ParseEncodedURI parses encoded URI, safely returning unescaped path and still-escaped query
|
|
func ParseEncodedURI(received string) (string, string, error) {
|
|
// Split into path and query
|
|
rawPath, query := SplitBy(received, "?")
|
|
|
|
// Unescape path, query is up-to CGI scripts
|
|
rawPath, err := unescapePath(rawPath)
|
|
if err != nil {
|
|
return "", "", err.(errors.Error).WrapWithin(ErrParsingURI)
|
|
}
|
|
|
|
// Return the raw path and query
|
|
return rawPath, query, nil
|
|
}
|