You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

272 lines
5.8 KiB
Go

package core
import (
"strings"
"github.com/grufwub/go-errors"
)
// HasAsciiControlBytes reports whether raw contains any ASCII control byte,
// i.e. a byte below the space character (0x00-0x1f) or DEL (0x7f).
func HasAsciiControlBytes(raw string) bool {
	for i := 0; i < len(raw); i++ {
		// DEL (0x7f) is the one control byte that sits above the space
		// character, so it needs its own comparison.
		if b := raw[i]; b < ' ' || b == 0x7f {
			return true
		}
	}
	return false
}
// ParseScheme attempts to split a leading URL scheme off raw.
//
// When raw starts with a well-formed scheme (an ASCII letter followed by any
// number of letters, digits, '+', '-' or '.', terminated by ':') it returns
// the lowercased scheme and the text following the ':'. When no scheme is
// present, including for empty input, it returns an empty scheme and raw
// unchanged. A raw string that begins with ':' yields ErrParsingScheme
// extended with raw.
func ParseScheme(raw string) (string, string, error) {
	// Empty input carries no scheme; bail out before raw[0] is indexed,
	// which would otherwise panic.
	if len(raw) == 0 {
		return "", raw, nil
	}

	// The first byte decides early:
	// - a letter may start a scheme, keep scanning
	// - ':' means the scheme is missing entirely, which is an error
	// - anything else cannot start a scheme, so there is none
	c := raw[0]
	switch {
	case ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'):
		// Valid scheme start, continue below.
	case c == ':':
		return "", "", ErrParsingScheme.Extend(raw)
	default:
		return "", raw, nil
	}

	// Scan the remaining bytes until the scheme terminator or an
	// invalid scheme character is found.
	for i := 1; i < len(raw); i++ {
		c := raw[i]
		switch {
		case ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') ||
			('0' <= c && c <= '9') || c == '+' || c == '-' || c == '.':
			// Valid scheme character, keep going.
		case c == ':':
			// End of scheme: schemes are returned lowercased.
			return strings.ToLower(raw[:i]), raw[i+1:], nil
		default:
			// Not a scheme character, so raw has no scheme.
			return "", raw, nil
		}
	}

	// Ran out of input without seeing ':', so there is no scheme.
	return "", raw, nil
}
// isHex reports whether b is an ASCII hexadecimal digit (0-9, a-f or A-F).
func isHex(b byte) bool {
	switch {
	case '0' <= b && b <= '9':
		return true
	case 'a' <= b && b <= 'f':
		return true
	case 'A' <= b && b <= 'F':
		return true
	}
	return false
}
// unHex converts an ASCII hexadecimal digit into its numeric value (0-15).
// Any byte that is not a hexadecimal digit yields 0.
func unHex(b byte) byte {
	if '0' <= b && b <= '9' {
		return b - '0'
	}
	if 'a' <= b && b <= 'f' {
		return b - 'a' + 10
	}
	if 'A' <= b && b <= 'F' {
		return b - 'A' + 10
	}
	return 0
}
// shouldEscape reports whether b has to be percent-encoded. Only the
// unreserved set is left alone: ASCII letters, digits, '-', '_', '.' and '~'.
func shouldEscape(b byte) bool {
	switch {
	case 'a' <= b && b <= 'z', 'A' <= b && b <= 'Z', '0' <= b && b <= '9':
		// Alphanumerics are unreserved.
		return false
	case b == '-', b == '_', b == '.', b == '~':
		// Remaining unreserved marks.
		return false
	}
	// Everything else needs escaping.
	return true
}
// shouldHostEscape reports whether b has to be percent-encoded when it
// appears in the host component. A fixed set of extra bytes is permitted on
// top of what shouldEscape already leaves unescaped.
func shouldHostEscape(b byte) bool {
	switch b {
	case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=':
		// Host sub-delimiters.
		return false
	case ':', '[', ']':
		// ':' introduces the port, '[' and ']' wrap IPv6 literals.
		return false
	case '<', '>', '"':
		// Remaining bytes allowed through as-is (can't be % encoded).
		return false
	}
	// Anything else follows the general escaping rule.
	return shouldEscape(b)
}
// shouldPathEscape reports whether b has to be percent-encoded when it
// appears in a URL path built from a filesystem path.
func shouldPathEscape(b byte) bool {
	switch b {
	case '$', '&', '+', '/', ':', '=', '@':
		// Permitted in a path without encoding.
		return false
	case '?', ';', ',':
		// '?' is reserved. ';' and ',' are legal in a URL path, but when
		// converting a filesystem path into a URL path (how this is used)
		// they still need escaping.
		return true
	}
	// Anything else follows the general escaping rule.
	return shouldEscape(b)
}
// unescape decodes every "%XX" sequence in raw into the single byte it
// encodes and copies all other bytes through untouched.
//
// count is used only to size the output buffer (each sequence shrinks the
// result by two bytes). No bounds or hex validity checks happen here: the two
// bytes after each '%' are indexed directly, so the callers in this file
// validate raw and count the sequences before calling.
func unescape(raw string, count int) string {
	out := strings.Builder{}
	out.Grow(len(raw) - 2*count)

	for pos := 0; pos < len(raw); {
		c := raw[pos]
		if c != '%' {
			// Plain byte, copy as-is.
			out.WriteByte(c)
			pos++
			continue
		}

		// Combine the two hex digits following '%' into one byte.
		hi, lo := unHex(raw[pos+1]), unHex(raw[pos+2])
		out.WriteByte(hi<<4 | lo)
		pos += 3
	}

	return out.String()
}
func unescapeHost(raw string) (string, error) {
// Count all the percent signs
count := 0
for i := 0; i < len(raw); {
switch raw[i] {
case '%':
// Increase count
count++
// If not a valid % encoded hex value, return with error
if i+2 >= len(raw) || !isHex(raw[i+1]) || !isHex(raw[i+2]) {
return "", ErrUnescapingHost.Extend(raw)
}
// In the host component % encoding can only be used
// for non-ASCII bytes. And rfc6874 introduces %25 for
// escaped percent sign in IPv6 literals
if unHex(raw[i+1]) < 8 && raw[i:i+3] != "%25" {
return "", ErrUnescapingHost.Extend(raw)
}
// Skip iteration past the
// hex we just confirmed
i += 3
default:
// If within ASCII range, and shoud be escaped, return error
if raw[i] < 0x80 && shouldHostEscape(raw[i]) {
return "", ErrUnescapingHost.Extend(raw)
}
// Iter
i++
}
}
// No encoding? return as-is. Else, escape
if count == 0 {
return raw, nil
}
return unescape(raw, count), nil
}
// unescapePath validates and percent-decodes a path component.
//
// It returns ErrUnescapingPath extended with raw when a '%' is not followed
// by two hexadecimal digits. When raw contains no percent-encoding it is
// returned unchanged.
func unescapePath(raw string) (string, error) {
	// Number of "%XX" sequences seen so far.
	escapes := 0

	for i := 0; i < len(raw); {
		if raw[i] != '%' {
			i++
			continue
		}

		escapes++

		// '%' must be followed by two valid hex digits.
		if i+2 >= len(raw) || !isHex(raw[i+1]) || !isHex(raw[i+2]) {
			return "", ErrUnescapingPath.Extend(raw)
		}

		// Step over the sequence just validated.
		i += 3
	}

	// Nothing encoded: hand back the input untouched.
	if escapes == 0 {
		return raw, nil
	}
	return unescape(raw, escapes), nil
}
// EscapePath percent-encodes every byte of path for which shouldPathEscape
// reports true, using uppercase hexadecimal digits. A path that needs no
// escaping is returned unchanged.
func EscapePath(path string) string {
	const hexDigits = "0123456789ABCDEF"

	// First pass: count the bytes that need encoding, so the common
	// nothing-to-do case returns early and the buffer can be sized exactly.
	escapes := 0
	for i := 0; i < len(path); i++ {
		if shouldPathEscape(path[i]) {
			escapes++
		}
	}
	if escapes == 0 {
		return path
	}

	// Each escaped byte grows from one byte to three ("%XX").
	var out strings.Builder
	out.Grow(len(path) + 2*escapes)

	// Second pass: emit the encoded path.
	for i := 0; i < len(path); i++ {
		b := path[i]
		if !shouldPathEscape(b) {
			out.WriteByte(b)
			continue
		}
		out.WriteByte('%')
		out.WriteByte(hexDigits[b>>4])
		out.WriteByte(hexDigits[b&0x0f])
	}

	return out.String()
}
// ParseEncodedHost unescapes percent-encoded host info and splits it into
// host and port.
//
// An unescaping failure is returned wrapped within ErrParsingHost.
// NOTE(review): the type assertion assumes unescapeHost only ever returns
// errors.Error values; confirm against ErrUnescapingHost's definition.
func ParseEncodedHost(raw string) (string, string, error) {
	unescaped, err := unescapeHost(raw)
	if err != nil {
		return "", "", err.(errors.Error).WrapWithin(ErrParsingHost)
	}

	// Separate host from port using SplitByLast with ":" as separator.
	host, port := SplitByLast(unescaped, ":")
	return host, port, nil
}
// ParseEncodedURI splits a received URI into path and query, returning the
// unescaped path and the query exactly as received (still escaped).
//
// An unescaping failure is returned wrapped within ErrParsingURI.
// NOTE(review): the type assertion assumes unescapePath only ever returns
// errors.Error values; confirm against ErrUnescapingPath's definition.
func ParseEncodedURI(received string) (string, string, error) {
	// Split into path and query using SplitBy with "?" as separator.
	encodedPath, query := SplitBy(received, "?")

	// Only the path is decoded here; the query is left untouched,
	// since interpreting it is up to CGI scripts.
	path, err := unescapePath(encodedPath)
	if err != nil {
		return "", "", err.(errors.Error).WrapWithin(ErrParsingURI)
	}

	return path, query, nil
}