You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
cointop/vendor/go4.org/bytereplacer/bytereplacer_test.go

424 lines
11 KiB
Go

/*
Copyright 2015 The Perkeep Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package bytereplacer
import (
"bytes"
"strings"
"testing"
)
var htmlEscaper = New(
"&", "&",
"<", "&lt;",
">", "&gt;",
`"`, "&quot;",
"'", "&apos;",
)
var htmlUnescaper = New(
"&amp;", "&",
"&lt;", "<",
"&gt;", ">",
"&quot;", `"`,
"&apos;", "'",
)
var capitalLetters = New("a", "A", "b", "B")
func TestReplacer(t *testing.T) {
type testCase struct {
r *Replacer
in, out string
}
var testCases []testCase
// str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
str := func(b byte) string {
return string([]byte{b})
}
var s []string
// inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
s = nil
for i := 0; i < 256; i++ {
s = append(s, str(byte(i)), str(byte(i+1)))
}
inc := New(s...)
// Test cases with 1-byte old strings, 1-byte new strings.
testCases = append(testCases,
testCase{capitalLetters, "brad", "BrAd"},
testCase{capitalLetters, strings.Repeat("a", (32<<10)+123), strings.Repeat("A", (32<<10)+123)},
testCase{capitalLetters, "", ""},
testCase{inc, "brad", "csbe"},
testCase{inc, "\x00\xff", "\x01\x00"},
testCase{inc, "", ""},
testCase{New("a", "1", "a", "2"), "brad", "br1d"},
)
// repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
s = nil
for i := 0; i < 256; i++ {
n := i + 1 - 'a'
if n < 1 {
n = 1
}
s = append(s, str(byte(i)), strings.Repeat(str(byte(i)), n))
}
repeat := New(s...)
// Test cases with 1-byte old strings, variable length new strings.
testCases = append(testCases,
testCase{htmlEscaper, "No changes", "No changes"},
testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
testCase{htmlEscaper, "", ""},
testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
testCase{repeat, "abba", "abbbba"},
testCase{repeat, "", ""},
testCase{New("a", "11", "a", "22"), "brad", "br11d"},
)
// The remaining test cases have variable length old strings.
testCases = append(testCases,
testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
testCase{htmlUnescaper, "", ""},
testCase{New("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
testCase{New("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
testCase{New("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
)
// gen1 has multiple old strings of variable length. There is no
// overall non-empty common prefix, but some pairwise common prefixes.
gen1 := New(
"aaa", "3[aaa]",
"aa", "2[aa]",
"a", "1[a]",
"i", "i",
"longerst", "most long",
"longer", "medium",
"long", "short",
"xx", "xx",
"x", "X",
"X", "Y",
"Y", "Z",
)
testCases = append(testCases,
testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
testCase{gen1, "long, longerst, longer", "short, most long, medium"},
testCase{gen1, "xxxxx", "xxxxX"},
testCase{gen1, "XiX", "YiY"},
testCase{gen1, "", ""},
)
// gen2 has multiple old strings with no pairwise common prefix.
gen2 := New(
"roses", "red",
"violets", "blue",
"sugar", "sweet",
)
testCases = append(testCases,
testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
testCase{gen2, "", ""},
)
// gen3 has multiple old strings with an overall common prefix.
gen3 := New(
"abracadabra", "poof",
"abracadabrakazam", "splat",
"abraham", "lincoln",
"abrasion", "scrape",
"abraham", "isaac",
)
testCases = append(testCases,
testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
testCase{gen3, "abrasion abracad", "scrape abracad"},
testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
testCase{gen3, "", ""},
)
// foo{1,2,3,4} have multiple old strings with an overall common prefix
// and 1- or 2- byte extensions from the common prefix.
foo1 := New(
"foo1", "A",
"foo2", "B",
"foo3", "C",
)
foo2 := New(
"foo1", "A",
"foo2", "B",
"foo31", "C",
"foo32", "D",
)
foo3 := New(
"foo11", "A",
"foo12", "B",
"foo31", "C",
"foo32", "D",
)
foo4 := New(
"foo12", "B",
"foo32", "D",
)
testCases = append(testCases,
testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
testCase{foo1, "", ""},
testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
testCase{foo2, "", ""},
testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
testCase{foo3, "", ""},
testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
testCase{foo4, "", ""},
)
// genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
allBytes := make([]byte, 256)
for i := range allBytes {
allBytes[i] = byte(i)
}
allString := string(allBytes)
genAll := New(
allString, "[all]",
"\xff", "[ff]",
"\x00", "[00]",
)
testCases = append(testCases,
testCase{genAll, allString, "[all]"},
testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
testCase{genAll, "", ""},
)
// Test cases with empty old strings.
blankToX1 := New("", "X")
blankToX2 := New("", "X", "", "")
blankHighPriority := New("", "X", "o", "O")
blankLowPriority := New("o", "O", "", "X")
blankNoOp1 := New("", "")
blankNoOp2 := New("", "", "", "A")
blankFoo := New("", "X", "foobar", "R", "foobaz", "Z")
testCases = append(testCases,
testCase{blankToX1, "foo", "XfXoXoX"},
testCase{blankToX1, "", "X"},
testCase{blankToX2, "foo", "XfXoXoX"},
testCase{blankToX2, "", "X"},
testCase{blankHighPriority, "oo", "XOXOX"},
testCase{blankHighPriority, "ii", "XiXiX"},
testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
testCase{blankHighPriority, "", "X"},
testCase{blankLowPriority, "oo", "OOX"},
testCase{blankLowPriority, "ii", "XiXiX"},
testCase{blankLowPriority, "oiio", "OXiXiOX"},
testCase{blankLowPriority, "iooi", "XiOOXiX"},
testCase{blankLowPriority, "", "X"},
testCase{blankNoOp1, "foo", "foo"},
testCase{blankNoOp1, "", ""},
testCase{blankNoOp2, "foo", "foo"},
testCase{blankNoOp2, "", ""},
testCase{blankFoo, "foobarfoobaz", "XRXZX"},
testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
testCase{blankFoo, "", "X"},
)
// single string replacer
abcMatcher := New("abc", "[match]")
testCases = append(testCases,
testCase{abcMatcher, "", ""},
testCase{abcMatcher, "ab", "ab"},
testCase{abcMatcher, "abc", "[match]"},
testCase{abcMatcher, "abcd", "[match]d"},
testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
)
// Issue 6659 cases (more single string replacer)
noHello := New("Hello", "")
testCases = append(testCases,
testCase{noHello, "Hello", ""},
testCase{noHello, "Hellox", "x"},
testCase{noHello, "xHello", "x"},
testCase{noHello, "xHellox", "xx"},
)
// No-arg test cases.
nop := New()
testCases = append(testCases,
testCase{nop, "abc", "abc"},
testCase{nop, "", ""},
)
// Run the test cases.
for i, tc := range testCases {
{
// Replace with len(in) == cap(in)
in := make([]byte, len(tc.in))
copy(in, tc.in)
if s := string(tc.r.Replace(in)); s != tc.out {
t.Errorf("%d. Replace(%q /* len == cap */) = %q, want %q", i, tc.in, s, tc.out)
}
}
{
// Replace with len(in) < cap(in)
in := make([]byte, len(tc.in), len(tc.in)*2)
copy(in, tc.in)
if s := string(tc.r.Replace(in)); s != tc.out {
t.Errorf("%d. Replace(%q /* len < cap */) = %q, want %q", i, tc.in, s, tc.out)
}
}
}
}
func BenchmarkGenericNoMatch(b *testing.B) {
str := []byte(strings.Repeat("A", 100) + strings.Repeat("B", 100))
generic := New("a", "A", "b", "B", "12", "123") // varying lengths forces generic
for i := 0; i < b.N; i++ {
generic.Replace(str)
}
}
func BenchmarkGenericMatch1(b *testing.B) {
str := []byte(strings.Repeat("a", 100) + strings.Repeat("b", 100))
generic := New("a", "A", "b", "B", "12", "123")
for i := 0; i < b.N; i++ {
generic.Replace(str)
}
}
func BenchmarkGenericMatch2(b *testing.B) {
str := bytes.Repeat([]byte("It&apos;s &lt;b&gt;HTML&lt;/b&gt;!"), 100)
for i := 0; i < b.N; i++ {
htmlUnescaper.Replace(str)
}
}
func benchmarkSingleString(b *testing.B, pattern, text string) {
r := New(pattern, "[match]")
buf := make([]byte, len(text), len(text)*7)
b.SetBytes(int64(len(text)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
copy(buf, text)
r.Replace(buf)
}
}
func BenchmarkSingleMaxSkipping(b *testing.B) {
benchmarkSingleString(b, strings.Repeat("b", 25), strings.Repeat("a", 10000))
}
func BenchmarkSingleLongSuffixFail(b *testing.B) {
benchmarkSingleString(b, "b"+strings.Repeat("a", 500), strings.Repeat("a", 1002))
}
func BenchmarkSingleMatch(b *testing.B) {
benchmarkSingleString(b, "abcdef", strings.Repeat("abcdefghijklmno", 1000))
}
func benchmarkReplacer(b *testing.B, r *Replacer, str string) {
buf := make([]byte, len(str))
b.ResetTimer()
for i := 0; i < b.N; i++ {
copy(buf, str)
r.Replace(buf)
}
}
func BenchmarkByteByteNoMatch(b *testing.B) {
benchmarkReplacer(b, capitalLetters, strings.Repeat("A", 100)+strings.Repeat("B", 100))
}
func BenchmarkByteByteMatch(b *testing.B) {
benchmarkReplacer(b, capitalLetters, strings.Repeat("a", 100)+strings.Repeat("b", 100))
}
func BenchmarkByteStringMatch(b *testing.B) {
benchmarkReplacer(b, htmlEscaper, "<"+strings.Repeat("a", 99)+strings.Repeat("b", 99)+">")
}
func BenchmarkHTMLEscapeNew(b *testing.B) {
benchmarkReplacer(b, htmlEscaper, "I <3 to escape HTML & other text too.")
}
func BenchmarkHTMLEscapeOld(b *testing.B) {
str := "I <3 to escape HTML & other text too."
buf := make([]byte, len(str))
for i := 0; i < b.N; i++ {
copy(buf, str)
oldHTMLEscape(buf)
}
}
// The http package's old HTML escaping function in bytes form.
func oldHTMLEscape(s []byte) []byte {
s = bytes.Replace(s, []byte("&"), []byte("&amp;"), -1)
s = bytes.Replace(s, []byte("<"), []byte("&lt;"), -1)
s = bytes.Replace(s, []byte(">"), []byte("&gt;"), -1)
s = bytes.Replace(s, []byte(`"`), []byte("&quot;"), -1)
s = bytes.Replace(s, []byte("'"), []byte("&apos;"), -1)
return s
}
// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
func BenchmarkByteByteReplaces(b *testing.B) {
str := strings.Repeat("a", 100) + strings.Repeat("b", 100)
for i := 0; i < b.N; i++ {
bytes.Replace(bytes.Replace([]byte(str), []byte{'a'}, []byte{'A'}, -1), []byte{'b'}, []byte{'B'}, -1)
}
}
// BenchmarkByteByteMap compares byteByteImpl against Map.
func BenchmarkByteByteMap(b *testing.B) {
str := strings.Repeat("a", 100) + strings.Repeat("b", 100)
fn := func(r rune) rune {
switch r {
case 'a':
return 'A'
case 'b':
return 'B'
}
return r
}
for i := 0; i < b.N; i++ {
bytes.Map(fn, []byte(str))
}
}