Switch to using the dep tool and update all the dependencies
This commit is contained in:
parent
5135ff73cb
commit
98c2d2c41b
5321 changed files with 4483201 additions and 5922 deletions
18
vendor/golang.org/x/text/unicode/norm/composition.go
generated
vendored
18
vendor/golang.org/x/text/unicode/norm/composition.go
generated
vendored
|
@ -33,17 +33,9 @@ const (
|
|||
// streamSafe implements the policy of when a CGJ should be inserted.
|
||||
type streamSafe uint8
|
||||
|
||||
// mkStreamSafe is a shorthand for declaring a streamSafe var and calling
|
||||
// first on it.
|
||||
func mkStreamSafe(p Properties) streamSafe {
|
||||
return streamSafe(p.nTrailingNonStarters())
|
||||
}
|
||||
|
||||
// first inserts the first rune of a segment.
|
||||
// first inserts the first rune of a segment. It is a faster version of next if
|
||||
// it is known p represents the first rune in a segment.
|
||||
func (ss *streamSafe) first(p Properties) {
|
||||
if *ss != 0 {
|
||||
panic("!= 0")
|
||||
}
|
||||
*ss = streamSafe(p.nTrailingNonStarters())
|
||||
}
|
||||
|
||||
|
@ -66,7 +58,7 @@ func (ss *streamSafe) next(p Properties) ssState {
|
|||
// be a non-starter. Note that it always hold that if nLead > 0 then
|
||||
// nLead == nTrail.
|
||||
if n == 0 {
|
||||
*ss = 0
|
||||
*ss = streamSafe(p.nTrailingNonStarters())
|
||||
return ssStarter
|
||||
}
|
||||
return ssSuccess
|
||||
|
@ -142,7 +134,6 @@ func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) {
|
|||
func (rb *reorderBuffer) reset() {
|
||||
rb.nrune = 0
|
||||
rb.nbyte = 0
|
||||
rb.ss = 0
|
||||
}
|
||||
|
||||
func (rb *reorderBuffer) doFlush() bool {
|
||||
|
@ -257,6 +248,9 @@ func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) {
|
|||
// It flushes the buffer on each new segment start.
|
||||
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr {
|
||||
rb.tmpBytes.setBytes(dcomp)
|
||||
// As the streamSafe accounting already handles the counting for modifiers,
|
||||
// we don't have to call next. However, we do need to keep the accounting
|
||||
// intact when flushing the buffer.
|
||||
for i := 0; i < len(dcomp); {
|
||||
info := rb.f.info(rb.tmpBytes, i)
|
||||
if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() {
|
||||
|
|
130
vendor/golang.org/x/text/unicode/norm/composition_test.go
generated
vendored
Normal file
130
vendor/golang.org/x/text/unicode/norm/composition_test.go
generated
vendored
Normal file
|
@ -0,0 +1,130 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import "testing"
|
||||
|
||||
// TestCase is used for most tests.
|
||||
type TestCase struct {
|
||||
in []rune
|
||||
out []rune
|
||||
}
|
||||
|
||||
func runTests(t *testing.T, name string, fm Form, tests []TestCase) {
|
||||
rb := reorderBuffer{}
|
||||
rb.init(fm, nil)
|
||||
for i, test := range tests {
|
||||
rb.setFlusher(nil, appendFlush)
|
||||
for j, rune := range test.in {
|
||||
b := []byte(string(rune))
|
||||
src := inputBytes(b)
|
||||
info := rb.f.info(src, 0)
|
||||
if j == 0 {
|
||||
rb.ss.first(info)
|
||||
} else {
|
||||
rb.ss.next(info)
|
||||
}
|
||||
if rb.insertFlush(src, 0, info) < 0 {
|
||||
t.Errorf("%s:%d: insert failed for rune %d", name, i, j)
|
||||
}
|
||||
}
|
||||
rb.doFlush()
|
||||
was := string(rb.out)
|
||||
want := string(test.out)
|
||||
if len(was) != len(want) {
|
||||
t.Errorf("%s:%d: length = %d; want %d", name, i, len(was), len(want))
|
||||
}
|
||||
if was != want {
|
||||
k, pfx := pidx(was, want)
|
||||
t.Errorf("%s:%d: \nwas %s%+q; \nwant %s%+q", name, i, pfx, was[k:], pfx, want[k:])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFlush(t *testing.T) {
|
||||
const (
|
||||
hello = "Hello "
|
||||
world = "world!"
|
||||
)
|
||||
buf := make([]byte, maxByteBufferSize)
|
||||
p := copy(buf, hello)
|
||||
out := buf[p:]
|
||||
rb := reorderBuffer{}
|
||||
rb.initString(NFC, world)
|
||||
if i := rb.flushCopy(out); i != 0 {
|
||||
t.Errorf("wrote bytes on flush of empty buffer. (len(out) = %d)", i)
|
||||
}
|
||||
|
||||
for i := range world {
|
||||
// No need to set streamSafe values for this test.
|
||||
rb.insertFlush(rb.src, i, rb.f.info(rb.src, i))
|
||||
n := rb.flushCopy(out)
|
||||
out = out[n:]
|
||||
p += n
|
||||
}
|
||||
|
||||
was := buf[:p]
|
||||
want := hello + world
|
||||
if string(was) != want {
|
||||
t.Errorf(`output after flush was "%s"; want "%s"`, string(was), want)
|
||||
}
|
||||
if rb.nrune != 0 {
|
||||
t.Errorf("non-null size of info buffer (rb.nrune == %d)", rb.nrune)
|
||||
}
|
||||
if rb.nbyte != 0 {
|
||||
t.Errorf("non-null size of byte buffer (rb.nbyte == %d)", rb.nbyte)
|
||||
}
|
||||
}
|
||||
|
||||
var insertTests = []TestCase{
|
||||
{[]rune{'a'}, []rune{'a'}},
|
||||
{[]rune{0x300}, []rune{0x300}},
|
||||
{[]rune{0x300, 0x316}, []rune{0x316, 0x300}}, // CCC(0x300)==230; CCC(0x316)==220
|
||||
{[]rune{0x316, 0x300}, []rune{0x316, 0x300}},
|
||||
{[]rune{0x41, 0x316, 0x300}, []rune{0x41, 0x316, 0x300}},
|
||||
{[]rune{0x41, 0x300, 0x316}, []rune{0x41, 0x316, 0x300}},
|
||||
{[]rune{0x300, 0x316, 0x41}, []rune{0x316, 0x300, 0x41}},
|
||||
{[]rune{0x41, 0x300, 0x40, 0x316}, []rune{0x41, 0x300, 0x40, 0x316}},
|
||||
}
|
||||
|
||||
func TestInsert(t *testing.T) {
|
||||
runTests(t, "TestInsert", NFD, insertTests)
|
||||
}
|
||||
|
||||
var decompositionNFDTest = []TestCase{
|
||||
{[]rune{0xC0}, []rune{0x41, 0x300}},
|
||||
{[]rune{0xAC00}, []rune{0x1100, 0x1161}},
|
||||
{[]rune{0x01C4}, []rune{0x01C4}},
|
||||
{[]rune{0x320E}, []rune{0x320E}},
|
||||
{[]rune("음ẻ과"), []rune{0x110B, 0x1173, 0x11B7, 0x65, 0x309, 0x1100, 0x116A}},
|
||||
}
|
||||
|
||||
var decompositionNFKDTest = []TestCase{
|
||||
{[]rune{0xC0}, []rune{0x41, 0x300}},
|
||||
{[]rune{0xAC00}, []rune{0x1100, 0x1161}},
|
||||
{[]rune{0x01C4}, []rune{0x44, 0x5A, 0x030C}},
|
||||
{[]rune{0x320E}, []rune{0x28, 0x1100, 0x1161, 0x29}},
|
||||
}
|
||||
|
||||
func TestDecomposition(t *testing.T) {
|
||||
runTests(t, "TestDecompositionNFD", NFD, decompositionNFDTest)
|
||||
runTests(t, "TestDecompositionNFKD", NFKD, decompositionNFKDTest)
|
||||
}
|
||||
|
||||
var compositionTest = []TestCase{
|
||||
{[]rune{0x41, 0x300}, []rune{0xC0}},
|
||||
{[]rune{0x41, 0x316}, []rune{0x41, 0x316}},
|
||||
{[]rune{0x41, 0x300, 0x35D}, []rune{0xC0, 0x35D}},
|
||||
{[]rune{0x41, 0x316, 0x300}, []rune{0xC0, 0x316}},
|
||||
// blocking starter
|
||||
{[]rune{0x41, 0x316, 0x40, 0x300}, []rune{0x41, 0x316, 0x40, 0x300}},
|
||||
{[]rune{0x1100, 0x1161}, []rune{0xAC00}},
|
||||
// parenthesized Hangul, alternate between ASCII and Hangul.
|
||||
{[]rune{0x28, 0x1100, 0x1161, 0x29}, []rune{0x28, 0xAC00, 0x29}},
|
||||
}
|
||||
|
||||
func TestComposition(t *testing.T) {
|
||||
runTests(t, "TestComposition", NFC, compositionTest)
|
||||
}
|
82
vendor/golang.org/x/text/unicode/norm/example_iter_test.go
generated
vendored
Normal file
82
vendor/golang.org/x/text/unicode/norm/example_iter_test.go
generated
vendored
Normal file
|
@ -0,0 +1,82 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// EqualSimple uses a norm.Iter to compare two non-normalized
|
||||
// strings for equivalence.
|
||||
func EqualSimple(a, b string) bool {
|
||||
var ia, ib norm.Iter
|
||||
ia.InitString(norm.NFKD, a)
|
||||
ib.InitString(norm.NFKD, b)
|
||||
for !ia.Done() && !ib.Done() {
|
||||
if !bytes.Equal(ia.Next(), ib.Next()) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return ia.Done() && ib.Done()
|
||||
}
|
||||
|
||||
// FindPrefix finds the longest common prefix of ASCII characters
|
||||
// of a and b.
|
||||
func FindPrefix(a, b string) int {
|
||||
i := 0
|
||||
for ; i < len(a) && i < len(b) && a[i] < utf8.RuneSelf && a[i] == b[i]; i++ {
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// EqualOpt is like EqualSimple, but optimizes the special
|
||||
// case for ASCII characters.
|
||||
func EqualOpt(a, b string) bool {
|
||||
n := FindPrefix(a, b)
|
||||
a, b = a[n:], b[n:]
|
||||
var ia, ib norm.Iter
|
||||
ia.InitString(norm.NFKD, a)
|
||||
ib.InitString(norm.NFKD, b)
|
||||
for !ia.Done() && !ib.Done() {
|
||||
if !bytes.Equal(ia.Next(), ib.Next()) {
|
||||
return false
|
||||
}
|
||||
if n := int64(FindPrefix(a[ia.Pos():], b[ib.Pos():])); n != 0 {
|
||||
ia.Seek(n, 1)
|
||||
ib.Seek(n, 1)
|
||||
}
|
||||
}
|
||||
return ia.Done() && ib.Done()
|
||||
}
|
||||
|
||||
var compareTests = []struct{ a, b string }{
|
||||
{"aaa", "aaa"},
|
||||
{"aaa", "aab"},
|
||||
{"a\u0300a", "\u00E0a"},
|
||||
{"a\u0300\u0320b", "a\u0320\u0300b"},
|
||||
{"\u1E0A\u0323", "\x44\u0323\u0307"},
|
||||
// A character that decomposes into multiple segments
|
||||
// spans several iterations.
|
||||
{"\u3304", "\u30A4\u30CB\u30F3\u30AF\u3099"},
|
||||
}
|
||||
|
||||
func ExampleIter() {
|
||||
for i, t := range compareTests {
|
||||
r0 := EqualSimple(t.a, t.b)
|
||||
r1 := EqualOpt(t.a, t.b)
|
||||
fmt.Printf("%d: %v %v\n", i, r0, r1)
|
||||
}
|
||||
// Output:
|
||||
// 0: true true
|
||||
// 1: false false
|
||||
// 2: true true
|
||||
// 3: true true
|
||||
// 4: true true
|
||||
// 5: true true
|
||||
}
|
27
vendor/golang.org/x/text/unicode/norm/example_test.go
generated
vendored
Normal file
27
vendor/golang.org/x/text/unicode/norm/example_test.go
generated
vendored
Normal file
|
@ -0,0 +1,27 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
func ExampleForm_NextBoundary() {
|
||||
s := norm.NFD.String("Mêlée")
|
||||
|
||||
for i := 0; i < len(s); {
|
||||
d := norm.NFC.NextBoundaryInString(s[i:], true)
|
||||
fmt.Printf("%[1]s: %+[1]q\n", s[i:i+d])
|
||||
i += d
|
||||
}
|
||||
// Output:
|
||||
// M: "M"
|
||||
// ê: "e\u0302"
|
||||
// l: "l"
|
||||
// é: "e\u0301"
|
||||
// e: "e"
|
||||
}
|
2
vendor/golang.org/x/text/unicode/norm/forminfo.go
generated
vendored
2
vendor/golang.org/x/text/unicode/norm/forminfo.go
generated
vendored
|
@ -10,7 +10,7 @@ package norm
|
|||
// and its corresponding decomposing form share the same trie. Each trie maps
|
||||
// a rune to a uint16. The values take two forms. For v >= 0x8000:
|
||||
// bits
|
||||
// 15: 1 (inverse of NFD_QD bit of qcInfo)
|
||||
// 15: 1 (inverse of NFD_QC bit of qcInfo)
|
||||
// 13..7: qcInfo (see below). isYesD is always true (no decompostion).
|
||||
// 6..0: ccc (compressed CCC value).
|
||||
// For v < 0x8000, the respective rune has a decomposition and v is an index
|
||||
|
|
54
vendor/golang.org/x/text/unicode/norm/forminfo_test.go
generated
vendored
Normal file
54
vendor/golang.org/x/text/unicode/norm/forminfo_test.go
generated
vendored
Normal file
|
@ -0,0 +1,54 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build test
|
||||
|
||||
package norm
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestProperties(t *testing.T) {
|
||||
var d runeData
|
||||
CK := [2]string{"C", "K"}
|
||||
for k, r := 1, rune(0); r < 0x2ffff; r++ {
|
||||
if k < len(testData) && r == testData[k].r {
|
||||
d = testData[k]
|
||||
k++
|
||||
}
|
||||
s := string(r)
|
||||
for j, p := range []Properties{NFC.PropertiesString(s), NFKC.PropertiesString(s)} {
|
||||
f := d.f[j]
|
||||
if p.CCC() != d.ccc {
|
||||
t.Errorf("%U: ccc(%s): was %d; want %d %X", r, CK[j], p.CCC(), d.ccc, p.index)
|
||||
}
|
||||
if p.isYesC() != (f.qc == Yes) {
|
||||
t.Errorf("%U: YesC(%s): was %v; want %v", r, CK[j], p.isYesC(), f.qc == Yes)
|
||||
}
|
||||
if p.combinesBackward() != (f.qc == Maybe) {
|
||||
t.Errorf("%U: combines backwards(%s): was %v; want %v", r, CK[j], p.combinesBackward(), f.qc == Maybe)
|
||||
}
|
||||
if p.nLeadingNonStarters() != d.nLead {
|
||||
t.Errorf("%U: nLead(%s): was %d; want %d %#v %#v", r, CK[j], p.nLeadingNonStarters(), d.nLead, p, d)
|
||||
}
|
||||
if p.nTrailingNonStarters() != d.nTrail {
|
||||
t.Errorf("%U: nTrail(%s): was %d; want %d %#v %#v", r, CK[j], p.nTrailingNonStarters(), d.nTrail, p, d)
|
||||
}
|
||||
if p.combinesForward() != f.combinesForward {
|
||||
t.Errorf("%U: combines forward(%s): was %v; want %v %#v", r, CK[j], p.combinesForward(), f.combinesForward, p)
|
||||
}
|
||||
// Skip Hangul as it is algorithmically computed.
|
||||
if r >= hangulBase && r < hangulEnd {
|
||||
continue
|
||||
}
|
||||
if p.hasDecomposition() {
|
||||
if has := f.decomposition != ""; !has {
|
||||
t.Errorf("%U: hasDecomposition(%s): was %v; want %v", r, CK[j], p.hasDecomposition(), has)
|
||||
}
|
||||
if string(p.Decomposition()) != f.decomposition {
|
||||
t.Errorf("%U: decomp(%s): was %+q; want %+q", r, CK[j], p.Decomposition(), f.decomposition)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
8
vendor/golang.org/x/text/unicode/norm/input.go
generated
vendored
8
vendor/golang.org/x/text/unicode/norm/input.go
generated
vendored
|
@ -90,16 +90,20 @@ func (in *input) charinfoNFKC(p int) (uint16, int) {
|
|||
}
|
||||
|
||||
func (in *input) hangul(p int) (r rune) {
|
||||
var size int
|
||||
if in.bytes == nil {
|
||||
if !isHangulString(in.str[p:]) {
|
||||
return 0
|
||||
}
|
||||
r, _ = utf8.DecodeRuneInString(in.str[p:])
|
||||
r, size = utf8.DecodeRuneInString(in.str[p:])
|
||||
} else {
|
||||
if !isHangul(in.bytes[p:]) {
|
||||
return 0
|
||||
}
|
||||
r, _ = utf8.DecodeRune(in.bytes[p:])
|
||||
r, size = utf8.DecodeRune(in.bytes[p:])
|
||||
}
|
||||
if size != hangulUTF8Size {
|
||||
return 0
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
|
31
vendor/golang.org/x/text/unicode/norm/iter.go
generated
vendored
31
vendor/golang.org/x/text/unicode/norm/iter.go
generated
vendored
|
@ -41,6 +41,7 @@ func (i *Iter) Init(f Form, src []byte) {
|
|||
i.next = i.rb.f.nextMain
|
||||
i.asciiF = nextASCIIBytes
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.rb.ss.first(i.info)
|
||||
}
|
||||
|
||||
// InitString initializes i to iterate over src after normalizing it to Form f.
|
||||
|
@ -56,11 +57,12 @@ func (i *Iter) InitString(f Form, src string) {
|
|||
i.next = i.rb.f.nextMain
|
||||
i.asciiF = nextASCIIString
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.rb.ss.first(i.info)
|
||||
}
|
||||
|
||||
// Seek sets the segment to be returned by the next call to Next to start
|
||||
// at position p. It is the responsibility of the caller to set p to the
|
||||
// start of a UTF8 rune.
|
||||
// start of a segment.
|
||||
func (i *Iter) Seek(offset int64, whence int) (int64, error) {
|
||||
var abs int64
|
||||
switch whence {
|
||||
|
@ -84,6 +86,7 @@ func (i *Iter) Seek(offset int64, whence int) (int64, error) {
|
|||
i.multiSeg = nil
|
||||
i.next = i.rb.f.nextMain
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.rb.ss.first(i.info)
|
||||
return abs, nil
|
||||
}
|
||||
|
||||
|
@ -161,6 +164,7 @@ func nextHangul(i *Iter) []byte {
|
|||
if next >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
} else if i.rb.src.hangul(next) == 0 {
|
||||
i.rb.ss.next(i.info)
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.next = i.rb.f.nextMain
|
||||
return i.next(i)
|
||||
|
@ -204,12 +208,10 @@ func nextMultiNorm(i *Iter) []byte {
|
|||
if info.BoundaryBefore() {
|
||||
i.rb.compose()
|
||||
seg := i.buf[:i.rb.flushCopy(i.buf[:])]
|
||||
i.rb.ss.first(info)
|
||||
i.rb.insertUnsafe(input{bytes: d}, j, info)
|
||||
i.multiSeg = d[j+int(info.size):]
|
||||
return seg
|
||||
}
|
||||
i.rb.ss.next(info)
|
||||
i.rb.insertUnsafe(input{bytes: d}, j, info)
|
||||
j += int(info.size)
|
||||
}
|
||||
|
@ -222,9 +224,9 @@ func nextMultiNorm(i *Iter) []byte {
|
|||
func nextDecomposed(i *Iter) (next []byte) {
|
||||
outp := 0
|
||||
inCopyStart, outCopyStart := i.p, 0
|
||||
ss := mkStreamSafe(i.info)
|
||||
for {
|
||||
if sz := int(i.info.size); sz <= 1 {
|
||||
i.rb.ss = 0
|
||||
p := i.p
|
||||
i.p++ // ASCII or illegal byte. Either way, advance by 1.
|
||||
if i.p >= i.rb.nsrc {
|
||||
|
@ -243,6 +245,8 @@ func nextDecomposed(i *Iter) (next []byte) {
|
|||
p := outp + len(d)
|
||||
if outp > 0 {
|
||||
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
|
||||
// TODO: this condition should not be possible, but we leave it
|
||||
// in for defensive purposes.
|
||||
if p > len(i.buf) {
|
||||
return i.buf[:outp]
|
||||
}
|
||||
|
@ -266,7 +270,7 @@ func nextDecomposed(i *Iter) (next []byte) {
|
|||
} else {
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
}
|
||||
switch ss.next(i.info) {
|
||||
switch i.rb.ss.next(i.info) {
|
||||
case ssOverflow:
|
||||
i.next = nextCGJDecompose
|
||||
fallthrough
|
||||
|
@ -309,7 +313,7 @@ func nextDecomposed(i *Iter) (next []byte) {
|
|||
}
|
||||
prevCC := i.info.tccc
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
if v := ss.next(i.info); v == ssStarter {
|
||||
if v := i.rb.ss.next(i.info); v == ssStarter {
|
||||
break
|
||||
} else if v == ssOverflow {
|
||||
i.next = nextCGJDecompose
|
||||
|
@ -335,10 +339,6 @@ doNorm:
|
|||
|
||||
func doNormDecomposed(i *Iter) []byte {
|
||||
for {
|
||||
if s := i.rb.ss.next(i.info); s == ssOverflow {
|
||||
i.next = nextCGJDecompose
|
||||
break
|
||||
}
|
||||
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
|
||||
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
|
@ -348,6 +348,10 @@ func doNormDecomposed(i *Iter) []byte {
|
|||
if i.info.ccc == 0 {
|
||||
break
|
||||
}
|
||||
if s := i.rb.ss.next(i.info); s == ssOverflow {
|
||||
i.next = nextCGJDecompose
|
||||
break
|
||||
}
|
||||
}
|
||||
// new segment or too many combining characters: exit normalization
|
||||
return i.buf[:i.rb.flushCopy(i.buf[:])]
|
||||
|
@ -357,6 +361,7 @@ func nextCGJDecompose(i *Iter) []byte {
|
|||
i.rb.ss = 0
|
||||
i.rb.insertCGJ()
|
||||
i.next = nextDecomposed
|
||||
i.rb.ss.first(i.info)
|
||||
buf := doNormDecomposed(i)
|
||||
return buf
|
||||
}
|
||||
|
@ -365,7 +370,6 @@ func nextCGJDecompose(i *Iter) []byte {
|
|||
func nextComposed(i *Iter) []byte {
|
||||
outp, startp := 0, i.p
|
||||
var prevCC uint8
|
||||
ss := mkStreamSafe(i.info)
|
||||
for {
|
||||
if !i.info.isYesC() {
|
||||
goto doNorm
|
||||
|
@ -385,11 +389,12 @@ func nextComposed(i *Iter) []byte {
|
|||
i.setDone()
|
||||
break
|
||||
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
|
||||
i.rb.ss = 0
|
||||
i.next = i.asciiF
|
||||
break
|
||||
}
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
if v := ss.next(i.info); v == ssStarter {
|
||||
if v := i.rb.ss.next(i.info); v == ssStarter {
|
||||
break
|
||||
} else if v == ssOverflow {
|
||||
i.next = nextCGJCompose
|
||||
|
@ -401,8 +406,10 @@ func nextComposed(i *Iter) []byte {
|
|||
}
|
||||
return i.returnSlice(startp, i.p)
|
||||
doNorm:
|
||||
// reset to start position
|
||||
i.p = startp
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.rb.ss.first(i.info)
|
||||
if i.info.multiSegment() {
|
||||
d := i.info.Decomposition()
|
||||
info := i.rb.f.info(input{bytes: d}, 0)
|
||||
|
|
98
vendor/golang.org/x/text/unicode/norm/iter_test.go
generated
vendored
Normal file
98
vendor/golang.org/x/text/unicode/norm/iter_test.go
generated
vendored
Normal file
|
@ -0,0 +1,98 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func doIterNorm(f Form, s string) []byte {
|
||||
acc := []byte{}
|
||||
i := Iter{}
|
||||
i.InitString(f, s)
|
||||
for !i.Done() {
|
||||
acc = append(acc, i.Next()...)
|
||||
}
|
||||
return acc
|
||||
}
|
||||
|
||||
func TestIterNext(t *testing.T) {
|
||||
runNormTests(t, "IterNext", func(f Form, out []byte, s string) []byte {
|
||||
return doIterNorm(f, string(append(out, s...)))
|
||||
})
|
||||
}
|
||||
|
||||
type SegmentTest struct {
|
||||
in string
|
||||
out []string
|
||||
}
|
||||
|
||||
var segmentTests = []SegmentTest{
|
||||
{"\u1E0A\u0323a", []string{"\x44\u0323\u0307", "a", ""}},
|
||||
{rep('a', segSize), append(strings.Split(rep('a', segSize), ""), "")},
|
||||
{rep('a', segSize+2), append(strings.Split(rep('a', segSize+2), ""), "")},
|
||||
{rep('a', segSize) + "\u0300aa",
|
||||
append(strings.Split(rep('a', segSize-1), ""), "a\u0300", "a", "a", "")},
|
||||
|
||||
// U+0f73 is NOT treated as a starter as it is a modifier
|
||||
{"a" + grave(29) + "\u0f73", []string{"a" + grave(29), cgj + "\u0f73"}},
|
||||
{"a\u0f73", []string{"a\u0f73"}},
|
||||
|
||||
// U+ff9e is treated as a non-starter.
|
||||
// TODO: should we? Note that this will only affect iteration, as whether
|
||||
// or not we do so does not affect the normalization output and will either
|
||||
// way result in consistent iteration output.
|
||||
{"a" + grave(30) + "\uff9e", []string{"a" + grave(30), cgj + "\uff9e"}},
|
||||
{"a\uff9e", []string{"a\uff9e"}},
|
||||
}
|
||||
|
||||
var segmentTestsK = []SegmentTest{
|
||||
{"\u3332", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u3099", ""}},
|
||||
// last segment of multi-segment decomposition needs normalization
|
||||
{"\u3332\u093C", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u093C\u3099", ""}},
|
||||
{"\u320E", []string{"\x28", "\uAC00", "\x29"}},
|
||||
|
||||
// last segment should be copied to start of buffer.
|
||||
{"\ufdfa", []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645", ""}},
|
||||
{"\ufdfa" + grave(30), []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645" + grave(30), ""}},
|
||||
{"\uFDFA" + grave(64), []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645" + grave(30), cgj + grave(30), cgj + grave(4), ""}},
|
||||
|
||||
// Hangul and Jamo are grouped together.
|
||||
{"\uAC00", []string{"\u1100\u1161", ""}},
|
||||
{"\uAC01", []string{"\u1100\u1161\u11A8", ""}},
|
||||
{"\u1100\u1161", []string{"\u1100\u1161", ""}},
|
||||
}
|
||||
|
||||
// Note that, by design, segmentation is equal for composing and decomposing forms.
|
||||
func TestIterSegmentation(t *testing.T) {
|
||||
segmentTest(t, "SegmentTestD", NFD, segmentTests)
|
||||
segmentTest(t, "SegmentTestC", NFC, segmentTests)
|
||||
segmentTest(t, "SegmentTestKD", NFKD, segmentTestsK)
|
||||
segmentTest(t, "SegmentTestKC", NFKC, segmentTestsK)
|
||||
}
|
||||
|
||||
func segmentTest(t *testing.T, name string, f Form, tests []SegmentTest) {
|
||||
iter := Iter{}
|
||||
for i, tt := range tests {
|
||||
iter.InitString(f, tt.in)
|
||||
for j, seg := range tt.out {
|
||||
if seg == "" {
|
||||
if !iter.Done() {
|
||||
res := string(iter.Next())
|
||||
t.Errorf(`%s:%d:%d: expected Done()==true, found segment %+q`, name, i, j, res)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if iter.Done() {
|
||||
t.Errorf("%s:%d:%d: Done()==true, want false", name, i, j)
|
||||
}
|
||||
seg = f.String(seg)
|
||||
if res := string(iter.Next()); res != seg {
|
||||
t.Errorf(`%s:%d:%d" segment was %+q (%d); want %+q (%d)`, name, i, j, pc(res), len(res), pc(seg), len(seg))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
12
vendor/golang.org/x/text/unicode/norm/maketables.go
generated
vendored
12
vendor/golang.org/x/text/unicode/norm/maketables.go
generated
vendored
|
@ -35,12 +35,9 @@ func main() {
|
|||
computeNonStarterCounts()
|
||||
verifyComputed()
|
||||
printChars()
|
||||
if *test {
|
||||
testDerived()
|
||||
printTestdata()
|
||||
} else {
|
||||
makeTables()
|
||||
}
|
||||
testDerived()
|
||||
printTestdata()
|
||||
makeTables()
|
||||
}
|
||||
|
||||
var (
|
||||
|
@ -602,6 +599,7 @@ func printCharInfoTables(w io.Writer) int {
|
|||
}
|
||||
index := normalDecomp
|
||||
nTrail := chars[r].nTrailingNonStarters
|
||||
nLead := chars[r].nLeadingNonStarters
|
||||
if tccc > 0 || lccc > 0 || nTrail > 0 {
|
||||
tccc <<= 2
|
||||
tccc |= nTrail
|
||||
|
@ -612,7 +610,7 @@ func printCharInfoTables(w io.Writer) int {
|
|||
index = firstCCC
|
||||
}
|
||||
}
|
||||
if lccc > 0 {
|
||||
if lccc > 0 || nLead > 0 {
|
||||
s += string([]byte{lccc})
|
||||
if index == firstCCC {
|
||||
log.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r)
|
||||
|
|
14
vendor/golang.org/x/text/unicode/norm/norm_test.go
generated
vendored
Normal file
14
vendor/golang.org/x/text/unicode/norm/norm_test.go
generated
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestPlaceHolder(t *testing.T) {
|
||||
// Does nothing, just allows the Makefile to be canonical
|
||||
// while waiting for the package itself to be written.
|
||||
}
|
21
vendor/golang.org/x/text/unicode/norm/normalize.go
generated
vendored
21
vendor/golang.org/x/text/unicode/norm/normalize.go
generated
vendored
|
@ -2,11 +2,12 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Note: the file data_test.go that is generated should not be checked in.
|
||||
//go:generate go run maketables.go triegen.go
|
||||
//go:generate go run maketables.go triegen.go -test
|
||||
//go:generate go test -tags test
|
||||
|
||||
// Package norm contains types and functions for normalizing Unicode strings.
|
||||
package norm
|
||||
package norm // import "golang.org/x/text/unicode/norm"
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
@ -323,7 +324,6 @@ func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool)
|
|||
// have an overflow for runes that are starters (e.g. with U+FF9E).
|
||||
switch ss.next(info) {
|
||||
case ssStarter:
|
||||
ss.first(info)
|
||||
lastSegStart = i
|
||||
case ssOverflow:
|
||||
return lastSegStart, false
|
||||
|
@ -440,6 +440,8 @@ func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
|
|||
}
|
||||
return -1
|
||||
}
|
||||
// TODO: Using streamSafe to determine the boundary isn't the same as
|
||||
// using BoundaryBefore. Determine which should be used.
|
||||
if s := ss.next(info); s != ssSuccess {
|
||||
return i
|
||||
}
|
||||
|
@ -504,15 +506,14 @@ func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
|
|||
if info.size == 0 {
|
||||
return 0
|
||||
}
|
||||
if rb.nrune > 0 {
|
||||
if s := rb.ss.next(info); s == ssStarter {
|
||||
goto end
|
||||
} else if s == ssOverflow {
|
||||
rb.insertCGJ()
|
||||
if s := rb.ss.next(info); s == ssStarter {
|
||||
// TODO: this could be removed if we don't support merging.
|
||||
if rb.nrune > 0 {
|
||||
goto end
|
||||
}
|
||||
} else {
|
||||
rb.ss.first(info)
|
||||
} else if s == ssOverflow {
|
||||
rb.insertCGJ()
|
||||
goto end
|
||||
}
|
||||
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
|
||||
return int(err)
|
||||
|
|
1287
vendor/golang.org/x/text/unicode/norm/normalize_test.go
generated
vendored
Normal file
1287
vendor/golang.org/x/text/unicode/norm/normalize_test.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
56
vendor/golang.org/x/text/unicode/norm/readwriter_test.go
generated
vendored
Normal file
56
vendor/golang.org/x/text/unicode/norm/readwriter_test.go
generated
vendored
Normal file
|
@ -0,0 +1,56 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
var bufSizes = []int{1, 2, 3, 4, 5, 6, 7, 8, 100, 101, 102, 103, 4000, 4001, 4002, 4003}
|
||||
|
||||
func readFunc(size int) appendFunc {
|
||||
return func(f Form, out []byte, s string) []byte {
|
||||
out = append(out, s...)
|
||||
r := f.Reader(bytes.NewBuffer(out))
|
||||
buf := make([]byte, size)
|
||||
result := []byte{}
|
||||
for n, err := 0, error(nil); err == nil; {
|
||||
n, err = r.Read(buf)
|
||||
result = append(result, buf[:n]...)
|
||||
}
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
func TestReader(t *testing.T) {
|
||||
for _, s := range bufSizes {
|
||||
name := fmt.Sprintf("TestReader%d", s)
|
||||
runNormTests(t, name, readFunc(s))
|
||||
}
|
||||
}
|
||||
|
||||
func writeFunc(size int) appendFunc {
|
||||
return func(f Form, out []byte, s string) []byte {
|
||||
in := append(out, s...)
|
||||
result := new(bytes.Buffer)
|
||||
w := f.Writer(result)
|
||||
buf := make([]byte, size)
|
||||
for n := 0; len(in) > 0; in = in[n:] {
|
||||
n = copy(buf, in)
|
||||
_, _ = w.Write(buf[:n])
|
||||
}
|
||||
w.Close()
|
||||
return result.Bytes()
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriter(t *testing.T) {
|
||||
for _, s := range bufSizes {
|
||||
name := fmt.Sprintf("TestWriter%d", s)
|
||||
runNormTests(t, name, writeFunc(s))
|
||||
}
|
||||
}
|
1028
vendor/golang.org/x/text/unicode/norm/tables.go
generated
vendored
1028
vendor/golang.org/x/text/unicode/norm/tables.go
generated
vendored
File diff suppressed because it is too large
Load diff
101
vendor/golang.org/x/text/unicode/norm/transform_test.go
generated
vendored
Normal file
101
vendor/golang.org/x/text/unicode/norm/transform_test.go
generated
vendored
Normal file
|
@ -0,0 +1,101 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
func TestTransform(t *testing.T) {
|
||||
tests := []struct {
|
||||
f Form
|
||||
in, out string
|
||||
eof bool
|
||||
dstSize int
|
||||
err error
|
||||
}{
|
||||
{NFC, "ab", "ab", true, 2, nil},
|
||||
{NFC, "qx", "qx", true, 2, nil},
|
||||
{NFD, "qx", "qx", true, 2, nil},
|
||||
{NFC, "", "", true, 1, nil},
|
||||
{NFD, "", "", true, 1, nil},
|
||||
{NFC, "", "", false, 1, nil},
|
||||
{NFD, "", "", false, 1, nil},
|
||||
|
||||
// Normalized segment does not fit in destination.
|
||||
{NFD, "ö", "", true, 1, transform.ErrShortDst},
|
||||
{NFD, "ö", "", true, 2, transform.ErrShortDst},
|
||||
|
||||
// As an artifact of the algorithm, only full segments are written.
|
||||
// This is not strictly required, and some bytes could be written.
|
||||
// In practice, for Transform to not block, the destination buffer
|
||||
// should be at least MaxSegmentSize to work anyway and these edge
|
||||
// conditions will be relatively rare.
|
||||
{NFC, "ab", "", true, 1, transform.ErrShortDst},
|
||||
// This is even true for inert runes.
|
||||
{NFC, "qx", "", true, 1, transform.ErrShortDst},
|
||||
{NFC, "a\u0300abc", "\u00e0a", true, 4, transform.ErrShortDst},
|
||||
|
||||
// We cannot write a segment if succesive runes could still change the result.
|
||||
{NFD, "ö", "", false, 3, transform.ErrShortSrc},
|
||||
{NFC, "a\u0300", "", false, 4, transform.ErrShortSrc},
|
||||
{NFD, "a\u0300", "", false, 4, transform.ErrShortSrc},
|
||||
{NFC, "ö", "", false, 3, transform.ErrShortSrc},
|
||||
|
||||
{NFC, "a\u0300", "", true, 1, transform.ErrShortDst},
|
||||
// Theoretically could fit, but won't due to simplified checks.
|
||||
{NFC, "a\u0300", "", true, 2, transform.ErrShortDst},
|
||||
{NFC, "a\u0300", "", true, 3, transform.ErrShortDst},
|
||||
{NFC, "a\u0300", "\u00e0", true, 4, nil},
|
||||
|
||||
{NFD, "öa\u0300", "o\u0308", false, 8, transform.ErrShortSrc},
|
||||
{NFD, "öa\u0300ö", "o\u0308a\u0300", true, 8, transform.ErrShortDst},
|
||||
{NFD, "öa\u0300ö", "o\u0308a\u0300", false, 12, transform.ErrShortSrc},
|
||||
|
||||
// Illegal input is copied verbatim.
|
||||
{NFD, "\xbd\xb2=\xbc ", "\xbd\xb2=\xbc ", true, 8, nil},
|
||||
}
|
||||
b := make([]byte, 100)
|
||||
for i, tt := range tests {
|
||||
nDst, _, err := tt.f.Transform(b[:tt.dstSize], []byte(tt.in), tt.eof)
|
||||
out := string(b[:nDst])
|
||||
if out != tt.out || err != tt.err {
|
||||
t.Errorf("%d: was %+q (%v); want %+q (%v)", i, out, err, tt.out, tt.err)
|
||||
}
|
||||
if want := tt.f.String(tt.in)[:nDst]; want != out {
|
||||
t.Errorf("%d: incorect normalization: was %+q; want %+q", i, out, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var transBufSizes = []int{
|
||||
MaxTransformChunkSize,
|
||||
3 * MaxTransformChunkSize / 2,
|
||||
2 * MaxTransformChunkSize,
|
||||
3 * MaxTransformChunkSize,
|
||||
100 * MaxTransformChunkSize,
|
||||
}
|
||||
|
||||
func doTransNorm(f Form, buf []byte, b []byte) []byte {
|
||||
acc := []byte{}
|
||||
for p := 0; p < len(b); {
|
||||
nd, ns, _ := f.Transform(buf[:], b[p:], true)
|
||||
p += ns
|
||||
acc = append(acc, buf[:nd]...)
|
||||
}
|
||||
return acc
|
||||
}
|
||||
|
||||
func TestTransformNorm(t *testing.T) {
|
||||
for _, sz := range transBufSizes {
|
||||
buf := make([]byte, sz)
|
||||
runNormTests(t, fmt.Sprintf("Transform:%d", sz), func(f Form, out []byte, s string) []byte {
|
||||
return doTransNorm(f, buf, append(out, s...))
|
||||
})
|
||||
}
|
||||
}
|
275
vendor/golang.org/x/text/unicode/norm/ucd_test.go
generated
vendored
Normal file
275
vendor/golang.org/x/text/unicode/norm/ucd_test.go
generated
vendored
Normal file
|
@ -0,0 +1,275 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/testtext"
|
||||
)
|
||||
|
||||
var once sync.Once
|
||||
|
||||
func skipShort(t *testing.T) {
|
||||
testtext.SkipIfNotLong(t)
|
||||
|
||||
once.Do(func() { loadTestData(t) })
|
||||
}
|
||||
|
||||
// This regression test runs the test set in NormalizationTest.txt
|
||||
// (taken from http://www.unicode.org/Public/<unicode.Version>/ucd/).
|
||||
//
|
||||
// NormalizationTest.txt has form:
|
||||
// @Part0 # Specific cases
|
||||
// #
|
||||
// 1E0A;1E0A;0044 0307;1E0A;0044 0307; # (Ḋ; Ḋ; D◌̇; Ḋ; D◌̇; ) LATIN CAPITAL LETTER D WITH DOT ABOVE
|
||||
// 1E0C;1E0C;0044 0323;1E0C;0044 0323; # (Ḍ; Ḍ; D◌̣; Ḍ; D◌̣; ) LATIN CAPITAL LETTER D WITH DOT BELOW
|
||||
//
|
||||
// Each test has 5 columns (c1, c2, c3, c4, c5), where
|
||||
// (c1, c2, c3, c4, c5) == (c1, NFC(c1), NFD(c1), NFKC(c1), NFKD(c1))
|
||||
//
|
||||
// CONFORMANCE:
|
||||
// 1. The following invariants must be true for all conformant implementations
|
||||
//
|
||||
// NFC
|
||||
// c2 == NFC(c1) == NFC(c2) == NFC(c3)
|
||||
// c4 == NFC(c4) == NFC(c5)
|
||||
//
|
||||
// NFD
|
||||
// c3 == NFD(c1) == NFD(c2) == NFD(c3)
|
||||
// c5 == NFD(c4) == NFD(c5)
|
||||
//
|
||||
// NFKC
|
||||
// c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
|
||||
//
|
||||
// NFKD
|
||||
// c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
|
||||
//
|
||||
// 2. For every code point X assigned in this version of Unicode that is not
|
||||
// specifically listed in Part 1, the following invariants must be true
|
||||
// for all conformant implementations:
|
||||
//
|
||||
// X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
|
||||
//
|
||||
|
||||
// Column types.
|
||||
const (
|
||||
cRaw = iota
|
||||
cNFC
|
||||
cNFD
|
||||
cNFKC
|
||||
cNFKD
|
||||
cMaxColumns
|
||||
)
|
||||
|
||||
// Holds data from NormalizationTest.txt
|
||||
var part []Part
|
||||
|
||||
type Part struct {
|
||||
name string
|
||||
number int
|
||||
tests []Test
|
||||
}
|
||||
|
||||
type Test struct {
|
||||
name string
|
||||
partnr int
|
||||
number int
|
||||
r rune // used for character by character test
|
||||
cols [cMaxColumns]string // Each has 5 entries, see below.
|
||||
}
|
||||
|
||||
func (t Test) Name() string {
|
||||
if t.number < 0 {
|
||||
return part[t.partnr].name
|
||||
}
|
||||
return fmt.Sprintf("%s:%d", part[t.partnr].name, t.number)
|
||||
}
|
||||
|
||||
var partRe = regexp.MustCompile(`@Part(\d) # (.*)$`)
|
||||
var testRe = regexp.MustCompile(`^` + strings.Repeat(`([\dA-F ]+);`, 5) + ` # (.*)$`)
|
||||
|
||||
var counter int
|
||||
|
||||
// Load the data form NormalizationTest.txt
|
||||
func loadTestData(t *testing.T) {
|
||||
f := gen.OpenUCDFile("NormalizationTest.txt")
|
||||
defer f.Close()
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if len(line) == 0 || line[0] == '#' {
|
||||
continue
|
||||
}
|
||||
m := partRe.FindStringSubmatch(line)
|
||||
if m != nil {
|
||||
if len(m) < 3 {
|
||||
t.Fatal("Failed to parse Part: ", line)
|
||||
}
|
||||
i, err := strconv.Atoi(m[1])
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
name := m[2]
|
||||
part = append(part, Part{name: name[:len(name)-1], number: i})
|
||||
continue
|
||||
}
|
||||
m = testRe.FindStringSubmatch(line)
|
||||
if m == nil || len(m) < 7 {
|
||||
t.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
|
||||
}
|
||||
test := Test{name: m[6], partnr: len(part) - 1, number: counter}
|
||||
counter++
|
||||
for j := 1; j < len(m)-1; j++ {
|
||||
for _, split := range strings.Split(m[j], " ") {
|
||||
r, err := strconv.ParseUint(split, 16, 64)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if test.r == 0 {
|
||||
// save for CharacterByCharacterTests
|
||||
test.r = rune(r)
|
||||
}
|
||||
var buf [utf8.UTFMax]byte
|
||||
sz := utf8.EncodeRune(buf[:], rune(r))
|
||||
test.cols[j-1] += string(buf[:sz])
|
||||
}
|
||||
}
|
||||
part := &part[len(part)-1]
|
||||
part.tests = append(part.tests, test)
|
||||
}
|
||||
if scanner.Err() != nil {
|
||||
t.Fatal(scanner.Err())
|
||||
}
|
||||
}
|
||||
|
||||
func cmpResult(t *testing.T, tc *Test, name string, f Form, gold, test, result string) {
|
||||
if gold != result {
|
||||
t.Errorf("%s:%s: %s(%+q)=%+q; want %+q: %s",
|
||||
tc.Name(), name, fstr[f], test, result, gold, tc.name)
|
||||
}
|
||||
}
|
||||
|
||||
func cmpIsNormal(t *testing.T, tc *Test, name string, f Form, test string, result, want bool) {
|
||||
if result != want {
|
||||
t.Errorf("%s:%s: %s(%+q)=%v; want %v", tc.Name(), name, fstr[f], test, result, want)
|
||||
}
|
||||
}
|
||||
|
||||
func doTest(t *testing.T, tc *Test, f Form, gold, test string) {
|
||||
testb := []byte(test)
|
||||
result := f.Bytes(testb)
|
||||
cmpResult(t, tc, "Bytes", f, gold, test, string(result))
|
||||
|
||||
sresult := f.String(test)
|
||||
cmpResult(t, tc, "String", f, gold, test, sresult)
|
||||
|
||||
acc := []byte{}
|
||||
i := Iter{}
|
||||
i.InitString(f, test)
|
||||
for !i.Done() {
|
||||
acc = append(acc, i.Next()...)
|
||||
}
|
||||
cmpResult(t, tc, "Iter.Next", f, gold, test, string(acc))
|
||||
|
||||
buf := make([]byte, 128)
|
||||
acc = nil
|
||||
for p := 0; p < len(testb); {
|
||||
nDst, nSrc, _ := f.Transform(buf, testb[p:], true)
|
||||
acc = append(acc, buf[:nDst]...)
|
||||
p += nSrc
|
||||
}
|
||||
cmpResult(t, tc, "Transform", f, gold, test, string(acc))
|
||||
|
||||
for i := range test {
|
||||
out := f.Append(f.Bytes([]byte(test[:i])), []byte(test[i:])...)
|
||||
cmpResult(t, tc, fmt.Sprintf(":Append:%d", i), f, gold, test, string(out))
|
||||
}
|
||||
cmpIsNormal(t, tc, "IsNormal", f, test, f.IsNormal([]byte(test)), test == gold)
|
||||
cmpIsNormal(t, tc, "IsNormalString", f, test, f.IsNormalString(test), test == gold)
|
||||
}
|
||||
|
||||
func doConformanceTests(t *testing.T, tc *Test, partn int) {
|
||||
for i := 0; i <= 2; i++ {
|
||||
doTest(t, tc, NFC, tc.cols[1], tc.cols[i])
|
||||
doTest(t, tc, NFD, tc.cols[2], tc.cols[i])
|
||||
doTest(t, tc, NFKC, tc.cols[3], tc.cols[i])
|
||||
doTest(t, tc, NFKD, tc.cols[4], tc.cols[i])
|
||||
}
|
||||
for i := 3; i <= 4; i++ {
|
||||
doTest(t, tc, NFC, tc.cols[3], tc.cols[i])
|
||||
doTest(t, tc, NFD, tc.cols[4], tc.cols[i])
|
||||
doTest(t, tc, NFKC, tc.cols[3], tc.cols[i])
|
||||
doTest(t, tc, NFKD, tc.cols[4], tc.cols[i])
|
||||
}
|
||||
}
|
||||
|
||||
func TestCharacterByCharacter(t *testing.T) {
|
||||
skipShort(t)
|
||||
tests := part[1].tests
|
||||
var last rune = 0
|
||||
for i := 0; i <= len(tests); i++ { // last one is special case
|
||||
var r rune
|
||||
if i == len(tests) {
|
||||
r = 0x2FA1E // Don't have to go to 0x10FFFF
|
||||
} else {
|
||||
r = tests[i].r
|
||||
}
|
||||
for last++; last < r; last++ {
|
||||
// Check all characters that were not explicitly listed in the test.
|
||||
tc := &Test{partnr: 1, number: -1}
|
||||
char := string(last)
|
||||
doTest(t, tc, NFC, char, char)
|
||||
doTest(t, tc, NFD, char, char)
|
||||
doTest(t, tc, NFKC, char, char)
|
||||
doTest(t, tc, NFKD, char, char)
|
||||
}
|
||||
if i < len(tests) {
|
||||
doConformanceTests(t, &tests[i], 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestStandardTests(t *testing.T) {
|
||||
skipShort(t)
|
||||
for _, j := range []int{0, 2, 3} {
|
||||
for _, test := range part[j].tests {
|
||||
doConformanceTests(t, &test, j)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestPerformance verifies that normalization is O(n). If any of the
|
||||
// code does not properly check for maxCombiningChars, normalization
|
||||
// may exhibit O(n**2) behavior.
|
||||
func TestPerformance(t *testing.T) {
|
||||
skipShort(t)
|
||||
runtime.GOMAXPROCS(2)
|
||||
success := make(chan bool, 1)
|
||||
go func() {
|
||||
buf := bytes.Repeat([]byte("\u035D"), 1024*1024)
|
||||
buf = append(buf, "\u035B"...)
|
||||
NFC.Append(nil, buf...)
|
||||
success <- true
|
||||
}()
|
||||
timeout := time.After(1 * time.Second)
|
||||
select {
|
||||
case <-success:
|
||||
// test completed before the timeout
|
||||
case <-timeout:
|
||||
t.Errorf(`unexpectedly long time to complete PerformanceTest`)
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue