2b39f9f4b2
Among others, this updates minio-go, so that the new "eu-west-3" zone for AWS is supported.
602 lines
15 KiB
Go
602 lines
15 KiB
Go
// Copyright 2014 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// +build ignore
|
|
|
|
// Generator for display name tables.
|
|
|
|
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"flag"
|
|
"fmt"
|
|
"log"
|
|
"reflect"
|
|
"sort"
|
|
"strings"
|
|
|
|
"golang.org/x/text/internal/gen"
|
|
"golang.org/x/text/language"
|
|
"golang.org/x/text/unicode/cldr"
|
|
)
|
|
|
|
var (
|
|
test = flag.Bool("test", false,
|
|
"test existing tables; can be used to compare web data with package data.")
|
|
outputFile = flag.String("output", "tables.go", "output file")
|
|
|
|
stats = flag.Bool("stats", false, "prints statistics to stderr")
|
|
|
|
short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
|
|
draft = flag.String("draft",
|
|
"contributed",
|
|
`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
|
|
pkg = flag.String("package",
|
|
"display",
|
|
"the name of the package in which the generated file is to be included")
|
|
|
|
tags = newTagSet("tags",
|
|
[]language.Tag{},
|
|
"space-separated list of tags to include or empty for all")
|
|
dict = newTagSet("dict",
|
|
dictTags(),
|
|
"space-separated list or tags for which to include a Dictionary. "+
|
|
`"" means the common list from go.text/language.`)
|
|
)
|
|
|
|
func dictTags() (tag []language.Tag) {
|
|
// TODO: replace with language.Common.Tags() once supported.
|
|
const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " +
|
|
"es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " +
|
|
"ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " +
|
|
"pt-PT ro ru si sk sl sq sr sr-Latn sv sw ta te th tr uk ur uz vi " +
|
|
"zh zh-Hans zh-Hant zu"
|
|
|
|
for _, s := range strings.Split(str, " ") {
|
|
tag = append(tag, language.MustParse(s))
|
|
}
|
|
return tag
|
|
}
|
|
|
|
func main() {
|
|
gen.Init()
|
|
|
|
// Read the CLDR zip file.
|
|
r := gen.OpenCLDRCoreZip()
|
|
defer r.Close()
|
|
|
|
d := &cldr.Decoder{}
|
|
d.SetDirFilter("main", "supplemental")
|
|
d.SetSectionFilter("localeDisplayNames")
|
|
data, err := d.DecodeZip(r)
|
|
if err != nil {
|
|
log.Fatalf("DecodeZip: %v", err)
|
|
}
|
|
|
|
w := gen.NewCodeWriter()
|
|
defer w.WriteGoFile(*outputFile, "display")
|
|
|
|
gen.WriteCLDRVersion(w)
|
|
|
|
b := builder{
|
|
w: w,
|
|
data: data,
|
|
group: make(map[string]*group),
|
|
}
|
|
b.generate()
|
|
}
|
|
|
|
const tagForm = language.All
|
|
|
|
// tagSet is used to parse command line flags of tags. It implements the
|
|
// flag.Value interface.
|
|
type tagSet map[language.Tag]bool
|
|
|
|
func newTagSet(name string, tags []language.Tag, usage string) tagSet {
|
|
f := tagSet(make(map[language.Tag]bool))
|
|
for _, t := range tags {
|
|
f[t] = true
|
|
}
|
|
flag.Var(f, name, usage)
|
|
return f
|
|
}
|
|
|
|
// String implements the String method of the flag.Value interface.
|
|
func (f tagSet) String() string {
|
|
tags := []string{}
|
|
for t := range f {
|
|
tags = append(tags, t.String())
|
|
}
|
|
sort.Strings(tags)
|
|
return strings.Join(tags, " ")
|
|
}
|
|
|
|
// Set implements Set from the flag.Value interface.
|
|
func (f tagSet) Set(s string) error {
|
|
if s != "" {
|
|
for _, s := range strings.Split(s, " ") {
|
|
if s != "" {
|
|
tag, err := tagForm.Parse(s)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
f[tag] = true
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (f tagSet) contains(t language.Tag) bool {
|
|
if len(f) == 0 {
|
|
return true
|
|
}
|
|
return f[t]
|
|
}
|
|
|
|
// builder is used to create all tables with display name information.
|
|
type builder struct {
|
|
w *gen.CodeWriter
|
|
|
|
data *cldr.CLDR
|
|
|
|
fromLocs []string
|
|
|
|
// destination tags for the current locale.
|
|
toTags []string
|
|
toTagIndex map[string]int
|
|
|
|
// list of supported tags
|
|
supported []language.Tag
|
|
|
|
// key-value pairs per group
|
|
group map[string]*group
|
|
|
|
// statistics
|
|
sizeIndex int // total size of all indexes of headers
|
|
sizeData int // total size of all data of headers
|
|
totalSize int
|
|
}
|
|
|
|
type group struct {
|
|
// Maps from a given language to the Namer data for this language.
|
|
lang map[language.Tag]keyValues
|
|
headers []header
|
|
|
|
toTags []string
|
|
threeStart int
|
|
fourPlusStart int
|
|
}
|
|
|
|
// set sets the typ to the name for locale loc.
|
|
func (g *group) set(t language.Tag, typ, name string) {
|
|
kv := g.lang[t]
|
|
if kv == nil {
|
|
kv = make(keyValues)
|
|
g.lang[t] = kv
|
|
}
|
|
if kv[typ] == "" {
|
|
kv[typ] = name
|
|
}
|
|
}
|
|
|
|
type keyValues map[string]string
|
|
|
|
type header struct {
|
|
tag language.Tag
|
|
data string
|
|
index []uint16
|
|
}
|
|
|
|
var versionInfo = `// Version is deprecated. Use CLDRVersion.
|
|
const Version = %#v
|
|
|
|
`
|
|
|
|
var self = language.MustParse("mul")
|
|
|
|
// generate builds and writes all tables.
|
|
func (b *builder) generate() {
|
|
fmt.Fprintf(b.w, versionInfo, cldr.Version)
|
|
|
|
b.filter()
|
|
b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
|
|
if ldn.Languages != nil {
|
|
for _, v := range ldn.Languages.Language {
|
|
lang := v.Type
|
|
if lang == "root" {
|
|
// We prefer the data from "und"
|
|
// TODO: allow both the data for root and und somehow.
|
|
continue
|
|
}
|
|
tag := tagForm.MustParse(lang)
|
|
if tags.contains(tag) {
|
|
g.set(loc, tag.String(), v.Data())
|
|
}
|
|
}
|
|
}
|
|
})
|
|
b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
|
|
if ldn.Scripts != nil {
|
|
for _, v := range ldn.Scripts.Script {
|
|
code := language.MustParseScript(v.Type)
|
|
if code.IsPrivateUse() { // Qaaa..Qabx
|
|
// TODO: data currently appears to be very meager.
|
|
// Reconsider if we have data for English.
|
|
if loc == language.English {
|
|
log.Fatal("Consider including data for private use scripts.")
|
|
}
|
|
continue
|
|
}
|
|
g.set(loc, code.String(), v.Data())
|
|
}
|
|
}
|
|
})
|
|
b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
|
|
if ldn.Territories != nil {
|
|
for _, v := range ldn.Territories.Territory {
|
|
g.set(loc, language.MustParseRegion(v.Type).String(), v.Data())
|
|
}
|
|
}
|
|
})
|
|
|
|
b.makeSupported()
|
|
|
|
b.writeParents()
|
|
|
|
b.writeGroup("lang")
|
|
b.writeGroup("script")
|
|
b.writeGroup("region")
|
|
|
|
b.w.WriteConst("numSupported", len(b.supported))
|
|
buf := bytes.Buffer{}
|
|
for _, tag := range b.supported {
|
|
fmt.Fprint(&buf, tag.String(), "|")
|
|
}
|
|
b.w.WriteConst("supported", buf.String())
|
|
|
|
b.writeDictionaries()
|
|
|
|
b.supported = []language.Tag{self}
|
|
|
|
// Compute the names of locales in their own language. Some of these names
|
|
// may be specified in their parent locales. We iterate the maximum depth
|
|
// of the parent three times to match successive parents of tags until a
|
|
// possible match is found.
|
|
for i := 0; i < 4; i++ {
|
|
b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) {
|
|
parent := tag
|
|
if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) {
|
|
parent, _ = language.Raw.Compose(b)
|
|
}
|
|
if ldn.Languages != nil {
|
|
for _, v := range ldn.Languages.Language {
|
|
key := tagForm.MustParse(v.Type)
|
|
saved := key
|
|
if key == parent {
|
|
g.set(self, tag.String(), v.Data())
|
|
}
|
|
for k := 0; k < i; k++ {
|
|
key = key.Parent()
|
|
}
|
|
if key == tag {
|
|
g.set(self, saved.String(), v.Data()) // set does not overwrite a value.
|
|
}
|
|
}
|
|
}
|
|
})
|
|
}
|
|
|
|
b.writeGroup("self")
|
|
}
|
|
|
|
func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) {
|
|
b.sizeIndex = 0
|
|
b.sizeData = 0
|
|
b.toTags = nil
|
|
b.fromLocs = nil
|
|
b.toTagIndex = make(map[string]int)
|
|
|
|
g := b.group[name]
|
|
if g == nil {
|
|
g = &group{lang: make(map[language.Tag]keyValues)}
|
|
b.group[name] = g
|
|
}
|
|
for _, loc := range b.data.Locales() {
|
|
// We use RawLDML instead of LDML as we are managing our own inheritance
|
|
// in this implementation.
|
|
ldml := b.data.RawLDML(loc)
|
|
|
|
// We do not support the POSIX variant (it is not a supported BCP 47
|
|
// variant). This locale also doesn't happen to contain any data, so
|
|
// we'll skip it by checking for this.
|
|
tag, err := tagForm.Parse(loc)
|
|
if err != nil {
|
|
if ldml.LocaleDisplayNames != nil {
|
|
log.Fatalf("setData: %v", err)
|
|
}
|
|
continue
|
|
}
|
|
if ldml.LocaleDisplayNames != nil && tags.contains(tag) {
|
|
f(g, tag, ldml.LocaleDisplayNames)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (b *builder) filter() {
|
|
filter := func(s *cldr.Slice) {
|
|
if *short {
|
|
s.SelectOnePerGroup("alt", []string{"short", ""})
|
|
} else {
|
|
s.SelectOnePerGroup("alt", []string{"stand-alone", ""})
|
|
}
|
|
d, err := cldr.ParseDraft(*draft)
|
|
if err != nil {
|
|
log.Fatalf("filter: %v", err)
|
|
}
|
|
s.SelectDraft(d)
|
|
}
|
|
for _, loc := range b.data.Locales() {
|
|
if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil {
|
|
if ldn.Languages != nil {
|
|
s := cldr.MakeSlice(&ldn.Languages.Language)
|
|
if filter(&s); len(ldn.Languages.Language) == 0 {
|
|
ldn.Languages = nil
|
|
}
|
|
}
|
|
if ldn.Scripts != nil {
|
|
s := cldr.MakeSlice(&ldn.Scripts.Script)
|
|
if filter(&s); len(ldn.Scripts.Script) == 0 {
|
|
ldn.Scripts = nil
|
|
}
|
|
}
|
|
if ldn.Territories != nil {
|
|
s := cldr.MakeSlice(&ldn.Territories.Territory)
|
|
if filter(&s); len(ldn.Territories.Territory) == 0 {
|
|
ldn.Territories = nil
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// makeSupported creates a list of all supported locales.
|
|
func (b *builder) makeSupported() {
|
|
// tags across groups
|
|
for _, g := range b.group {
|
|
for t, _ := range g.lang {
|
|
b.supported = append(b.supported, t)
|
|
}
|
|
}
|
|
b.supported = b.supported[:unique(tagsSorter(b.supported))]
|
|
|
|
}
|
|
|
|
type tagsSorter []language.Tag
|
|
|
|
func (a tagsSorter) Len() int { return len(a) }
|
|
func (a tagsSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
|
func (a tagsSorter) Less(i, j int) bool { return a[i].String() < a[j].String() }
|
|
|
|
func (b *builder) writeGroup(name string) {
|
|
g := b.group[name]
|
|
|
|
for _, kv := range g.lang {
|
|
for t, _ := range kv {
|
|
g.toTags = append(g.toTags, t)
|
|
}
|
|
}
|
|
g.toTags = g.toTags[:unique(tagsBySize(g.toTags))]
|
|
|
|
// Allocate header per supported value.
|
|
g.headers = make([]header, len(b.supported))
|
|
for i, sup := range b.supported {
|
|
kv, ok := g.lang[sup]
|
|
if !ok {
|
|
g.headers[i].tag = sup
|
|
continue
|
|
}
|
|
data := []byte{}
|
|
index := make([]uint16, len(g.toTags), len(g.toTags)+1)
|
|
for j, t := range g.toTags {
|
|
index[j] = uint16(len(data))
|
|
data = append(data, kv[t]...)
|
|
}
|
|
index = append(index, uint16(len(data)))
|
|
|
|
// Trim the tail of the index.
|
|
// TODO: indexes can be reduced in size quite a bit more.
|
|
n := len(index)
|
|
for ; n >= 2 && index[n-2] == index[n-1]; n-- {
|
|
}
|
|
index = index[:n]
|
|
|
|
// Workaround for a bug in CLDR 26.
|
|
// See http://unicode.org/cldr/trac/ticket/8042.
|
|
if cldr.Version == "26" && sup.String() == "hsb" {
|
|
data = bytes.Replace(data, []byte{'"'}, nil, 1)
|
|
}
|
|
g.headers[i] = header{sup, string(data), index}
|
|
}
|
|
g.writeTable(b.w, name)
|
|
}
|
|
|
|
type tagsBySize []string
|
|
|
|
func (l tagsBySize) Len() int { return len(l) }
|
|
func (l tagsBySize) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
|
|
func (l tagsBySize) Less(i, j int) bool {
|
|
a, b := l[i], l[j]
|
|
// Sort single-tag entries based on size first. Otherwise alphabetic.
|
|
if len(a) != len(b) && (len(a) <= 4 || len(b) <= 4) {
|
|
return len(a) < len(b)
|
|
}
|
|
return a < b
|
|
}
|
|
|
|
// parentIndices returns slice a of len(tags) where tags[a[i]] is the parent
|
|
// of tags[i].
|
|
func parentIndices(tags []language.Tag) []int16 {
|
|
index := make(map[language.Tag]int16)
|
|
for i, t := range tags {
|
|
index[t] = int16(i)
|
|
}
|
|
|
|
// Construct default parents.
|
|
parents := make([]int16, len(tags))
|
|
for i, t := range tags {
|
|
parents[i] = -1
|
|
for t = t.Parent(); t != language.Und; t = t.Parent() {
|
|
if j, ok := index[t]; ok {
|
|
parents[i] = j
|
|
break
|
|
}
|
|
}
|
|
}
|
|
return parents
|
|
}
|
|
|
|
func (b *builder) writeParents() {
|
|
parents := parentIndices(b.supported)
|
|
fmt.Fprintf(b.w, "var parents = ")
|
|
b.w.WriteArray(parents)
|
|
}
|
|
|
|
// writeKeys writes keys to a special index used by the display package.
|
|
// tags are assumed to be sorted by length.
|
|
func writeKeys(w *gen.CodeWriter, name string, keys []string) {
|
|
w.Size += int(3 * reflect.TypeOf("").Size())
|
|
w.WriteComment("Number of keys: %d", len(keys))
|
|
fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name)
|
|
for i := 2; i <= 4; i++ {
|
|
sub := []string{}
|
|
for _, t := range keys {
|
|
if len(t) != i {
|
|
break
|
|
}
|
|
sub = append(sub, t)
|
|
}
|
|
s := strings.Join(sub, "")
|
|
w.WriteString(s)
|
|
fmt.Fprintf(w, ",\n")
|
|
keys = keys[len(sub):]
|
|
}
|
|
fmt.Fprintln(w, "\t}")
|
|
if len(keys) > 0 {
|
|
w.Size += int(reflect.TypeOf([]string{}).Size())
|
|
fmt.Fprintf(w, "\t%sTagsLong = ", name)
|
|
w.WriteSlice(keys)
|
|
}
|
|
fmt.Fprintln(w, ")\n")
|
|
}
|
|
|
|
// identifier creates an identifier from the given tag.
|
|
func identifier(t language.Tag) string {
|
|
return strings.Replace(t.String(), "-", "", -1)
|
|
}
|
|
|
|
func (h *header) writeEntry(w *gen.CodeWriter, name string) {
|
|
if len(dict) > 0 && dict.contains(h.tag) {
|
|
fmt.Fprintf(w, "\t{ // %s\n", h.tag)
|
|
fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", identifier(h.tag), name)
|
|
fmt.Fprintln(w, "\t},")
|
|
} else if len(h.data) == 0 {
|
|
fmt.Fprintln(w, "\t\t{}, //", h.tag)
|
|
} else {
|
|
fmt.Fprintf(w, "\t{ // %s\n", h.tag)
|
|
w.WriteString(h.data)
|
|
fmt.Fprintln(w, ",")
|
|
w.WriteSlice(h.index)
|
|
fmt.Fprintln(w, ",\n\t},")
|
|
}
|
|
}
|
|
|
|
// write the data for the given header as single entries. The size for this data
|
|
// was already accounted for in writeEntry.
|
|
func (h *header) writeSingle(w *gen.CodeWriter, name string) {
|
|
if len(dict) > 0 && dict.contains(h.tag) {
|
|
tag := identifier(h.tag)
|
|
w.WriteConst(tag+name+"Str", h.data)
|
|
|
|
// Note that we create a slice instead of an array. If we use an array
|
|
// we need to refer to it as a[:] in other tables, which will cause the
|
|
// array to always be included by the linker. See Issue 7651.
|
|
w.WriteVar(tag+name+"Idx", h.index)
|
|
}
|
|
}
|
|
|
|
// WriteTable writes an entry for a single Namer.
|
|
func (g *group) writeTable(w *gen.CodeWriter, name string) {
|
|
start := w.Size
|
|
writeKeys(w, name, g.toTags)
|
|
w.Size += len(g.headers) * int(reflect.ValueOf(g.headers[0]).Type().Size())
|
|
|
|
fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers))
|
|
|
|
title := strings.Title(name)
|
|
for _, h := range g.headers {
|
|
h.writeEntry(w, title)
|
|
}
|
|
fmt.Fprintln(w, "}\n")
|
|
|
|
for _, h := range g.headers {
|
|
h.writeSingle(w, title)
|
|
}
|
|
n := w.Size - start
|
|
fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000)
|
|
}
|
|
|
|
func (b *builder) writeDictionaries() {
|
|
fmt.Fprintln(b.w, "// Dictionary entries of frequent languages")
|
|
fmt.Fprintln(b.w, "var (")
|
|
parents := parentIndices(b.supported)
|
|
|
|
for i, t := range b.supported {
|
|
if dict.contains(t) {
|
|
ident := identifier(t)
|
|
fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t)
|
|
if p := parents[i]; p == -1 {
|
|
fmt.Fprintln(b.w, "\t\tnil,")
|
|
} else {
|
|
fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p]))
|
|
}
|
|
fmt.Fprintf(b.w, "\t\theader{%[1]sLangStr, %[1]sLangIdx},\n", ident)
|
|
fmt.Fprintf(b.w, "\t\theader{%[1]sScriptStr, %[1]sScriptIdx},\n", ident)
|
|
fmt.Fprintf(b.w, "\t\theader{%[1]sRegionStr, %[1]sRegionIdx},\n", ident)
|
|
fmt.Fprintln(b.w, "\t}")
|
|
}
|
|
}
|
|
fmt.Fprintln(b.w, ")")
|
|
|
|
var s string
|
|
var a []uint16
|
|
sz := reflect.TypeOf(s).Size()
|
|
sz += reflect.TypeOf(a).Size()
|
|
sz *= 3
|
|
sz += reflect.TypeOf(&a).Size()
|
|
n := int(sz) * len(dict)
|
|
fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000)
|
|
|
|
b.w.Size += n
|
|
}
|
|
|
|
// unique sorts the given lists and removes duplicate entries by swapping them
|
|
// past position k, where k is the number of unique values. It returns k.
|
|
func unique(a sort.Interface) int {
|
|
if a.Len() == 0 {
|
|
return 0
|
|
}
|
|
sort.Sort(a)
|
|
k := 1
|
|
for i := 1; i < a.Len(); i++ {
|
|
if a.Less(k-1, i) {
|
|
if k != i {
|
|
a.Swap(k, i)
|
|
}
|
|
k++
|
|
}
|
|
}
|
|
return k
|
|
}
|