163 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
		
		
			
		
	
	
			163 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
|   | // Copyright 2015 The Go Authors. All rights reserved. | ||
|  | // Use of this source code is governed by a BSD-style | ||
|  | // license that can be found in the LICENSE file. | ||
|  | 
 | ||
|  | // +build ignore | ||
|  | 
 | ||
|  | package main | ||
|  | 
 | ||
|  | // This file generates derivative tables based on the language package itself. | ||
|  | 
 | ||
|  | import ( | ||
|  | 	"bytes" | ||
|  | 	"flag" | ||
|  | 	"fmt" | ||
|  | 	"io/ioutil" | ||
|  | 	"log" | ||
|  | 	"reflect" | ||
|  | 	"sort" | ||
|  | 	"strings" | ||
|  | 
 | ||
|  | 	"golang.org/x/text/internal/gen" | ||
|  | 	"golang.org/x/text/language" | ||
|  | 	"golang.org/x/text/unicode/cldr" | ||
|  | ) | ||
|  | 
 | ||
// test holds the value of the -test command-line flag; it defaults to false.
var test = flag.Bool("test", false,
	"test existing tables; can be used to compare web data with package data.")

// draft holds the value of the -draft command-line flag; it defaults to
// "contributed".
var draft = flag.String("draft",
	"contributed",
	`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
|  | 
 | ||
|  | func main() { | ||
|  | 	gen.Init() | ||
|  | 
 | ||
|  | 	// Read the CLDR zip file. | ||
|  | 	r := gen.OpenCLDRCoreZip() | ||
|  | 	defer r.Close() | ||
|  | 
 | ||
|  | 	d := &cldr.Decoder{} | ||
|  | 	data, err := d.DecodeZip(r) | ||
|  | 	if err != nil { | ||
|  | 		log.Fatalf("DecodeZip: %v", err) | ||
|  | 	} | ||
|  | 
 | ||
|  | 	w := gen.NewCodeWriter() | ||
|  | 	defer func() { | ||
|  | 		buf := &bytes.Buffer{} | ||
|  | 
 | ||
|  | 		if _, err = w.WriteGo(buf, "language"); err != nil { | ||
|  | 			log.Fatalf("Error formatting file index.go: %v", err) | ||
|  | 		} | ||
|  | 
 | ||
|  | 		// Since we're generating a table for our own package we need to rewrite | ||
|  | 		// doing the equivalent of go fmt -r 'language.b -> b'. Using | ||
|  | 		// bytes.Replace will do. | ||
|  | 		out := bytes.Replace(buf.Bytes(), []byte("language."), nil, -1) | ||
|  | 		if err := ioutil.WriteFile("index.go", out, 0600); err != nil { | ||
|  | 			log.Fatalf("Could not create file index.go: %v", err) | ||
|  | 		} | ||
|  | 	}() | ||
|  | 
 | ||
|  | 	m := map[language.Tag]bool{} | ||
|  | 	for _, lang := range data.Locales() { | ||
|  | 		// We include all locales unconditionally to be consistent with en_US. | ||
|  | 		// We want en_US, even though it has no data associated with it. | ||
|  | 
 | ||
|  | 		// TODO: put any of the languages for which no data exists at the end | ||
|  | 		// of the index. This allows all components based on ICU to use that | ||
|  | 		// as the cutoff point. | ||
|  | 		// if x := data.RawLDML(lang); false || | ||
|  | 		// 	x.LocaleDisplayNames != nil || | ||
|  | 		// 	x.Characters != nil || | ||
|  | 		// 	x.Delimiters != nil || | ||
|  | 		// 	x.Measurement != nil || | ||
|  | 		// 	x.Dates != nil || | ||
|  | 		// 	x.Numbers != nil || | ||
|  | 		// 	x.Units != nil || | ||
|  | 		// 	x.ListPatterns != nil || | ||
|  | 		// 	x.Collations != nil || | ||
|  | 		// 	x.Segmentations != nil || | ||
|  | 		// 	x.Rbnf != nil || | ||
|  | 		// 	x.Annotations != nil || | ||
|  | 		// 	x.Metadata != nil { | ||
|  | 
 | ||
|  | 		// TODO: support POSIX natively, albeit non-standard. | ||
|  | 		tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1)) | ||
|  | 		m[tag] = true | ||
|  | 		// } | ||
|  | 	} | ||
|  | 	// Include locales for plural rules, which uses a different structure. | ||
|  | 	for _, plurals := range data.Supplemental().Plurals { | ||
|  | 		for _, rules := range plurals.PluralRules { | ||
|  | 			for _, lang := range strings.Split(rules.Locales, " ") { | ||
|  | 				m[language.Make(lang)] = true | ||
|  | 			} | ||
|  | 		} | ||
|  | 	} | ||
|  | 
 | ||
|  | 	var core, special []language.Tag | ||
|  | 
 | ||
|  | 	for t := range m { | ||
|  | 		if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" { | ||
|  | 			log.Fatalf("Unexpected extension %v in %v", x, t) | ||
|  | 		} | ||
|  | 		if len(t.Variants()) == 0 && len(t.Extensions()) == 0 { | ||
|  | 			core = append(core, t) | ||
|  | 		} else { | ||
|  | 			special = append(special, t) | ||
|  | 		} | ||
|  | 	} | ||
|  | 
 | ||
|  | 	w.WriteComment(` | ||
|  | 	NumCompactTags is the number of common tags. The maximum tag is | ||
|  | 	NumCompactTags-1.`) | ||
|  | 	w.WriteConst("NumCompactTags", len(core)+len(special)) | ||
|  | 
 | ||
|  | 	sort.Sort(byAlpha(special)) | ||
|  | 	w.WriteVar("specialTags", special) | ||
|  | 
 | ||
|  | 	// TODO: order by frequency? | ||
|  | 	sort.Sort(byAlpha(core)) | ||
|  | 
 | ||
|  | 	// Size computations are just an estimate. | ||
|  | 	w.Size += int(reflect.TypeOf(map[uint32]uint16{}).Size()) | ||
|  | 	w.Size += len(core) * 6 // size of uint32 and uint16 | ||
|  | 
 | ||
|  | 	fmt.Fprintln(w) | ||
|  | 	fmt.Fprintln(w, "var coreTags = map[uint32]uint16{") | ||
|  | 	fmt.Fprintln(w, "0x0: 0, // und") | ||
|  | 	i := len(special) + 1 // Und and special tags already written. | ||
|  | 	for _, t := range core { | ||
|  | 		if t == language.Und { | ||
|  | 			continue | ||
|  | 		} | ||
|  | 		fmt.Fprint(w.Hash, t, i) | ||
|  | 		b, s, r := t.Raw() | ||
|  | 		fmt.Fprintf(w, "0x%s%s%s: %d, // %s\n", | ||
|  | 			getIndex(b, 3), // 3 is enough as it is guaranteed to be a compact number | ||
|  | 			getIndex(s, 2), | ||
|  | 			getIndex(r, 3), | ||
|  | 			i, t) | ||
|  | 		i++ | ||
|  | 	} | ||
|  | 	fmt.Fprintln(w, "}") | ||
|  | } | ||
|  | 
 | ||
// getIndex formats x with %#v, which is expected to yield a value of the form
// Type{typeID: 0x00}, and returns the hexadecimal digits that follow the first
// "0x" with the final character (the closing brace) removed. The digits are
// left-padded with zeros to a width of n nibbles; having more than n digits
// makes strings.Repeat panic on a negative count.
func getIndex(x interface{}, n int) string {
	repr := fmt.Sprintf("%#v", x)
	start := strings.Index(repr, "0x") + len("0x")
	digits := repr[start : len(repr)-1]
	padding := strings.Repeat("0", n-len(digits))
	return padding + digits
}
|  | 
 | ||
|  | type byAlpha []language.Tag | ||
|  | 
 | ||
|  | func (a byAlpha) Len() int           { return len(a) } | ||
|  | func (a byAlpha) Swap(i, j int)      { a[i], a[j] = a[j], a[i] } | ||
|  | func (a byAlpha) Less(i, j int) bool { return a[i].String() < a[j].String() } |