| 
									
										
										
										
											2016-02-11 16:16:52 +02:00
										 |  |  | // Copyright 2013 The Go Authors. All rights reserved. | 
					
						
							|  |  |  | // Use of this source code is governed by a BSD-style | 
					
						
							|  |  |  | // license that can be found in the LICENSE file. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // +build ignore | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Language tag table generator. | 
					
						
							|  |  |  | // Data read from the web. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | package main | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							|  |  |  | 	"bufio" | 
					
						
							|  |  |  | 	"flag" | 
					
						
							|  |  |  | 	"fmt" | 
					
						
							|  |  |  | 	"io" | 
					
						
							|  |  |  | 	"io/ioutil" | 
					
						
							|  |  |  | 	"log" | 
					
						
							|  |  |  | 	"math" | 
					
						
							|  |  |  | 	"reflect" | 
					
						
							|  |  |  | 	"regexp" | 
					
						
							|  |  |  | 	"sort" | 
					
						
							|  |  |  | 	"strconv" | 
					
						
							|  |  |  | 	"strings" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	"golang.org/x/text/internal/gen" | 
					
						
							|  |  |  | 	"golang.org/x/text/internal/tag" | 
					
						
							|  |  |  | 	"golang.org/x/text/unicode/cldr" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
// Command-line flags for the generator.
var (
	// test selects the "test existing tables" mode described by its help
	// text; it defaults to false. How the flag is consumed is not visible in
	// this part of the file.
	test = flag.Bool("test",
		false,
		"test existing tables; can be used to compare web data with package data.")
	// outputFile names the file that receives the generated tables; it
	// defaults to "tables.go".
	outputFile = flag.String("output",
		"tables.go",
		"output file for generated tables")
)
					
						
							|  |  |  | 
 | 
					
						
// comment holds the documentation text for the generated tables and
// constants. Each entry starts with a newline followed by the name of the
// item it documents; init indexes the entries by that first word into
// commentIndex, and builder.comment looks them up by name.
//
// The strings are emitted verbatim, so editing them changes the generated
// output.
// NOTE(review): "[A-Z}{2}" in the regionISO entry and "msb39(code)" in the
// m49Index entry look like typos for "[A-Z]{2}" and "msb3(code)" — confirm
// before changing, since the generated file would change too.
var comment = []string{
	`
lang holds an alphabetically sorted list of ISO-639 language identifiers.
All entries are 4 bytes. The index of the identifier (divided by 4) is the language tag.
For 2-byte language identifiers, the two successive bytes have the following meaning:
    - if the first letter of the 2- and 3-letter ISO codes are the same:
      the second and third letter of the 3-letter ISO code.
    - otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3.
For 3-byte language identifiers the 4th byte is 0.`,
	`
langNoIndex is a bit vector of all 3-letter language codes that are not used as an index
in lookup tables. The language ids for these language codes are derived directly
from the letters and are not consecutive.`,
	`
altLangISO3 holds an alphabetically sorted list of 3-letter language code alternatives
to 2-letter language codes that cannot be derived using the method described above.
Each 3-letter code is followed by its 1-byte langID.`,
	`
altLangIndex is used to convert indexes in altLangISO3 to langIDs.`,
	`
langAliasMap maps langIDs to their suggested replacements.`,
	`
script is an alphabetically sorted list of ISO 15924 codes. The index
of the script in the string, divided by 4, is the internal scriptID.`,
	`
isoRegionOffset needs to be added to the index of regionISO to obtain the regionID
for 2-letter ISO codes. (The first isoRegionOffset regionIDs are reserved for
the UN.M49 codes used for groups.)`,
	`
regionISO holds a list of alphabetically sorted 2-letter ISO region codes.
Each 2-letter codes is followed by two bytes with the following meaning:
    - [A-Z}{2}: the first letter of the 2-letter code plus these two 
                letters form the 3-letter ISO code.
    - 0, n:     index into altRegionISO3.`,
	`
regionTypes defines the status of a region for various standards.`,
	`
m49 maps regionIDs to UN.M49 codes. The first isoRegionOffset entries are
codes indicating collections of regions.`,
	`
m49Index gives indexes into fromM49 based on the three most significant bits
of a 10-bit UN.M49 code. To search an UN.M49 code in fromM49, search in
   fromM49[m49Index[msb39(code)]:m49Index[msb3(code)+1]]
for an entry where the first 7 bits match the 7 lsb of the UN.M49 code.
The region code is stored in the 9 lsb of the indexed value.`,
	`
fromM49 contains entries to map UN.M49 codes to regions. See m49Index for details.`,
	`
altRegionISO3 holds a list of 3-letter region codes that cannot be
mapped to 2-letter codes using the default algorithm. This is a short list.`,
	`
altRegionIDs holds a list of regionIDs the positions of which match those
of the 3-letter ISO codes in altRegionISO3.`,
	`
variantNumSpecialized is the number of specialized variants in variants.`,
	`
suppressScript is an index from langID to the dominant script for that language,
if it exists.  If a script is given, it should be suppressed from the language tag.`,
	`
likelyLang is a lookup table, indexed by langID, for the most likely
scripts and regions given incomplete information. If more entries exist for a
given language, region and script are the index and size respectively
of the list in likelyLangList.`,
	`
likelyLangList holds lists info associated with likelyLang.`,
	`
likelyRegion is a lookup table, indexed by regionID, for the most likely
languages and scripts given incomplete information. If more entries exist
for a given regionID, lang and script are the index and size respectively
of the list in likelyRegionList.
TODO: exclude containers and user-definable regions from the list.`,
	`
likelyRegionList holds lists info associated with likelyRegion.`,
	`
likelyScript is a lookup table, indexed by scriptID, for the most likely
languages and regions given a script.`,
	`
matchLang holds pairs of langIDs of base languages that are typically
mutually intelligible. Each pair is associated with a confidence and
whether the intelligibility goes one or both ways.`,
	`
matchScript holds pairs of scriptIDs where readers of one script
can typically also read the other. Each is associated with a confidence.`,
	`
nRegionGroups is the number of region groups.`,
	`
regionInclusion maps region identifiers to sets of regions in regionInclusionBits,
where each set holds all groupings that are directly connected in a region
containment graph.`,
	`
regionInclusionBits is an array of bit vectors where every vector represents
a set of region groupings.  These sets are used to compute the distance
between two regions for the purpose of language matching.`,
	`
regionInclusionNext marks, for each entry in regionInclusionBits, the set of
all groups that are reachable from the groups set in the respective entry.`,
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // TODO: consider changing some of these structures to tries. This can reduce | 
					
						
							|  |  |  | // memory, but may increase the need for memory allocations. This could be | 
					
						
							|  |  |  | // mitigated if we can piggyback on language tags for common cases. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func failOnError(e error) { | 
					
						
							|  |  |  | 	if e != nil { | 
					
						
							|  |  |  | 		log.Panic(e) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
// setType identifies how a stringSet is used. It is recorded in
// stringSet.typ and checked by stringSet.setType. The zero value means that
// no usage has been recorded yet.
type setType int

// Usage kinds for a stringSet. Values start at 1 so that 0 stays "unset".
const (
	Indexed setType = 1 + iota // all elements must be of same size
	Linear
)
					
						
							|  |  |  | 
 | 
					
						
// stringSet is a collection of strings that is sorted and de-duplicated on
// demand (see compact) and can be frozen against further modification.
type stringSet struct {
	// s holds the elements; it is only guaranteed sorted and unique right
	// after compact has run.
	s []string
	// sorted lets compact return early; frozen makes add and remove panic.
	sorted, frozen bool

	// We often need to update values after the creation of an index is completed.
	// We include a convenience map for keeping track of this.
	update map[string]string
	typ    setType // used for checking.
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (ss *stringSet) clone() stringSet { | 
					
						
							|  |  |  | 	c := *ss | 
					
						
							|  |  |  | 	c.s = append([]string(nil), c.s...) | 
					
						
							|  |  |  | 	return c | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (ss *stringSet) setType(t setType) { | 
					
						
							|  |  |  | 	if ss.typ != t && ss.typ != 0 { | 
					
						
							|  |  |  | 		log.Panicf("type %d cannot be assigned as it was already %d", t, ss.typ) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // parse parses a whitespace-separated string and initializes ss with its | 
					
						
							|  |  |  | // components. | 
					
						
							|  |  |  | func (ss *stringSet) parse(s string) { | 
					
						
							|  |  |  | 	scan := bufio.NewScanner(strings.NewReader(s)) | 
					
						
							|  |  |  | 	scan.Split(bufio.ScanWords) | 
					
						
							|  |  |  | 	for scan.Scan() { | 
					
						
							|  |  |  | 		ss.add(scan.Text()) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (ss *stringSet) assertChangeable() { | 
					
						
							|  |  |  | 	if ss.frozen { | 
					
						
							|  |  |  | 		log.Panic("attempt to modify a frozen stringSet") | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (ss *stringSet) add(s string) { | 
					
						
							|  |  |  | 	ss.assertChangeable() | 
					
						
							|  |  |  | 	ss.s = append(ss.s, s) | 
					
						
							|  |  |  | 	ss.sorted = ss.frozen | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
// freeze compacts the set and then marks it frozen, after which add and
// remove panic (see assertChangeable).
func (ss *stringSet) freeze() {
	// compact runs before frozen is set, so it still records sorted as false;
	// the first compact after freezing is the one that latches sorted.
	ss.compact()
	ss.frozen = true
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (ss *stringSet) compact() { | 
					
						
							|  |  |  | 	if ss.sorted { | 
					
						
							|  |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	a := ss.s | 
					
						
							|  |  |  | 	sort.Strings(a) | 
					
						
							|  |  |  | 	k := 0 | 
					
						
							|  |  |  | 	for i := 1; i < len(a); i++ { | 
					
						
							|  |  |  | 		if a[k] != a[i] { | 
					
						
							|  |  |  | 			a[k+1] = a[i] | 
					
						
							|  |  |  | 			k++ | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	ss.s = a[:k+1] | 
					
						
							|  |  |  | 	ss.sorted = ss.frozen | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type funcSorter struct { | 
					
						
							|  |  |  | 	fn func(a, b string) bool | 
					
						
							|  |  |  | 	sort.StringSlice | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (s funcSorter) Less(i, j int) bool { | 
					
						
							|  |  |  | 	return s.fn(s.StringSlice[i], s.StringSlice[j]) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (ss *stringSet) sortFunc(f func(a, b string) bool) { | 
					
						
							|  |  |  | 	ss.compact() | 
					
						
							|  |  |  | 	sort.Sort(funcSorter{f, sort.StringSlice(ss.s)}) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (ss *stringSet) remove(s string) { | 
					
						
							|  |  |  | 	ss.assertChangeable() | 
					
						
							|  |  |  | 	if i, ok := ss.find(s); ok { | 
					
						
							|  |  |  | 		copy(ss.s[i:], ss.s[i+1:]) | 
					
						
							|  |  |  | 		ss.s = ss.s[:len(ss.s)-1] | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (ss *stringSet) replace(ol, nu string) { | 
					
						
							|  |  |  | 	ss.s[ss.index(ol)] = nu | 
					
						
							|  |  |  | 	ss.sorted = ss.frozen | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (ss *stringSet) index(s string) int { | 
					
						
							|  |  |  | 	ss.setType(Indexed) | 
					
						
							|  |  |  | 	i, ok := ss.find(s) | 
					
						
							|  |  |  | 	if !ok { | 
					
						
							|  |  |  | 		if i < len(ss.s) { | 
					
						
							|  |  |  | 			log.Panicf("find: item %q is not in list. Closest match is %q.", s, ss.s[i]) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		log.Panicf("find: item %q is not in list", s) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return i | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (ss *stringSet) find(s string) (int, bool) { | 
					
						
							|  |  |  | 	ss.compact() | 
					
						
							|  |  |  | 	i := sort.SearchStrings(ss.s, s) | 
					
						
							|  |  |  | 	return i, i != len(ss.s) && ss.s[i] == s | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
// slice compacts the set and returns its backing slice. The result is not a
// copy, so it aliases the set's storage.
func (ss *stringSet) slice() []string {
	ss.compact()
	return ss.s
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (ss *stringSet) updateLater(v, key string) { | 
					
						
							|  |  |  | 	if ss.update == nil { | 
					
						
							|  |  |  | 		ss.update = map[string]string{} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	ss.update[v] = key | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // join joins the string and ensures that all entries are of the same length. | 
					
						
							|  |  |  | func (ss *stringSet) join() string { | 
					
						
							|  |  |  | 	ss.setType(Indexed) | 
					
						
							|  |  |  | 	n := len(ss.s[0]) | 
					
						
							|  |  |  | 	for _, s := range ss.s { | 
					
						
							|  |  |  | 		if len(s) != n { | 
					
						
							|  |  |  | 			log.Panicf("join: not all entries are of the same length: %q", s) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	ss.s = append(ss.s, strings.Repeat("\xff", n)) | 
					
						
							|  |  |  | 	return strings.Join(ss.s, "") | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
// ianaEntry holds information for an entry in the IANA Language Subtag Repository.
// All types use the same entry.
// See http://tools.ietf.org/html/bcp47#section-5.1 for a description of the various
// fields.
//
// parseRegistry fills each field from the registry key noted beside it.
type ianaEntry struct {
	typ            string   // "Type:"
	description    []string // one element per "Description:" field
	scope          string   // "Scope:"
	added          string   // "Added:"
	preferred      string   // "Preferred-Value:"
	deprecated     string   // "Deprecated:"
	suppressScript string   // "Suppress-Script:"
	macro          string   // "Macrolanguage:"
	prefix         []string // one element per "Prefix:" field
}
					
						
							|  |  |  | 
 | 
					
						
// builder carries the input data (CLDR and the IANA registry), the indices
// derived from it, and the writers that receive the generated code.
type builder struct {
	w    *gen.CodeWriter
	hw   io.Writer // MultiWriter for w and w.Hash
	data *cldr.CLDR
	supp *cldr.SupplementalData

	// indices
	locale      stringSet // common locales
	lang        stringSet // canonical language ids (2 or 3 letter ISO codes) with data
	langNoIndex stringSet // 3-letter ISO codes with no associated data
	script      stringSet // 4-letter ISO codes
	region      stringSet // 2-letter ISO or 3-digit UN M49 codes
	variant     stringSet // 4-8-alphanumeric variant code.

	// Region codes that are groups with their corresponding group IDs.
	groups map[int]index

	// langInfo
	// registry maps a subtag or tag to its IANA registry entry; it is
	// populated by parseRegistry.
	registry map[string]*ianaEntry
}
					
						
							|  |  |  | 
 | 
					
						
// index is the value type of builder.groups, i.e. a group ID.
type index uint
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func newBuilder(w *gen.CodeWriter) *builder { | 
					
						
							|  |  |  | 	r := gen.OpenCLDRCoreZip() | 
					
						
							|  |  |  | 	defer r.Close() | 
					
						
							|  |  |  | 	d := &cldr.Decoder{} | 
					
						
							|  |  |  | 	data, err := d.DecodeZip(r) | 
					
						
							|  |  |  | 	failOnError(err) | 
					
						
							|  |  |  | 	b := builder{ | 
					
						
							|  |  |  | 		w:    w, | 
					
						
							|  |  |  | 		hw:   io.MultiWriter(w, w.Hash), | 
					
						
							|  |  |  | 		data: data, | 
					
						
							|  |  |  | 		supp: data.Supplemental(), | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.parseRegistry() | 
					
						
							|  |  |  | 	return &b | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
// parseRegistry reads the IANA language subtag registry and fills b.registry
// with one ianaEntry per subtag or tag.
//
// The file is consumed as a stream of whitespace-separated words. Each
// iteration treats the current word as a key and the following word as its
// value. A "Type:" key starts a new record, and later keys fill in that
// record's fields.
// NOTE(review): record stays nil until the first "Type:" key is seen, so a
// recognized field key appearing before it would dereference nil.
func (b *builder) parseRegistry() {
	r := gen.OpenIANAFile("assignments/language-subtag-registry")
	defer r.Close()
	b.registry = make(map[string]*ianaEntry)

	scan := bufio.NewScanner(r)
	scan.Split(bufio.ScanWords)
	var record *ianaEntry
	for more := scan.Scan(); more; {
		key := scan.Text()
		more = scan.Scan()
		value := scan.Text()
		switch key {
		case "Type:":
			record = &ianaEntry{typ: value}
		case "Subtag:", "Tag:":
			// A value of the form "x..y" denotes a range: every value from x
			// up to and including y, stepping with inc (defined elsewhere in
			// this file), is registered against the same record.
			if s := strings.SplitN(value, "..", 2); len(s) > 1 {
				for a := s[0]; a <= s[1]; a = inc(a) {
					b.addToRegistry(a, record)
				}
			} else {
				b.addToRegistry(value, record)
			}
		case "Suppress-Script:":
			record.suppressScript = value
		case "Added:":
			record.added = value
		case "Deprecated:":
			record.deprecated = value
		case "Macrolanguage:":
			record.macro = value
		case "Preferred-Value:":
			record.preferred = value
		case "Prefix:":
			record.prefix = append(record.prefix, value)
		case "Scope:":
			record.scope = value
		case "Description:":
			// A description spans several words: keep appending until a word
			// that starts with '%' or ends with ':' shows up.
			buf := []byte(value)
			for more = scan.Scan(); more; more = scan.Scan() {
				// Note: this b shadows the receiver inside the loop body.
				b := scan.Bytes()
				if b[0] == '%' || b[len(b)-1] == ':' {
					break
				}
				buf = append(buf, ' ')
				buf = append(buf, b...)
			}
			record.description = append(record.description, string(buf))
			// The scanner already sits on the word that ended the
			// description, so it must not be advanced again; that word is the
			// next key.
			continue
		default:
			// Unknown key: do not advance, so the word just read as value is
			// re-examined as a key on the next iteration.
			continue
		}
		more = scan.Scan()
	}
	if scan.Err() != nil {
		log.Panic(scan.Err())
	}
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (b *builder) addToRegistry(key string, entry *ianaEntry) { | 
					
						
							|  |  |  | 	if info, ok := b.registry[key]; ok { | 
					
						
							|  |  |  | 		if info.typ != "language" || entry.typ != "extlang" { | 
					
						
							|  |  |  | 			log.Fatalf("parseRegistry: tag %q already exists", key) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} else { | 
					
						
							|  |  |  | 		b.registry[key] = entry | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | var commentIndex = make(map[string]string) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func init() { | 
					
						
							|  |  |  | 	for _, s := range comment { | 
					
						
							|  |  |  | 		key := strings.TrimSpace(strings.SplitN(s, " ", 2)[0]) | 
					
						
							|  |  |  | 		commentIndex[key] = s | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (b *builder) comment(name string) { | 
					
						
							|  |  |  | 	if s := commentIndex[name]; len(s) > 0 { | 
					
						
							|  |  |  | 		b.w.WriteComment(s) | 
					
						
							|  |  |  | 	} else { | 
					
						
							|  |  |  | 		fmt.Fprintln(b.w) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (b *builder) pf(f string, x ...interface{}) { | 
					
						
							|  |  |  | 	fmt.Fprintf(b.hw, f, x...) | 
					
						
							|  |  |  | 	fmt.Fprint(b.hw, "\n") | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
// p writes its arguments to b.hw in fmt.Fprintln style: operands separated
// by spaces and terminated by a newline.
func (b *builder) p(x ...interface{}) {
	fmt.Fprintln(b.hw, x...)
}
					
						
							|  |  |  | 
 | 
					
						
// addSize adds s to the code writer's running Size total and emits a
// "// Size: N bytes" comment line through pf.
func (b *builder) addSize(s int) {
	b.w.Size += s
	b.pf("// Size: %d bytes", s)
}
					
						
							|  |  |  | 
 | 
					
						
// writeConst emits the documentation registered for name (or a blank line)
// and then hands the constant name and value x to the code writer's
// WriteConst.
func (b *builder) writeConst(name string, x interface{}) {
	b.comment(name)
	b.w.WriteConst(name, x)
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // writeConsts computes f(v) for all v in values and writes the results | 
					
						
							|  |  |  | // as constants named _v to a single constant block. | 
					
						
							|  |  |  | func (b *builder) writeConsts(f func(string) int, values ...string) { | 
					
						
							|  |  |  | 	b.pf("const (") | 
					
						
							|  |  |  | 	for _, v := range values { | 
					
						
							|  |  |  | 		b.pf("\t_%s = %v", v, f(v)) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.pf(")") | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
// writeType writes the type of the given value, which must be a struct.
// The type's name (obtained through reflection) is used to look up the
// documentation that is emitted first.
func (b *builder) writeType(value interface{}) {
	b.comment(reflect.TypeOf(value).Name())
	b.w.WriteType(value)
}
					
						
							|  |  |  | 
 | 
					
						
// writeSlice writes ss as a variable called name. It is writeSliceAddSize
// with no extra size.
func (b *builder) writeSlice(name string, ss interface{}) {
	b.writeSliceAddSize(name, 0, ss)
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (b *builder) writeSliceAddSize(name string, extraSize int, ss interface{}) { | 
					
						
							|  |  |  | 	b.comment(name) | 
					
						
							|  |  |  | 	b.w.Size += extraSize | 
					
						
							|  |  |  | 	v := reflect.ValueOf(ss) | 
					
						
							|  |  |  | 	t := v.Type().Elem() | 
					
						
							|  |  |  | 	b.pf("// Size: %d bytes, %d elements", v.Len()*int(t.Size())+extraSize, v.Len()) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	fmt.Fprintf(b.w, "var %s = ", name) | 
					
						
							|  |  |  | 	b.w.WriteArray(ss) | 
					
						
							|  |  |  | 	b.p() | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
// fromTo is a pair of 16-bit values. writeSortedMap emits a slice of these,
// pairing the index of each set element with the index of its update value.
type fromTo struct {
	from, to uint16
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (b *builder) writeSortedMap(name string, ss *stringSet, index func(s string) uint16) { | 
					
						
							|  |  |  | 	ss.sortFunc(func(a, b string) bool { | 
					
						
							|  |  |  | 		return index(a) < index(b) | 
					
						
							|  |  |  | 	}) | 
					
						
							|  |  |  | 	m := []fromTo{} | 
					
						
							|  |  |  | 	for _, s := range ss.s { | 
					
						
							|  |  |  | 		m = append(m, fromTo{index(s), index(ss.update[s])}) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.writeSlice(name, m) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | const base = 'z' - 'a' + 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func strToInt(s string) uint { | 
					
						
							|  |  |  | 	v := uint(0) | 
					
						
							|  |  |  | 	for i := 0; i < len(s); i++ { | 
					
						
							|  |  |  | 		v *= base | 
					
						
							|  |  |  | 		v += uint(s[i] - 'a') | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return v | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // converts the given integer to the original ASCII string passed to strToInt. | 
					
						
							|  |  |  | // len(s) must match the number of characters obtained. | 
					
						
							|  |  |  | func intToStr(v uint, s []byte) { | 
					
						
							|  |  |  | 	for i := len(s) - 1; i >= 0; i-- { | 
					
						
							|  |  |  | 		s[i] = byte(v%base) + 'a' | 
					
						
							|  |  |  | 		v /= base | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (b *builder) writeBitVector(name string, ss []string) { | 
					
						
							|  |  |  | 	vec := make([]uint8, int(math.Ceil(math.Pow(base, float64(len(ss[0])))/8))) | 
					
						
							|  |  |  | 	for _, s := range ss { | 
					
						
							|  |  |  | 		v := strToInt(s) | 
					
						
							|  |  |  | 		vec[v/8] |= 1 << (v % 8) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.writeSlice(name, vec) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // TODO: convert this type into a list or two-stage trie. | 
					
						
							|  |  |  | func (b *builder) writeMapFunc(name string, m map[string]string, f func(string) uint16) { | 
					
						
							|  |  |  | 	b.comment(name) | 
					
						
							|  |  |  | 	v := reflect.ValueOf(m) | 
					
						
							|  |  |  | 	sz := v.Len() * (2 + int(v.Type().Key().Size())) | 
					
						
							|  |  |  | 	for _, k := range m { | 
					
						
							|  |  |  | 		sz += len(k) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.addSize(sz) | 
					
						
							|  |  |  | 	keys := []string{} | 
					
						
							|  |  |  | 	b.pf(`var %s = map[string]uint16{`, name) | 
					
						
							|  |  |  | 	for k := range m { | 
					
						
							|  |  |  | 		keys = append(keys, k) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	sort.Strings(keys) | 
					
						
							|  |  |  | 	for _, k := range keys { | 
					
						
							|  |  |  | 		b.pf("\t%q: %v,", k, f(m[k])) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.p("}") | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (b *builder) writeMap(name string, m interface{}) { | 
					
						
							|  |  |  | 	b.comment(name) | 
					
						
							|  |  |  | 	v := reflect.ValueOf(m) | 
					
						
							|  |  |  | 	sz := v.Len() * (2 + int(v.Type().Key().Size()) + int(v.Type().Elem().Size())) | 
					
						
							|  |  |  | 	b.addSize(sz) | 
					
						
							|  |  |  | 	f := strings.FieldsFunc(fmt.Sprintf("%#v", m), func(r rune) bool { | 
					
						
							|  |  |  | 		return strings.IndexRune("{}, ", r) != -1 | 
					
						
							|  |  |  | 	}) | 
					
						
							|  |  |  | 	sort.Strings(f[1:]) | 
					
						
							|  |  |  | 	b.pf(`var %s = %s{`, name, f[0]) | 
					
						
							|  |  |  | 	for _, kv := range f[1:] { | 
					
						
							|  |  |  | 		b.pf("\t%s,", kv) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.p("}") | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (b *builder) langIndex(s string) uint16 { | 
					
						
							|  |  |  | 	if s == "und" { | 
					
						
							|  |  |  | 		return 0 | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if i, ok := b.lang.find(s); ok { | 
					
						
							|  |  |  | 		return uint16(i) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return uint16(strToInt(s)) + uint16(len(b.lang.s)) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // inc advances the string to its lexicographical successor. | 
					
						
							|  |  |  | func inc(s string) string { | 
					
						
							|  |  |  | 	const maxTagLength = 4 | 
					
						
							|  |  |  | 	var buf [maxTagLength]byte | 
					
						
							|  |  |  | 	intToStr(strToInt(strings.ToLower(s))+1, buf[:len(s)]) | 
					
						
							|  |  |  | 	for i := 0; i < len(s); i++ { | 
					
						
							|  |  |  | 		if s[i] <= 'Z' { | 
					
						
							|  |  |  | 			buf[i] -= 'a' - 'A' | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return string(buf[:len(s)]) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (b *builder) parseIndices() { | 
					
						
							|  |  |  | 	meta := b.supp.Metadata | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for k, v := range b.registry { | 
					
						
							|  |  |  | 		var ss *stringSet | 
					
						
							|  |  |  | 		switch v.typ { | 
					
						
							|  |  |  | 		case "language": | 
					
						
							|  |  |  | 			if len(k) == 2 || v.suppressScript != "" || v.scope == "special" { | 
					
						
							|  |  |  | 				b.lang.add(k) | 
					
						
							|  |  |  | 				continue | 
					
						
							|  |  |  | 			} else { | 
					
						
							|  |  |  | 				ss = &b.langNoIndex | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		case "region": | 
					
						
							|  |  |  | 			ss = &b.region | 
					
						
							|  |  |  | 		case "script": | 
					
						
							|  |  |  | 			ss = &b.script | 
					
						
							|  |  |  | 		case "variant": | 
					
						
							|  |  |  | 			ss = &b.variant | 
					
						
							|  |  |  | 		default: | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		ss.add(k) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	// Include any language for which there is data. | 
					
						
							|  |  |  | 	for _, lang := range b.data.Locales() { | 
					
						
							|  |  |  | 		if x := b.data.RawLDML(lang); false || | 
					
						
							|  |  |  | 			x.LocaleDisplayNames != nil || | 
					
						
							|  |  |  | 			x.Characters != nil || | 
					
						
							|  |  |  | 			x.Delimiters != nil || | 
					
						
							|  |  |  | 			x.Measurement != nil || | 
					
						
							|  |  |  | 			x.Dates != nil || | 
					
						
							|  |  |  | 			x.Numbers != nil || | 
					
						
							|  |  |  | 			x.Units != nil || | 
					
						
							|  |  |  | 			x.ListPatterns != nil || | 
					
						
							|  |  |  | 			x.Collations != nil || | 
					
						
							|  |  |  | 			x.Segmentations != nil || | 
					
						
							|  |  |  | 			x.Rbnf != nil || | 
					
						
							|  |  |  | 			x.Annotations != nil || | 
					
						
							|  |  |  | 			x.Metadata != nil { | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			from := strings.Split(lang, "_") | 
					
						
							|  |  |  | 			if lang := from[0]; lang != "root" { | 
					
						
							|  |  |  | 				b.lang.add(lang) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	// Include locales for plural rules, which uses a different structure. | 
					
						
							|  |  |  | 	for _, plurals := range b.data.Supplemental().Plurals { | 
					
						
							|  |  |  | 		for _, rules := range plurals.PluralRules { | 
					
						
							|  |  |  | 			for _, lang := range strings.Split(rules.Locales, " ") { | 
					
						
							|  |  |  | 				if lang = strings.Split(lang, "_")[0]; lang != "root" { | 
					
						
							|  |  |  | 					b.lang.add(lang) | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	// Include languages in likely subtags. | 
					
						
							|  |  |  | 	for _, m := range b.supp.LikelySubtags.LikelySubtag { | 
					
						
							|  |  |  | 		from := strings.Split(m.From, "_") | 
					
						
							|  |  |  | 		b.lang.add(from[0]) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	// Include ISO-639 alpha-3 bibliographic entries. | 
					
						
							|  |  |  | 	for _, a := range meta.Alias.LanguageAlias { | 
					
						
							|  |  |  | 		if a.Reason == "bibliographic" { | 
					
						
							|  |  |  | 			b.langNoIndex.add(a.Type) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	// Include regions in territoryAlias (not all are in the IANA registry!) | 
					
						
							|  |  |  | 	for _, reg := range b.supp.Metadata.Alias.TerritoryAlias { | 
					
						
							|  |  |  | 		if len(reg.Type) == 2 { | 
					
						
							|  |  |  | 			b.region.add(reg.Type) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for _, s := range b.lang.s { | 
					
						
							|  |  |  | 		if len(s) == 3 { | 
					
						
							|  |  |  | 			b.langNoIndex.remove(s) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.writeConst("numLanguages", len(b.lang.slice())+len(b.langNoIndex.slice())) | 
					
						
							|  |  |  | 	b.writeConst("numScripts", len(b.script.slice())) | 
					
						
							|  |  |  | 	b.writeConst("numRegions", len(b.region.slice())) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Add dummy codes at the start of each list to represent "unspecified". | 
					
						
							|  |  |  | 	b.lang.add("---") | 
					
						
							|  |  |  | 	b.script.add("----") | 
					
						
							|  |  |  | 	b.region.add("---") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// common locales | 
					
						
							|  |  |  | 	b.locale.parse(meta.DefaultContent.Locales) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-28 20:05:01 +03:00
										 |  |  | // TODO: region inclusion data will probably not be use used in future matchers. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-11 16:16:52 +02:00
										 |  |  | func (b *builder) computeRegionGroups() { | 
					
						
							|  |  |  | 	b.groups = make(map[int]index) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Create group indices. | 
					
						
							|  |  |  | 	for i := 1; b.region.s[i][0] < 'A'; i++ { // Base M49 indices on regionID. | 
					
						
							|  |  |  | 		b.groups[i] = index(len(b.groups)) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	for _, g := range b.supp.TerritoryContainment.Group { | 
					
						
							| 
									
										
										
										
											2016-10-28 20:05:01 +03:00
										 |  |  | 		// Skip UN and EURO zone as they are flattening the containment | 
					
						
							|  |  |  | 		// relationship. | 
					
						
							|  |  |  | 		if g.Type == "EZ" || g.Type == "UN" { | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2016-02-11 16:16:52 +02:00
										 |  |  | 		group := b.region.index(g.Type) | 
					
						
							|  |  |  | 		if _, ok := b.groups[group]; !ok { | 
					
						
							|  |  |  | 			b.groups[group] = index(len(b.groups)) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if len(b.groups) > 32 { | 
					
						
							|  |  |  | 		log.Fatalf("only 32 groups supported, found %d", len(b.groups)) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.writeConst("nRegionGroups", len(b.groups)) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
// langConsts lists the language codes that writeLanguage passes to
// b.writeConsts, with b.langIndex supplying the value for each code.
var langConsts = []string{
	"af", "am", "ar", "az", "bg", "bn", "ca", "cs", "da", "de", "el", "en", "es",
	"et", "fa", "fi", "fil", "fr", "gu", "he", "hi", "hr", "hu", "hy", "id", "is",
	"it", "ja", "ka", "kk", "km", "kn", "ko", "ky", "lo", "lt", "lv", "mk", "ml",
	"mn", "mo", "mr", "ms", "mul", "my", "nb", "ne", "nl", "no", "pa", "pl", "pt",
	"ro", "ru", "sh", "si", "sk", "sl", "sq", "sr", "sv", "sw", "ta", "te", "th",
	"tl", "tn", "tr", "uk", "ur", "uz", "vi", "zh", "zu",

	// constants for grandfathered tags (if not already defined)
	"jbo", "ami", "bnn", "hak", "tlh", "lb", "nv", "pwn", "tao", "tay", "tsu",
	"nn", "sfb", "vgt", "sgg", "cmn", "nan", "hsn",
}
					
						
							|  |  |  | 
 | 
					
						
// writeLanguage generates all tables needed for language canonicalization:
// the language constants, the "lang" index string, the langNoIndex bit vector,
// the altLangISO3/altLangIndex tables and the langAliasMap/langAliasTypes
// tables.
func (b *builder) writeLanguage() {
	meta := b.supp.Metadata

	b.writeConst("nonCanonicalUnd", b.lang.index("und"))
	b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
	b.writeConst("langPrivateStart", b.langIndex("qaa"))
	b.writeConst("langPrivateEnd", b.langIndex("qtz"))

	// Get language codes that need to be mapped (overlong 3-letter codes,
	// deprecated 2-letter codes, legacy and grandfathered tags.)
	langAliasMap := stringSet{}
	aliasTypeMap := map[string]langAliasType{}

	// altLangISO3 gets the alternative ISO3 names that need to be mapped.
	altLangISO3 := stringSet{}
	// Add dummy start to avoid the use of index 0.
	altLangISO3.add("---")
	altLangISO3.updateLater("---", "aa")

	// Work on a copy of the language set so that b.lang itself stays
	// unmodified by the updates recorded below.
	lang := b.lang.clone()
	for _, a := range meta.Alias.LanguageAlias {
		if a.Replacement == "" {
			a.Replacement = "und"
		}
		// TODO: support mapping to tags
		// Only the part of the replacement before the first "_" is used.
		repl := strings.SplitN(a.Replacement, "_", 2)[0]
		if a.Reason == "overlong" {
			// Record the 3-letter form as the update for its 2-letter code.
			if len(a.Replacement) == 2 && len(a.Type) == 3 {
				lang.updateLater(a.Replacement, a.Type)
			}
		} else if len(a.Type) <= 3 {
			switch a.Reason {
			case "macrolanguage":
				aliasTypeMap[a.Type] = langMacro
			case "deprecated":
				// handled elsewhere
				continue
			case "bibliographic", "legacy":
				if a.Type == "no" {
					continue
				}
				aliasTypeMap[a.Type] = langLegacy
			default:
				// An alias reason this generator does not know about.
				log.Fatalf("new %s alias: %s", a.Reason, a.Type)
			}
			langAliasMap.add(a.Type)
			langAliasMap.updateLater(a.Type, repl)
		}
	}
	// Manually add the mapping of "nb" (Norwegian) to its macro language.
	// This can be removed if CLDR adopts this change.
	langAliasMap.add("nb")
	langAliasMap.updateLater("nb", "no")
	aliasTypeMap["nb"] = langMacro

	for k, v := range b.registry {
		// Also add deprecated values for 3-letter ISO codes, which CLDR omits.
		if v.typ == "language" && v.deprecated != "" && v.preferred != "" {
			langAliasMap.add(k)
			langAliasMap.updateLater(k, v.preferred)
			aliasTypeMap[k] = langDeprecated
		}
	}
	// Fix CLDR mappings.
	lang.updateLater("tl", "tgl")
	lang.updateLater("sh", "hbs")
	lang.updateLater("mo", "mol")
	lang.updateLater("no", "nor")
	lang.updateLater("tw", "twi")
	lang.updateLater("nb", "nob")
	lang.updateLater("ak", "aka")
	lang.updateLater("bh", "bih")

	// Ensure that each 2-letter code is matched with a 3-letter code.
	for _, v := range lang.s[1:] {
		s, ok := lang.update[v]
		if !ok {
			// No direct update: fall back to the update recorded for the
			// alias replacement of v, if there is one.
			if s, ok = lang.update[langAliasMap.update[v]]; !ok {
				continue
			}
			lang.update[v] = s
		}
		// A long form that starts with a different letter than v cannot be
		// stored as a suffix of v; register it in altLangISO3 instead.
		if v[0] != s[0] {
			altLangISO3.add(s)
			altLangISO3.updateLater(s, v)
		}
	}

	// Complete canonicalized language tags.
	lang.freeze()
	for i, v := range lang.s {
		// We can avoid these manual entries by using the IANA registry directly.
		// Seems easier to update the list manually, as changes are rare.
		// The panic in this loop will trigger if we miss an entry.
		add := ""
		if s, ok := lang.update[v]; ok {
			if s[0] == v[0] {
				// Same first letter: append the remaining letters of s.
				add = s[1:]
			} else {
				// Otherwise append a zero byte followed by the index of s in
				// altLangISO3.
				add = string([]byte{0, byte(altLangISO3.index(s))})
			}
		} else if len(v) == 3 {
			// 3-letter entries without an update get a single zero byte.
			add = "\x00"
		} else {
			log.Panicf("no data for long form of %q", v)
		}
		lang.s[i] += add
	}
	b.writeConst("lang", tag.Index(lang.join()))

	b.writeConst("langNoIndexOffset", len(b.lang.s))

	// space of all valid 3-letter language identifiers.
	b.writeBitVector("langNoIndex", b.langNoIndex.slice())

	altLangIndex := []uint16{}
	for i, s := range altLangISO3.slice() {
		// Append to each entry the number of altLangIndex elements that
		// precede its own; the dummy entry at position 0 contributes none.
		altLangISO3.s[i] += string([]byte{byte(len(altLangIndex))})
		if i > 0 {
			idx := b.lang.index(altLangISO3.update[s])
			altLangIndex = append(altLangIndex, uint16(idx))
		}
	}
	b.writeConst("altLangISO3", tag.Index(altLangISO3.join()))
	b.writeSlice("altLangIndex", altLangIndex)

	b.writeSortedMap("langAliasMap", &langAliasMap, b.langIndex)
	// writeSortedMap has sorted langAliasMap.s; emit the alias types in that
	// same order.
	types := make([]langAliasType, len(langAliasMap.s))
	for i, s := range langAliasMap.s {
		types[i] = aliasTypeMap[s]
	}
	b.writeSlice("langAliasTypes", types)
}
					
						
							|  |  |  | 
 | 
					
						
// scriptConsts lists the script codes that writeScript passes to
// b.writeConsts, with b.script.index supplying the value for each code.
var scriptConsts = []string{
	"Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy",
	"Zzzz",
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (b *builder) writeScript() { | 
					
						
							|  |  |  | 	b.writeConsts(b.script.index, scriptConsts...) | 
					
						
							|  |  |  | 	b.writeConst("script", tag.Index(b.script.join())) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	supp := make([]uint8, len(b.lang.slice())) | 
					
						
							|  |  |  | 	for i, v := range b.lang.slice()[1:] { | 
					
						
							|  |  |  | 		if sc := b.registry[v].suppressScript; sc != "" { | 
					
						
							|  |  |  | 			supp[i+1] = uint8(b.script.index(sc)) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.writeSlice("suppressScript", supp) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// There is only one deprecated script in CLDR. This value is hard-coded. | 
					
						
							|  |  |  | 	// We check here if the code must be updated. | 
					
						
							|  |  |  | 	for _, a := range b.supp.Metadata.Alias.ScriptAlias { | 
					
						
							|  |  |  | 		if a.Type != "Qaai" { | 
					
						
							|  |  |  | 			log.Panicf("unexpected deprecated stript %q", a.Type) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func parseM49(s string) int16 { | 
					
						
							|  |  |  | 	if len(s) == 0 { | 
					
						
							|  |  |  | 		return 0 | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	v, err := strconv.ParseUint(s, 10, 10) | 
					
						
							|  |  |  | 	failOnError(err) | 
					
						
							|  |  |  | 	return int16(v) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
// regionConsts lists the region codes that writeRegion passes to
// b.writeConsts, with b.region.index supplying the value for each code.
var regionConsts = []string{
	"001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US",
	"ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo.
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (b *builder) writeRegion() { | 
					
						
							|  |  |  | 	b.writeConsts(b.region.index, regionConsts...) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	isoOffset := b.region.index("AA") | 
					
						
							|  |  |  | 	m49map := make([]int16, len(b.region.slice())) | 
					
						
							|  |  |  | 	fromM49map := make(map[int16]int) | 
					
						
							|  |  |  | 	altRegionISO3 := "" | 
					
						
							|  |  |  | 	altRegionIDs := []uint16{} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	b.writeConst("isoRegionOffset", isoOffset) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// 2-letter region lookup and mapping to numeric codes. | 
					
						
							|  |  |  | 	regionISO := b.region.clone() | 
					
						
							|  |  |  | 	regionISO.s = regionISO.s[isoOffset:] | 
					
						
							|  |  |  | 	regionISO.sorted = false | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	regionTypes := make([]byte, len(b.region.s)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Is the region valid BCP 47? | 
					
						
							|  |  |  | 	for s, e := range b.registry { | 
					
						
							|  |  |  | 		if len(s) == 2 && s == strings.ToUpper(s) { | 
					
						
							|  |  |  | 			i := b.region.index(s) | 
					
						
							|  |  |  | 			for _, d := range e.description { | 
					
						
							|  |  |  | 				if strings.Contains(d, "Private use") { | 
					
						
							|  |  |  | 					regionTypes[i] = iso3166UserAssgined | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			regionTypes[i] |= bcp47Region | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Is the region a valid ccTLD? | 
					
						
							|  |  |  | 	r := gen.OpenIANAFile("domains/root/db") | 
					
						
							|  |  |  | 	defer r.Close() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	buf, err := ioutil.ReadAll(r) | 
					
						
							|  |  |  | 	failOnError(err) | 
					
						
							|  |  |  | 	re := regexp.MustCompile(`"/domains/root/db/([a-z]{2}).html"`) | 
					
						
							|  |  |  | 	for _, m := range re.FindAllSubmatch(buf, -1) { | 
					
						
							|  |  |  | 		i := b.region.index(strings.ToUpper(string(m[1]))) | 
					
						
							|  |  |  | 		regionTypes[i] |= ccTLD | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	b.writeSlice("regionTypes", regionTypes) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	iso3Set := make(map[string]int) | 
					
						
							|  |  |  | 	update := func(iso2, iso3 string) { | 
					
						
							|  |  |  | 		i := regionISO.index(iso2) | 
					
						
							|  |  |  | 		if j, ok := iso3Set[iso3]; !ok && iso3[0] == iso2[0] { | 
					
						
							|  |  |  | 			regionISO.s[i] += iso3[1:] | 
					
						
							|  |  |  | 			iso3Set[iso3] = -1 | 
					
						
							|  |  |  | 		} else { | 
					
						
							|  |  |  | 			if ok && j >= 0 { | 
					
						
							|  |  |  | 				regionISO.s[i] += string([]byte{0, byte(j)}) | 
					
						
							|  |  |  | 			} else { | 
					
						
							|  |  |  | 				iso3Set[iso3] = len(altRegionISO3) | 
					
						
							|  |  |  | 				regionISO.s[i] += string([]byte{0, byte(len(altRegionISO3))}) | 
					
						
							|  |  |  | 				altRegionISO3 += iso3 | 
					
						
							|  |  |  | 				altRegionIDs = append(altRegionIDs, uint16(isoOffset+i)) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	for _, tc := range b.supp.CodeMappings.TerritoryCodes { | 
					
						
							|  |  |  | 		i := regionISO.index(tc.Type) + isoOffset | 
					
						
							|  |  |  | 		if d := m49map[i]; d != 0 { | 
					
						
							|  |  |  | 			log.Panicf("%s found as a duplicate UN.M49 code of %03d", tc.Numeric, d) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		m49 := parseM49(tc.Numeric) | 
					
						
							|  |  |  | 		m49map[i] = m49 | 
					
						
							|  |  |  | 		if r := fromM49map[m49]; r == 0 { | 
					
						
							|  |  |  | 			fromM49map[m49] = i | 
					
						
							|  |  |  | 		} else if r != i { | 
					
						
							|  |  |  | 			dep := b.registry[regionISO.s[r-isoOffset]].deprecated | 
					
						
							|  |  |  | 			if t := b.registry[tc.Type]; t != nil && dep != "" && (t.deprecated == "" || t.deprecated > dep) { | 
					
						
							|  |  |  | 				fromM49map[m49] = i | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	for _, ta := range b.supp.Metadata.Alias.TerritoryAlias { | 
					
						
							|  |  |  | 		if len(ta.Type) == 3 && ta.Type[0] <= '9' && len(ta.Replacement) == 2 { | 
					
						
							|  |  |  | 			from := parseM49(ta.Type) | 
					
						
							|  |  |  | 			if r := fromM49map[from]; r == 0 { | 
					
						
							|  |  |  | 				fromM49map[from] = regionISO.index(ta.Replacement) + isoOffset | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	for _, tc := range b.supp.CodeMappings.TerritoryCodes { | 
					
						
							|  |  |  | 		if len(tc.Alpha3) == 3 { | 
					
						
							|  |  |  | 			update(tc.Type, tc.Alpha3) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	// This entries are not included in territoryCodes. Mostly 3-letter variants | 
					
						
							|  |  |  | 	// of deleted codes and an entry for QU. | 
					
						
							|  |  |  | 	for _, m := range []struct{ iso2, iso3 string }{ | 
					
						
							|  |  |  | 		{"CT", "CTE"}, | 
					
						
							|  |  |  | 		{"DY", "DHY"}, | 
					
						
							|  |  |  | 		{"HV", "HVO"}, | 
					
						
							|  |  |  | 		{"JT", "JTN"}, | 
					
						
							|  |  |  | 		{"MI", "MID"}, | 
					
						
							|  |  |  | 		{"NH", "NHB"}, | 
					
						
							|  |  |  | 		{"NQ", "ATN"}, | 
					
						
							|  |  |  | 		{"PC", "PCI"}, | 
					
						
							|  |  |  | 		{"PU", "PUS"}, | 
					
						
							|  |  |  | 		{"PZ", "PCZ"}, | 
					
						
							|  |  |  | 		{"RH", "RHO"}, | 
					
						
							|  |  |  | 		{"VD", "VDR"}, | 
					
						
							|  |  |  | 		{"WK", "WAK"}, | 
					
						
							|  |  |  | 		// These three-letter codes are used for others as well. | 
					
						
							|  |  |  | 		{"FQ", "ATF"}, | 
					
						
							|  |  |  | 	} { | 
					
						
							|  |  |  | 		update(m.iso2, m.iso3) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	for i, s := range regionISO.s { | 
					
						
							|  |  |  | 		if len(s) != 4 { | 
					
						
							|  |  |  | 			regionISO.s[i] = s + "  " | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.writeConst("regionISO", tag.Index(regionISO.join())) | 
					
						
							|  |  |  | 	b.writeConst("altRegionISO3", altRegionISO3) | 
					
						
							|  |  |  | 	b.writeSlice("altRegionIDs", altRegionIDs) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Create list of deprecated regions. | 
					
						
							|  |  |  | 	// TODO: consider inserting SF -> FI. Not included by CLDR, but is the only | 
					
						
							|  |  |  | 	// Transitionally-reserved mapping not included. | 
					
						
							|  |  |  | 	regionOldMap := stringSet{} | 
					
						
							|  |  |  | 	// Include regions in territoryAlias (not all are in the IANA registry!) | 
					
						
							|  |  |  | 	for _, reg := range b.supp.Metadata.Alias.TerritoryAlias { | 
					
						
							|  |  |  | 		if len(reg.Type) == 2 && reg.Reason == "deprecated" && len(reg.Replacement) == 2 { | 
					
						
							|  |  |  | 			regionOldMap.add(reg.Type) | 
					
						
							|  |  |  | 			regionOldMap.updateLater(reg.Type, reg.Replacement) | 
					
						
							|  |  |  | 			i, _ := regionISO.find(reg.Type) | 
					
						
							|  |  |  | 			j, _ := regionISO.find(reg.Replacement) | 
					
						
							|  |  |  | 			if k := m49map[i+isoOffset]; k == 0 { | 
					
						
							|  |  |  | 				m49map[i+isoOffset] = m49map[j+isoOffset] | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.writeSortedMap("regionOldMap", ®ionOldMap, func(s string) uint16 { | 
					
						
							|  |  |  | 		return uint16(b.region.index(s)) | 
					
						
							|  |  |  | 	}) | 
					
						
							|  |  |  | 	// 3-digit region lookup, groupings. | 
					
						
							|  |  |  | 	for i := 1; i < isoOffset; i++ { | 
					
						
							|  |  |  | 		m := parseM49(b.region.s[i]) | 
					
						
							|  |  |  | 		m49map[i] = m | 
					
						
							|  |  |  | 		fromM49map[m] = i | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.writeSlice("m49", m49map) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	const ( | 
					
						
							|  |  |  | 		searchBits = 7 | 
					
						
							|  |  |  | 		regionBits = 9 | 
					
						
							|  |  |  | 	) | 
					
						
							|  |  |  | 	if len(m49map) >= 1<<regionBits { | 
					
						
							|  |  |  | 		log.Fatalf("Maximum number of regions exceeded: %d > %d", len(m49map), 1<<regionBits) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	m49Index := [9]int16{} | 
					
						
							|  |  |  | 	fromM49 := []uint16{} | 
					
						
							|  |  |  | 	m49 := []int{} | 
					
						
							|  |  |  | 	for k, _ := range fromM49map { | 
					
						
							|  |  |  | 		m49 = append(m49, int(k)) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	sort.Ints(m49) | 
					
						
							|  |  |  | 	for _, k := range m49[1:] { | 
					
						
							|  |  |  | 		val := (k & (1<<searchBits - 1)) << regionBits | 
					
						
							|  |  |  | 		fromM49 = append(fromM49, uint16(val|fromM49map[int16(k)])) | 
					
						
							|  |  |  | 		m49Index[1:][k>>searchBits] = int16(len(fromM49)) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.writeSlice("m49Index", m49Index) | 
					
						
							|  |  |  | 	b.writeSlice("fromM49", fromM49) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | const ( | 
					
						
							|  |  |  | 	// TODO: put these lists in regionTypes as user data? Could be used for | 
					
						
							|  |  |  | 	// various optimizations and refinements and could be exposed in the API. | 
					
						
							|  |  |  | 	iso3166Except = "AC CP DG EA EU FX IC SU TA UK" | 
					
						
							|  |  |  | 	iso3166Trans  = "AN BU CS NT TP YU ZR" // SF is not in our set of Regions. | 
					
						
							|  |  |  | 	// DY and RH are actually not deleted, but indeterminately reserved. | 
					
						
							|  |  |  | 	iso3166DelCLDR = "CT DD DY FQ HV JT MI NH NQ PC PU PZ RH VD WK YD" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | const ( | 
					
						
							|  |  |  | 	iso3166UserAssgined = 1 << iota | 
					
						
							|  |  |  | 	ccTLD | 
					
						
							|  |  |  | 	bcp47Region | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func find(list []string, s string) int { | 
					
						
							|  |  |  | 	for i, t := range list { | 
					
						
							|  |  |  | 		if t == s { | 
					
						
							|  |  |  | 			return i | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return -1 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // writeVariants generates per-variant information and creates a map from variant | 
					
						
							|  |  |  | // name to index value. We assign index values such that sorting multiple | 
					
						
							|  |  |  | // variants by index value will result in the correct order. | 
					
						
							|  |  |  | // There are two types of variants: specialized and general. Specialized variants | 
					
						
							|  |  |  | // are only applicable to certain language or language-script pairs. Generalized | 
					
						
							|  |  |  | // variants apply to any language. Generalized variants always sort after | 
					
						
							|  |  |  | // specialized variants.  We will therefore always assign a higher index value | 
					
						
							|  |  |  | // to a generalized variant than any other variant. Generalized variants are | 
					
						
							|  |  |  | // sorted alphabetically among themselves. | 
					
						
							|  |  |  | // Specialized variants may also sort after other specialized variants. Such | 
					
						
							|  |  |  | // variants will be ordered after any of the variants they may follow. | 
					
						
							|  |  |  | // We assume that if a variant x is followed by a variant y, then for any prefix | 
					
						
							|  |  |  | // p of x, p-x is a prefix of y. This allows us to order tags based on the | 
					
						
							|  |  |  | // maximum of the length of any of its prefixes. | 
					
						
							|  |  |  | // TODO: it is possible to define a set of Prefix values on variants such that | 
					
						
							|  |  |  | // a total order cannot be defined to the point that this algorithm breaks. | 
					
						
							|  |  |  | // In other words, we cannot guarantee the same order of variants for the | 
					
						
							|  |  |  | // future using the same algorithm or for non-compliant combinations of | 
					
						
							|  |  |  | // variants. For this reason, consider using simple alphabetic sorting | 
					
						
							|  |  |  | // of variants and ignore Prefix restrictions altogether. | 
					
						
							|  |  |  | func (b *builder) writeVariant() { | 
					
						
							|  |  |  | 	generalized := stringSet{} | 
					
						
							|  |  |  | 	specialized := stringSet{} | 
					
						
							|  |  |  | 	specializedExtend := stringSet{} | 
					
						
							|  |  |  | 	// Collate the variants by type and check assumptions. | 
					
						
							|  |  |  | 	for _, v := range b.variant.slice() { | 
					
						
							|  |  |  | 		e := b.registry[v] | 
					
						
							|  |  |  | 		if len(e.prefix) == 0 { | 
					
						
							|  |  |  | 			generalized.add(v) | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		c := strings.Split(e.prefix[0], "-") | 
					
						
							|  |  |  | 		hasScriptOrRegion := false | 
					
						
							|  |  |  | 		if len(c) > 1 { | 
					
						
							|  |  |  | 			_, hasScriptOrRegion = b.script.find(c[1]) | 
					
						
							|  |  |  | 			if !hasScriptOrRegion { | 
					
						
							|  |  |  | 				_, hasScriptOrRegion = b.region.find(c[1]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if len(c) == 1 || len(c) == 2 && hasScriptOrRegion { | 
					
						
							|  |  |  | 			// Variant is preceded by a language. | 
					
						
							|  |  |  | 			specialized.add(v) | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		// Variant is preceded by another variant. | 
					
						
							|  |  |  | 		specializedExtend.add(v) | 
					
						
							|  |  |  | 		prefix := c[0] + "-" | 
					
						
							|  |  |  | 		if hasScriptOrRegion { | 
					
						
							|  |  |  | 			prefix += c[1] | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		for _, p := range e.prefix { | 
					
						
							|  |  |  | 			// Verify that the prefix minus the last element is a prefix of the | 
					
						
							|  |  |  | 			// predecessor element. | 
					
						
							|  |  |  | 			i := strings.LastIndex(p, "-") | 
					
						
							|  |  |  | 			pred := b.registry[p[i+1:]] | 
					
						
							|  |  |  | 			if find(pred.prefix, p[:i]) < 0 { | 
					
						
							|  |  |  | 				log.Fatalf("prefix %q for variant %q not consistent with predecessor spec", p, v) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			// The sorting used below does not work in the general case. It works | 
					
						
							|  |  |  | 			// if we assume that variants that may be followed by others only have | 
					
						
							|  |  |  | 			// prefixes of the same length. Verify this. | 
					
						
							|  |  |  | 			count := strings.Count(p[:i], "-") | 
					
						
							|  |  |  | 			for _, q := range pred.prefix { | 
					
						
							|  |  |  | 				if c := strings.Count(q, "-"); c != count { | 
					
						
							|  |  |  | 					log.Fatalf("variant %q preceding %q has a prefix %q of size %d; want %d", p[i+1:], v, q, c, count) | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			if !strings.HasPrefix(p, prefix) { | 
					
						
							|  |  |  | 				log.Fatalf("prefix %q of variant %q should start with %q", p, v, prefix) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Sort extended variants. | 
					
						
							|  |  |  | 	a := specializedExtend.s | 
					
						
							|  |  |  | 	less := func(v, w string) bool { | 
					
						
							|  |  |  | 		// Sort by the maximum number of elements. | 
					
						
							|  |  |  | 		maxCount := func(s string) (max int) { | 
					
						
							|  |  |  | 			for _, p := range b.registry[s].prefix { | 
					
						
							|  |  |  | 				if c := strings.Count(p, "-"); c > max { | 
					
						
							|  |  |  | 					max = c | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			return | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if cv, cw := maxCount(v), maxCount(w); cv != cw { | 
					
						
							|  |  |  | 			return cv < cw | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		// Sort by name as tie breaker. | 
					
						
							|  |  |  | 		return v < w | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	sort.Sort(funcSorter{less, sort.StringSlice(a)}) | 
					
						
							|  |  |  | 	specializedExtend.frozen = true | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Create index from variant name to index. | 
					
						
							|  |  |  | 	variantIndex := make(map[string]uint8) | 
					
						
							|  |  |  | 	add := func(s []string) { | 
					
						
							|  |  |  | 		for _, v := range s { | 
					
						
							|  |  |  | 			variantIndex[v] = uint8(len(variantIndex)) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	add(specialized.slice()) | 
					
						
							|  |  |  | 	add(specializedExtend.s) | 
					
						
							|  |  |  | 	numSpecialized := len(variantIndex) | 
					
						
							|  |  |  | 	add(generalized.slice()) | 
					
						
							|  |  |  | 	if n := len(variantIndex); n > 255 { | 
					
						
							|  |  |  | 		log.Fatalf("maximum number of variants exceeded: was %d; want <= 255", n) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.writeMap("variantIndex", variantIndex) | 
					
						
							|  |  |  | 	b.writeConst("variantNumSpecialized", numSpecialized) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (b *builder) writeLanguageInfo() { | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // writeLikelyData writes tables that are used both for finding parent relations and for | 
					
						
							|  |  |  | // language matching.  Each entry contains additional bits to indicate the status of the | 
					
						
							|  |  |  | // data to know when it cannot be used for parent relations. | 
					
						
							|  |  |  | func (b *builder) writeLikelyData() { | 
					
						
							|  |  |  | 	const ( | 
					
						
							|  |  |  | 		isList = 1 << iota | 
					
						
							|  |  |  | 		scriptInFrom | 
					
						
							|  |  |  | 		regionInFrom | 
					
						
							|  |  |  | 	) | 
					
						
							|  |  |  | 	type ( // generated types | 
					
						
							|  |  |  | 		likelyScriptRegion struct { | 
					
						
							|  |  |  | 			region uint16 | 
					
						
							|  |  |  | 			script uint8 | 
					
						
							|  |  |  | 			flags  uint8 | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		likelyLangScript struct { | 
					
						
							|  |  |  | 			lang   uint16 | 
					
						
							|  |  |  | 			script uint8 | 
					
						
							|  |  |  | 			flags  uint8 | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		likelyLangRegion struct { | 
					
						
							|  |  |  | 			lang   uint16 | 
					
						
							|  |  |  | 			region uint16 | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		// likelyTag is used for getting likely tags for group regions, where | 
					
						
							|  |  |  | 		// the likely region might be a region contained in the group. | 
					
						
							|  |  |  | 		likelyTag struct { | 
					
						
							|  |  |  | 			lang   uint16 | 
					
						
							|  |  |  | 			region uint16 | 
					
						
							|  |  |  | 			script uint8 | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	) | 
					
						
							|  |  |  | 	var ( // generated variables | 
					
						
							|  |  |  | 		likelyRegionGroup = make([]likelyTag, len(b.groups)) | 
					
						
							|  |  |  | 		likelyLang        = make([]likelyScriptRegion, len(b.lang.s)) | 
					
						
							|  |  |  | 		likelyRegion      = make([]likelyLangScript, len(b.region.s)) | 
					
						
							|  |  |  | 		likelyScript      = make([]likelyLangRegion, len(b.script.s)) | 
					
						
							|  |  |  | 		likelyLangList    = []likelyScriptRegion{} | 
					
						
							|  |  |  | 		likelyRegionList  = []likelyLangScript{} | 
					
						
							|  |  |  | 	) | 
					
						
							|  |  |  | 	type fromTo struct { | 
					
						
							|  |  |  | 		from, to []string | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	langToOther := map[int][]fromTo{} | 
					
						
							|  |  |  | 	regionToOther := map[int][]fromTo{} | 
					
						
							|  |  |  | 	for _, m := range b.supp.LikelySubtags.LikelySubtag { | 
					
						
							|  |  |  | 		from := strings.Split(m.From, "_") | 
					
						
							|  |  |  | 		to := strings.Split(m.To, "_") | 
					
						
							|  |  |  | 		if len(to) != 3 { | 
					
						
							|  |  |  | 			log.Fatalf("invalid number of subtags in %q: found %d, want 3", m.To, len(to)) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if len(from) > 3 { | 
					
						
							|  |  |  | 			log.Fatalf("invalid number of subtags: found %d, want 1-3", len(from)) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if from[0] != to[0] && from[0] != "und" { | 
					
						
							|  |  |  | 			log.Fatalf("unexpected language change in expansion: %s -> %s", from, to) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if len(from) == 3 { | 
					
						
							|  |  |  | 			if from[2] != to[2] { | 
					
						
							|  |  |  | 				log.Fatalf("unexpected region change in expansion: %s -> %s", from, to) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			if from[0] != "und" { | 
					
						
							|  |  |  | 				log.Fatalf("unexpected fully specified from tag: %s -> %s", from, to) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if len(from) == 1 || from[0] != "und" { | 
					
						
							|  |  |  | 			id := 0 | 
					
						
							|  |  |  | 			if from[0] != "und" { | 
					
						
							|  |  |  | 				id = b.lang.index(from[0]) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			langToOther[id] = append(langToOther[id], fromTo{from, to}) | 
					
						
							|  |  |  | 		} else if len(from) == 2 && len(from[1]) == 4 { | 
					
						
							|  |  |  | 			sid := b.script.index(from[1]) | 
					
						
							|  |  |  | 			likelyScript[sid].lang = uint16(b.langIndex(to[0])) | 
					
						
							|  |  |  | 			likelyScript[sid].region = uint16(b.region.index(to[2])) | 
					
						
							|  |  |  | 		} else { | 
					
						
							|  |  |  | 			r := b.region.index(from[len(from)-1]) | 
					
						
							|  |  |  | 			if id, ok := b.groups[r]; ok { | 
					
						
							|  |  |  | 				if from[0] != "und" { | 
					
						
							|  |  |  | 					log.Fatalf("region changed unexpectedly: %s -> %s", from, to) | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				likelyRegionGroup[id].lang = uint16(b.langIndex(to[0])) | 
					
						
							|  |  |  | 				likelyRegionGroup[id].script = uint8(b.script.index(to[1])) | 
					
						
							|  |  |  | 				likelyRegionGroup[id].region = uint16(b.region.index(to[2])) | 
					
						
							|  |  |  | 			} else { | 
					
						
							|  |  |  | 				regionToOther[r] = append(regionToOther[r], fromTo{from, to}) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.writeType(likelyLangRegion{}) | 
					
						
							|  |  |  | 	b.writeSlice("likelyScript", likelyScript) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for id := range b.lang.s { | 
					
						
							|  |  |  | 		list := langToOther[id] | 
					
						
							|  |  |  | 		if len(list) == 1 { | 
					
						
							|  |  |  | 			likelyLang[id].region = uint16(b.region.index(list[0].to[2])) | 
					
						
							|  |  |  | 			likelyLang[id].script = uint8(b.script.index(list[0].to[1])) | 
					
						
							|  |  |  | 		} else if len(list) > 1 { | 
					
						
							|  |  |  | 			likelyLang[id].flags = isList | 
					
						
							|  |  |  | 			likelyLang[id].region = uint16(len(likelyLangList)) | 
					
						
							|  |  |  | 			likelyLang[id].script = uint8(len(list)) | 
					
						
							|  |  |  | 			for _, x := range list { | 
					
						
							|  |  |  | 				flags := uint8(0) | 
					
						
							|  |  |  | 				if len(x.from) > 1 { | 
					
						
							|  |  |  | 					if x.from[1] == x.to[2] { | 
					
						
							|  |  |  | 						flags = regionInFrom | 
					
						
							|  |  |  | 					} else { | 
					
						
							|  |  |  | 						flags = scriptInFrom | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				likelyLangList = append(likelyLangList, likelyScriptRegion{ | 
					
						
							|  |  |  | 					region: uint16(b.region.index(x.to[2])), | 
					
						
							|  |  |  | 					script: uint8(b.script.index(x.to[1])), | 
					
						
							|  |  |  | 					flags:  flags, | 
					
						
							|  |  |  | 				}) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	// TODO: merge suppressScript data with this table. | 
					
						
							|  |  |  | 	b.writeType(likelyScriptRegion{}) | 
					
						
							|  |  |  | 	b.writeSlice("likelyLang", likelyLang) | 
					
						
							|  |  |  | 	b.writeSlice("likelyLangList", likelyLangList) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for id := range b.region.s { | 
					
						
							|  |  |  | 		list := regionToOther[id] | 
					
						
							|  |  |  | 		if len(list) == 1 { | 
					
						
							|  |  |  | 			likelyRegion[id].lang = uint16(b.langIndex(list[0].to[0])) | 
					
						
							|  |  |  | 			likelyRegion[id].script = uint8(b.script.index(list[0].to[1])) | 
					
						
							|  |  |  | 			if len(list[0].from) > 2 { | 
					
						
							|  |  |  | 				likelyRegion[id].flags = scriptInFrom | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} else if len(list) > 1 { | 
					
						
							|  |  |  | 			likelyRegion[id].flags = isList | 
					
						
							|  |  |  | 			likelyRegion[id].lang = uint16(len(likelyRegionList)) | 
					
						
							|  |  |  | 			likelyRegion[id].script = uint8(len(list)) | 
					
						
							|  |  |  | 			for i, x := range list { | 
					
						
							|  |  |  | 				if len(x.from) == 2 && i != 0 || i > 0 && len(x.from) != 3 { | 
					
						
							|  |  |  | 					log.Fatalf("unspecified script must be first in list: %v at %d", x.from, i) | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				x := likelyLangScript{ | 
					
						
							|  |  |  | 					lang:   uint16(b.langIndex(x.to[0])), | 
					
						
							|  |  |  | 					script: uint8(b.script.index(x.to[1])), | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				if len(list[0].from) > 2 { | 
					
						
							|  |  |  | 					x.flags = scriptInFrom | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				likelyRegionList = append(likelyRegionList, x) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.writeType(likelyLangScript{}) | 
					
						
							|  |  |  | 	b.writeSlice("likelyRegion", likelyRegion) | 
					
						
							|  |  |  | 	b.writeSlice("likelyRegionList", likelyRegionList) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	b.writeType(likelyTag{}) | 
					
						
							|  |  |  | 	b.writeSlice("likelyRegionGroup", likelyRegionGroup) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type mutualIntelligibility struct { | 
					
						
							|  |  |  | 	want, have uint16 | 
					
						
							|  |  |  | 	conf       uint8 | 
					
						
							|  |  |  | 	oneway     bool | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type scriptIntelligibility struct { | 
					
						
							|  |  |  | 	lang       uint16 // langID or 0 if * | 
					
						
							|  |  |  | 	want, have uint8 | 
					
						
							|  |  |  | 	conf       uint8 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type sortByConf []mutualIntelligibility | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (l sortByConf) Less(a, b int) bool { | 
					
						
							|  |  |  | 	return l[a].conf > l[b].conf | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (l sortByConf) Swap(a, b int) { | 
					
						
							|  |  |  | 	l[a], l[b] = l[b], l[a] | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (l sortByConf) Len() int { | 
					
						
							|  |  |  | 	return len(l) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // toConf converts a percentage value [0, 100] to a confidence class. | 
					
						
							|  |  |  | func toConf(pct uint8) uint8 { | 
					
						
							|  |  |  | 	switch { | 
					
						
							|  |  |  | 	case pct == 100: | 
					
						
							|  |  |  | 		return 3 // Exact | 
					
						
							|  |  |  | 	case pct >= 90: | 
					
						
							|  |  |  | 		return 2 // High | 
					
						
							|  |  |  | 	case pct > 50: | 
					
						
							|  |  |  | 		return 1 // Low | 
					
						
							|  |  |  | 	default: | 
					
						
							|  |  |  | 		return 0 // No | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // writeMatchData writes tables with languages and scripts for which there is | 
					
						
							|  |  |  | // mutual intelligibility. The data is based on CLDR's languageMatching data. | 
					
						
							|  |  |  | // Note that we use a different algorithm than the one defined by CLDR and that | 
					
						
							|  |  |  | // we slightly modify the data. For example, we convert scores to confidence levels. | 
					
						
							|  |  |  | // We also drop all region-related data as we use a different algorithm to | 
					
						
							|  |  |  | // determine region equivalence. | 
					
						
							|  |  |  | func (b *builder) writeMatchData() { | 
					
						
							|  |  |  | 	b.writeType(mutualIntelligibility{}) | 
					
						
							|  |  |  | 	b.writeType(scriptIntelligibility{}) | 
					
						
							|  |  |  | 	lm := b.supp.LanguageMatching.LanguageMatches | 
					
						
							|  |  |  | 	cldr.MakeSlice(&lm).SelectAnyOf("type", "written") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	matchLang := []mutualIntelligibility{} | 
					
						
							|  |  |  | 	matchScript := []scriptIntelligibility{} | 
					
						
							|  |  |  | 	// Convert the languageMatch entries in lists keyed by desired language. | 
					
						
							|  |  |  | 	for _, m := range lm[0].LanguageMatch { | 
					
						
							|  |  |  | 		// Different versions of CLDR use different separators. | 
					
						
							|  |  |  | 		desired := strings.Replace(m.Desired, "-", "_", -1) | 
					
						
							|  |  |  | 		supported := strings.Replace(m.Supported, "-", "_", -1) | 
					
						
							|  |  |  | 		d := strings.Split(desired, "_") | 
					
						
							|  |  |  | 		s := strings.Split(supported, "_") | 
					
						
							|  |  |  | 		if len(d) != len(s) || len(d) > 2 { | 
					
						
							|  |  |  | 			// Skip all entries with regions and work around CLDR bug. | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		pct, _ := strconv.ParseInt(m.Percent, 10, 8) | 
					
						
							|  |  |  | 		if len(d) == 2 && d[0] == s[0] && len(d[1]) == 4 { | 
					
						
							|  |  |  | 			// language-script pair. | 
					
						
							|  |  |  | 			lang := uint16(0) | 
					
						
							|  |  |  | 			if d[0] != "*" { | 
					
						
							|  |  |  | 				lang = uint16(b.langIndex(d[0])) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			matchScript = append(matchScript, scriptIntelligibility{ | 
					
						
							|  |  |  | 				lang: lang, | 
					
						
							|  |  |  | 				want: uint8(b.script.index(d[1])), | 
					
						
							|  |  |  | 				have: uint8(b.script.index(s[1])), | 
					
						
							|  |  |  | 				conf: toConf(uint8(pct)), | 
					
						
							|  |  |  | 			}) | 
					
						
							|  |  |  | 			if m.Oneway != "true" { | 
					
						
							|  |  |  | 				matchScript = append(matchScript, scriptIntelligibility{ | 
					
						
							|  |  |  | 					lang: lang, | 
					
						
							|  |  |  | 					want: uint8(b.script.index(s[1])), | 
					
						
							|  |  |  | 					have: uint8(b.script.index(d[1])), | 
					
						
							|  |  |  | 					conf: toConf(uint8(pct)), | 
					
						
							|  |  |  | 				}) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} else if len(d) == 1 && d[0] != "*" { | 
					
						
							|  |  |  | 			if pct == 100 { | 
					
						
							|  |  |  | 				// nb == no is already handled by macro mapping. Check there | 
					
						
							|  |  |  | 				// really is only this case. | 
					
						
							|  |  |  | 				if d[0] != "no" || s[0] != "nb" { | 
					
						
							|  |  |  | 					log.Fatalf("unhandled equivalence %s == %s", s[0], d[0]) | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				continue | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			matchLang = append(matchLang, mutualIntelligibility{ | 
					
						
							|  |  |  | 				want:   uint16(b.langIndex(d[0])), | 
					
						
							|  |  |  | 				have:   uint16(b.langIndex(s[0])), | 
					
						
							|  |  |  | 				conf:   uint8(pct), | 
					
						
							|  |  |  | 				oneway: m.Oneway == "true", | 
					
						
							|  |  |  | 			}) | 
					
						
							|  |  |  | 		} else { | 
					
						
							|  |  |  | 			// TODO: Handle other mappings. | 
					
						
							|  |  |  | 			a := []string{"*;*", "*_*;*_*", "es_MX;es_419"} | 
					
						
							|  |  |  | 			s := strings.Join([]string{desired, supported}, ";") | 
					
						
							|  |  |  | 			if i := sort.SearchStrings(a, s); i == len(a) || a[i] != s { | 
					
						
							|  |  |  | 				log.Printf("%q not handled", s) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	sort.Stable(sortByConf(matchLang)) | 
					
						
							|  |  |  | 	// collapse percentage into confidence classes | 
					
						
							|  |  |  | 	for i, m := range matchLang { | 
					
						
							|  |  |  | 		matchLang[i].conf = toConf(m.conf) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.writeSlice("matchLang", matchLang) | 
					
						
							|  |  |  | 	b.writeSlice("matchScript", matchScript) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (b *builder) writeRegionInclusionData() { | 
					
						
							|  |  |  | 	var ( | 
					
						
							|  |  |  | 		// mm holds for each group the set of groups with a distance of 1. | 
					
						
							|  |  |  | 		mm = make(map[int][]index) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// containment holds for each group the transitive closure of | 
					
						
							|  |  |  | 		// containment of other groups. | 
					
						
							|  |  |  | 		containment = make(map[index][]index) | 
					
						
							|  |  |  | 	) | 
					
						
							|  |  |  | 	for _, g := range b.supp.TerritoryContainment.Group { | 
					
						
							| 
									
										
										
										
											2016-10-28 20:05:01 +03:00
										 |  |  | 		// Skip UN and EURO zone as they are flattening the containment | 
					
						
							|  |  |  | 		// relationship. | 
					
						
							|  |  |  | 		if g.Type == "EZ" || g.Type == "UN" { | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2016-02-11 16:16:52 +02:00
										 |  |  | 		group := b.region.index(g.Type) | 
					
						
							|  |  |  | 		groupIdx := b.groups[group] | 
					
						
							|  |  |  | 		for _, mem := range strings.Split(g.Contains, " ") { | 
					
						
							|  |  |  | 			r := b.region.index(mem) | 
					
						
							|  |  |  | 			mm[r] = append(mm[r], groupIdx) | 
					
						
							|  |  |  | 			if g, ok := b.groups[r]; ok { | 
					
						
							|  |  |  | 				mm[group] = append(mm[group], g) | 
					
						
							|  |  |  | 				containment[groupIdx] = append(containment[groupIdx], g) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	regionContainment := make([]uint32, len(b.groups)) | 
					
						
							|  |  |  | 	for _, g := range b.groups { | 
					
						
							|  |  |  | 		l := containment[g] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// Compute the transitive closure of containment. | 
					
						
							|  |  |  | 		for i := 0; i < len(l); i++ { | 
					
						
							|  |  |  | 			l = append(l, containment[l[i]]...) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// Compute the bitmask. | 
					
						
							|  |  |  | 		regionContainment[g] = 1 << g | 
					
						
							|  |  |  | 		for _, v := range l { | 
					
						
							|  |  |  | 			regionContainment[g] |= 1 << v | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		// log.Printf("%d: %X", g, regionContainment[g]) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.writeSlice("regionContainment", regionContainment) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	regionInclusion := make([]uint8, len(b.region.s)) | 
					
						
							|  |  |  | 	bvs := make(map[uint32]index) | 
					
						
							|  |  |  | 	// Make the first bitvector positions correspond with the groups. | 
					
						
							|  |  |  | 	for r, i := range b.groups { | 
					
						
							|  |  |  | 		bv := uint32(1 << i) | 
					
						
							|  |  |  | 		for _, g := range mm[r] { | 
					
						
							|  |  |  | 			bv |= 1 << g | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		bvs[bv] = i | 
					
						
							|  |  |  | 		regionInclusion[r] = uint8(bvs[bv]) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	for r := 1; r < len(b.region.s); r++ { | 
					
						
							|  |  |  | 		if _, ok := b.groups[r]; !ok { | 
					
						
							|  |  |  | 			bv := uint32(0) | 
					
						
							|  |  |  | 			for _, g := range mm[r] { | 
					
						
							|  |  |  | 				bv |= 1 << g | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			if bv == 0 { | 
					
						
							|  |  |  | 				// Pick the world for unspecified regions. | 
					
						
							|  |  |  | 				bv = 1 << b.groups[b.region.index("001")] | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			if _, ok := bvs[bv]; !ok { | 
					
						
							|  |  |  | 				bvs[bv] = index(len(bvs)) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			regionInclusion[r] = uint8(bvs[bv]) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.writeSlice("regionInclusion", regionInclusion) | 
					
						
							|  |  |  | 	regionInclusionBits := make([]uint32, len(bvs)) | 
					
						
							|  |  |  | 	for k, v := range bvs { | 
					
						
							|  |  |  | 		regionInclusionBits[v] = uint32(k) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	// Add bit vectors for increasingly large distances until a fixed point is reached. | 
					
						
							|  |  |  | 	regionInclusionNext := []uint8{} | 
					
						
							|  |  |  | 	for i := 0; i < len(regionInclusionBits); i++ { | 
					
						
							|  |  |  | 		bits := regionInclusionBits[i] | 
					
						
							|  |  |  | 		next := bits | 
					
						
							|  |  |  | 		for i := uint(0); i < uint(len(b.groups)); i++ { | 
					
						
							|  |  |  | 			if bits&(1<<i) != 0 { | 
					
						
							|  |  |  | 				next |= regionInclusionBits[i] | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if _, ok := bvs[next]; !ok { | 
					
						
							|  |  |  | 			bvs[next] = index(len(bvs)) | 
					
						
							|  |  |  | 			regionInclusionBits = append(regionInclusionBits, next) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		regionInclusionNext = append(regionInclusionNext, uint8(bvs[next])) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.writeSlice("regionInclusionBits", regionInclusionBits) | 
					
						
							|  |  |  | 	b.writeSlice("regionInclusionNext", regionInclusionNext) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type parentRel struct { | 
					
						
							|  |  |  | 	lang       uint16 | 
					
						
							|  |  |  | 	script     uint8 | 
					
						
							|  |  |  | 	maxScript  uint8 | 
					
						
							|  |  |  | 	toRegion   uint16 | 
					
						
							|  |  |  | 	fromRegion []uint16 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (b *builder) writeParents() { | 
					
						
							|  |  |  | 	b.writeType(parentRel{}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	parents := []parentRel{} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Construct parent overrides. | 
					
						
							|  |  |  | 	n := 0 | 
					
						
							|  |  |  | 	for _, p := range b.data.Supplemental().ParentLocales.ParentLocale { | 
					
						
							|  |  |  | 		// Skipping non-standard scripts to root is implemented using addTags. | 
					
						
							|  |  |  | 		if p.Parent == "root" { | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		sub := strings.Split(p.Parent, "_") | 
					
						
							|  |  |  | 		parent := parentRel{lang: b.langIndex(sub[0])} | 
					
						
							|  |  |  | 		if len(sub) == 2 { | 
					
						
							|  |  |  | 			// TODO: check that all undefined scripts are indeed Latn in these | 
					
						
							|  |  |  | 			// cases. | 
					
						
							|  |  |  | 			parent.maxScript = uint8(b.script.index("Latn")) | 
					
						
							|  |  |  | 			parent.toRegion = uint16(b.region.index(sub[1])) | 
					
						
							|  |  |  | 		} else { | 
					
						
							|  |  |  | 			parent.script = uint8(b.script.index(sub[1])) | 
					
						
							|  |  |  | 			parent.maxScript = parent.script | 
					
						
							|  |  |  | 			parent.toRegion = uint16(b.region.index(sub[2])) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		for _, c := range strings.Split(p.Locales, " ") { | 
					
						
							|  |  |  | 			region := b.region.index(c[strings.LastIndex(c, "_")+1:]) | 
					
						
							|  |  |  | 			parent.fromRegion = append(parent.fromRegion, uint16(region)) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		parents = append(parents, parent) | 
					
						
							|  |  |  | 		n += len(parent.fromRegion) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.writeSliceAddSize("parents", n*2, parents) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func main() { | 
					
						
							|  |  |  | 	gen.Init() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	gen.Repackage("gen_common.go", "common.go", "language") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	w := gen.NewCodeWriter() | 
					
						
							|  |  |  | 	defer w.WriteGoFile("tables.go", "language") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	fmt.Fprintln(w, `import "golang.org/x/text/internal/tag"`) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	b := newBuilder(w) | 
					
						
							|  |  |  | 	gen.WriteCLDRVersion(w) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	b.parseIndices() | 
					
						
							|  |  |  | 	b.writeType(fromTo{}) | 
					
						
							|  |  |  | 	b.writeLanguage() | 
					
						
							|  |  |  | 	b.writeScript() | 
					
						
							|  |  |  | 	b.writeRegion() | 
					
						
							|  |  |  | 	b.writeVariant() | 
					
						
							|  |  |  | 	// TODO: b.writeLocale() | 
					
						
							|  |  |  | 	b.computeRegionGroups() | 
					
						
							|  |  |  | 	b.writeLikelyData() | 
					
						
							|  |  |  | 	b.writeMatchData() | 
					
						
							|  |  |  | 	b.writeRegionInclusionData() | 
					
						
							|  |  |  | 	b.writeParents() | 
					
						
							|  |  |  | } |