// Copyright 2013 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.//go:generate go run gen.go gen_common.go -output tables.gopackage language // import "golang.org/x/text/internal/language"// TODO: Remove above NOTE after:// - verifying that tables are dropped correctly (most notably matcher tables).import ()const (// maxCoreSize is the maximum size of a BCP 47 tag without variants and // extensions. Equals max lang (3) + script (4) + max reg (3) + 2 dashes. maxCoreSize = 12// max99thPercentileSize is a somewhat arbitrary buffer size that presumably // is large enough to hold at least 99% of the BCP 47 tags. max99thPercentileSize = 32// maxSimpleUExtensionSize is the maximum size of a -u extension with one // key-type pair. Equals len("-u-") + key (2) + dash + max value (8). maxSimpleUExtensionSize = 14)// Tag represents a BCP 47 language tag. It is used to specify an instance of a// specific language or locale. All language tag values are guaranteed to be// well-formed. The zero value of Tag is Und.typeTagstruct {// TODO: the following fields have the form TagTypeID. This name is chosen // to allow refactoring the public package without conflicting with its // Base, Script, and Region methods. Once the transition is fully completed // the ID can be stripped from the name. LangID Language RegionID Region// TODO: we will soon run out of positions for ScriptID. Idea: instead of // storing lang, region, and ScriptID codes, store only the compact index and // have a lookup table from this code to its expansion. This greatly speeds // up table lookup, speed up common variant cases. // This will also immediately free up 3 extra bytes. Also, the pVariant // field can now be moved to the lookup table, as the compact index uniquely // determines the offset of a possible variant. 
ScriptID Script pVariant byte// offset in str, includes preceding '-' pExt uint16// offset of first extension, includes preceding '-'// str is the string representation of the Tag. It will only be used if the // tag has variants or extensions. str string}// Make is a convenience wrapper for Parse that omits the error.// In case of an error, a sensible default is returned.func ( string) Tag { , := Parse()return}// Raw returns the raw base language, script and region, without making an// attempt to infer their values.// TODO: consider removingfunc ( Tag) () ( Language, Script, Region) {return .LangID, .ScriptID, .RegionID}// equalTags compares language, script and region subtags only.func ( Tag) ( Tag) bool {return .LangID == .LangID && .ScriptID == .ScriptID && .RegionID == .RegionID}// IsRoot returns true if t is equal to language "und".func ( Tag) () bool {ifint(.pVariant) < len(.str) {returnfalse }return .equalTags(Und)}// IsPrivateUse reports whether the Tag consists solely of an IsPrivateUse use// tag.func ( Tag) () bool {return .str != "" && .pVariant == 0}// RemakeString is used to update t.str in case lang, script or region changed.// It is assumed that pExt and pVariant still point to the start of the// respective parts.func ( *Tag) () {if .str == "" {return } := .str[.pVariant:]if .pVariant > 0 { = [1:] }if .equalTags(Und) && strings.HasPrefix(, "x-") { .str = .pVariant = 0 .pExt = 0return }var [max99thPercentileSize]byte// avoid extra memory allocation in most cases. := [:.genCoreBytes([:])]if != "" { := len() - int(.pVariant) = append(, '-') = append(, ...) .pVariant = uint8(int(.pVariant) + ) .pExt = uint16(int(.pExt) + ) } else { .pVariant = uint8(len()) .pExt = uint16(len()) } .str = string()}// genCoreBytes writes a string for the base languages, script and region tags// to the given buffer and returns the number of bytes written. 
It will never// write more than maxCoreSize bytes.func ( *Tag) ( []byte) int { := .LangID.StringToBuf([:])if .ScriptID != 0 { += copy([:], "-") += copy([:], .ScriptID.String()) }if .RegionID != 0 { += copy([:], "-") += copy([:], .RegionID.String()) }return}// String returns the canonical string representation of the language tag.func ( Tag) () string {if .str != "" {return .str }if .ScriptID == 0 && .RegionID == 0 {return .LangID.String() } := [maxCoreSize]byte{}returnstring([:.genCoreBytes([:])])}// MarshalText implements encoding.TextMarshaler.func ( Tag) () ( []byte, error) {if .str != "" { = append(, .str...) } elseif .ScriptID == 0 && .RegionID == 0 { = append(, .LangID.String()...) } else { := [maxCoreSize]byte{} = [:.genCoreBytes([:])] }return , nil}// UnmarshalText implements encoding.TextUnmarshaler.func ( *Tag) ( []byte) error { , := Parse(string()) * = return}// Variants returns the part of the tag holding all variants or the empty string// if there are no variants defined.func ( Tag) () string {if .pVariant == 0 {return"" }return .str[.pVariant:.pExt]}// VariantOrPrivateUseTags returns variants or private use tags.func ( Tag) () string {if .pExt > 0 {return .str[.pVariant:.pExt] }return .str[.pVariant:]}// HasString reports whether this tag defines more than just the raw// components.func ( Tag) () bool {return .str != ""}// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a// specific language are substituted with fields from the parent language.// The parent for a language may change for newer versions of CLDR.func ( Tag) () Tag {if .str != "" {// Strip the variants and extensions. 
, , := .Raw() = Tag{LangID: , ScriptID: , RegionID: }if .RegionID == 0 && .ScriptID != 0 && .LangID != 0 { , := addTags(Tag{LangID: .LangID})if .ScriptID == .ScriptID {returnTag{LangID: .LangID} } }return }if .LangID != 0 {if .RegionID != 0 { := .ScriptIDif == 0 { , := addTags() = .ScriptID }for := rangeparents {ifLanguage(parents[].lang) == .LangID && Script(parents[].maxScript) == {for , := rangeparents[].fromRegion {ifRegion() == .RegionID {returnTag{LangID: .LangID,ScriptID: Script(parents[].script),RegionID: Region(parents[].toRegion), } } } } }// Strip the script if it is the default one. , := addTags(Tag{LangID: .LangID})if .ScriptID != {returnTag{LangID: .LangID, ScriptID: } }returnTag{LangID: .LangID} } elseif .ScriptID != 0 {// The parent for an base-script pair with a non-default script is // "und" instead of the base language. , := addTags(Tag{LangID: .LangID})if .ScriptID != .ScriptID {returnUnd }returnTag{LangID: .LangID} } }returnUnd}// ParseExtension parses s as an extension and returns it on success.func ( string) ( string, error) {deferfunc() {ifrecover() != nil { = "" = ErrSyntax } }() := makeScannerString()varintif := len(.token); != 1 {return"", ErrSyntax } .toLower(0, len(.b)) = parseExtension(&)if != len() {return"", ErrSyntax }returnstring(.b), nil}// HasVariants reports whether t has variants.func ( Tag) () bool {returnuint16(.pVariant) < .pExt}// HasExtensions reports whether t has extensions.func ( Tag) () bool {returnint(.pExt) < len(.str)}// Extension returns the extension of type x for tag t. It will return// false for ok if t does not have the requested extension. 
The returned// extension will be invalid in this case.func ( Tag) ( byte) ( string, bool) {for := int(.pExt); < len(.str)-1; {varstring , = getExtension(.str, )if [0] == {return , true } }return"", false}// Extensions returns all extensions of t.func ( Tag) () []string { := []string{}for := int(.pExt); < len(.str)-1; {varstring , = getExtension(.str, ) = append(, ) }return}// TypeForKey returns the type associated with the given key, where key and type// are of the allowed values defined for the Unicode locale extension ('u') in// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.// TypeForKey will traverse the inheritance chain to get the correct value.//// If there are multiple types associated with a key, only the first will be// returned. If there is no type associated with a key, it returns the empty// string.func ( Tag) ( string) string {if , , , := .findTypeForKey(); != { := .str[:]if := strings.IndexByte(, '-'); >= 0 { = [:] }return }return""}var ( errPrivateUse = errors.New("cannot set a key on a private use tag") errInvalidArguments = errors.New("invalid key or type"))// SetTypeForKey returns a new Tag with the key set to type, where key and type// are of the allowed values defined for the Unicode locale extension ('u') in// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.// An empty value removes an existing pair with the same key.func ( Tag) (, string) (Tag, error) {if .IsPrivateUse() {return , errPrivateUse }iflen() != 2 {return , errInvalidArguments }// Remove the setting if value is "".if == "" { , , , := .findTypeForKey()if != {// Remove a possible empty extension.switch {case .str[-2] != '-': // has previous elements.case == len(.str), // end of string +2 < len(.str) && .str[+2] == '-': // end of extension -= 2 }if == int(.pVariant) && == len(.str) { .str = "" .pVariant, .pExt = 0, 0 } else { .str = fmt.Sprintf("%s%s", .str[:], .str[:]) } }return , nil }iflen() < 3 || len() > 8 {return , 
errInvalidArguments }var ( [maxCoreSize + maxSimpleUExtensionSize]byteint// start of the -u extension. )// Generate the tag string if needed.if .str == "" { = .genCoreBytes([:]) [] = '-' ++ }// Create new key-type pair and parse it to verify. := [:]copy(, "u-")copy([2:], ) [4] = '-' = [:5+copy([5:], )] := makeScanner()ifparseExtensions(&); .err != nil {return , .err }// Assemble the replacement string.if .str == "" { .pVariant, .pExt = byte(-1), uint16(-1) .str = string([:+len()]) } else { := .str , , , := .findTypeForKey()if == {if { = [2:] } .str = fmt.Sprintf("%s-%s%s", [:], , [:]) } else { .str = fmt.Sprintf("%s-%s%s", [:+3], , [:]) } }return , nil}// findTypeForKey returns the start and end position for the type corresponding// to key or the point at which to insert the key-value pair if the type// wasn't found. The hasExt return value reports whether an -u extension was present.// Note: the extensions are typically very small and are likely to contain// only one key-type pair.func ( Tag) ( string) (, , int, bool) { := int(.pExt)iflen() != 2 || == len(.str) || == 0 {return , , , false } := .str// Find the correct extension.for ++; [] != 'u'; ++ {if [] > 'u' { --return , , , false }if = nextExtension(, ); == len() {returnlen(), len(), len(), false } }// Proceed to the hyphen following the extension name. ++// curKey is the key currently being processed. 
:= ""// Iterate over keys until we get the end of a section.for { = for ++; < len() && [] != '-'; ++ { } := - - 1if <= 2 && == {if < { ++ }return , , , true }switch {case0, // invalid string1: // next extensionreturn , , , truecase2:// next key = [+1 : ]if > {return , , , true } = = } }}// ParseBase parses a 2- or 3-letter ISO 639 code.// It returns a ValueError if s is a well-formed but unknown language identifier// or another error if another error occurred.func ( string) ( Language, error) {deferfunc() {ifrecover() != nil { = 0 = ErrSyntax } }()if := len(); < 2 || 3 < {return0, ErrSyntax }var [3]bytereturngetLangID([:copy([:], )])}// ParseScript parses a 4-letter ISO 15924 code.// It returns a ValueError if s is a well-formed but unknown script identifier// or another error if another error occurred.func ( string) ( Script, error) {deferfunc() {ifrecover() != nil { = 0 = ErrSyntax } }()iflen() != 4 {return0, ErrSyntax }var [4]bytereturngetScriptID(script, [:copy([:], )])}// EncodeM49 returns the Region for the given UN M.49 code.// It returns an error if r is not a valid code.func ( int) (Region, error) {returngetRegionM49()}// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.// It returns a ValueError if s is a well-formed but unknown region identifier// or another error if another error occurred.func ( string) ( Region, error) {deferfunc() {ifrecover() != nil { = 0 = ErrSyntax } }()if := len(); < 2 || 3 < {return0, ErrSyntax }var [3]bytereturngetRegionID([:copy([:], )])}// IsCountry returns whether this region is a country or autonomous area. This// includes non-standard definitions from CLDR.func ( Region) () bool {if == 0 || .IsGroup() || .IsPrivateUse() && != _XK {returnfalse }returntrue}// IsGroup returns whether this region defines a collection of regions. 
This// includes non-standard definitions from CLDR.func ( Region) () bool {if == 0 {returnfalse }returnint(regionInclusion[]) < len(regionContainment)}// Contains returns whether Region c is contained by Region r. It returns true// if c == r.func ( Region) ( Region) bool {if == {returntrue } := regionInclusion[]if >= nRegionGroups {returnfalse } := regionContainment[] := regionInclusion[] := regionInclusionBits[]// A contained country may belong to multiple disjoint groups. Matching any // of these indicates containment. If the contained region is a group, it // must strictly be a subset.if >= nRegionGroups {return & != 0 }return &^ == 0}var errNoTLD = errors.New("language: region is not a valid ccTLD")// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.// In all other cases it returns either the region itself or an error.//// This method may return an error for a region for which there exists a// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The// region will already be canonicalized it was obtained from a Tag that was// obtained using any of the default methods.func ( Region) () (Region, error) {// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the // difference between ISO 3166-1 and IANA ccTLD.if == _GB { = _UK }if (.typ() & ccTLD) == 0 {return0, errNoTLD }return , nil}// Canonicalize returns the region or a possible replacement if the region is// deprecated. It will not return a replacement for deprecated regions that// are split into multiple regions.func ( Region) () Region {if := normRegion(); != 0 {return }return}// Variant represents a registered variant of a language as defined by BCP 47.typeVariantstruct { ID uint8 str string}// ParseVariant parses and returns a Variant. 
An error is returned if s is not// a valid variant.func ( string) ( Variant, error) {deferfunc() {ifrecover() != nil { = Variant{} = ErrSyntax } }() = strings.ToLower()if , := variantIndex[]; {returnVariant{, }, nil }returnVariant{}, NewValueError([]byte())}// String returns the string representation of the variant.func ( Variant) () string {return .str}
The pages are generated with Golds v0.6.7. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @Go100and1 (reachable from the left QR code) to get the latest news of Golds.