// Copyright 2013 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package languageimport ()// findIndex tries to find the given tag in idx and returns a standardized error// if it could not be found.func findIndex( tag.Index, []byte, string) ( int, error) {if !tag.FixCase(, ) {return0, ErrSyntax } := .Index()if == -1 {return0, NewValueError() }return , nil}func searchUint( []uint16, uint16) int {returnsort.Search(len(), func( int) bool {return [] >= })}typeLanguageuint16// getLangID returns the langID of s if s is a canonical subtag// or langUnknown if s is not a canonical subtag.func getLangID( []byte) (Language, error) {iflen() == 2 {returngetLangISO2() }returngetLangISO3()}// TODO language normalization as well as the AliasMaps could be moved to the// higher level package, but it is a bit tricky to separate the generation.func ( Language) () (Language, AliasType) {returnnormLang()}// normLang returns the mapped langID of id according to mapping m.func normLang( Language) (Language, AliasType) { := sort.Search(len(AliasMap), func( int) bool {returnAliasMap[].From >= uint16() })if < len(AliasMap) && AliasMap[].From == uint16() {returnLanguage(AliasMap[].To), AliasTypes[] }return , AliasTypeUnknown}// getLangISO2 returns the langID for the given 2-letter ISO language code// or unknownLang if this does not exist.func getLangISO2( []byte) (Language, error) {if !tag.FixCase("zz", ) {return0, ErrSyntax }if := lang.Index(); != -1 && lang.Elem()[3] != 0 {returnLanguage(), nil }return0, NewValueError()}const base = 'z' - 'a' + 1func strToInt( []byte) uint { := uint(0)for := 0; < len(); ++ { *= base += uint([] - 'a') }return}// converts the given integer to the original ASCII string passed to strToInt.// len(s) must match the number of characters obtained.func intToStr( uint, []byte) {for := len() - 1; >= 0; -- { [] = byte(%base) + 'a' /= base }}// getLangISO3 returns the langID for the given 3-letter ISO language code// or unknownLang if this does not exist.func getLangISO3( []byte) (Language, error) {iftag.FixCase("und", ) {// first try to match canonical 3-letter entriesfor := lang.Index([:2]); != -1; = lang.Next([:2], ) {if := lang.Elem(); [3] == 0 && [2] == [2] {// We treat "und" as special and always translate it to "unspecified". // Note that ZZ and Zzzz are private use and are not treated as // unspecified by default. := Language()if == nonCanonicalUnd {return0, nil }return , nil } }if := altLangISO3.Index(); != -1 {returnLanguage(altLangIndex[altLangISO3.Elem()[3]]), nil } := strToInt()iflangNoIndex[/8]&(1<<(%8)) != 0 {returnLanguage() + langNoIndexOffset, nil }// Check for non-canonical uses of ISO3.for := lang.Index([:1]); != -1; = lang.Next([:1], ) {if := lang.Elem(); [2] == [1] && [3] == [2] {returnLanguage(), nil } }return0, NewValueError() }return0, ErrSyntax}// StringToBuf writes the string to b and returns the number of bytes// written. cap(b) must be >= 3.func ( Language) ( []byte) int {if >= langNoIndexOffset {intToStr(uint()-langNoIndexOffset, [:3])return3 } elseif == 0 {returncopy(, "und") } := lang[<<2:]if [3] == 0 {returncopy(, [:3]) }returncopy(, [:2])}// String returns the BCP 47 representation of the langID.// Use b as variable name, instead of id, to ensure the variable// used is consistent with that of Base in which this type is embedded.func ( Language) () string {if == 0 {return"und" } elseif >= langNoIndexOffset { -= langNoIndexOffset := [3]byte{}intToStr(uint(), [:])returnstring([:]) } := lang.Elem(int())if [3] == 0 {return [:3] }return [:2]}// ISO3 returns the ISO 639-3 language code.func ( Language) () string {if == 0 || >= langNoIndexOffset {return .String() } := lang.Elem(int())if [3] == 0 {return [:3] } elseif [2] == 0 {returnaltLangISO3.Elem(int([3]))[:3] }// This allocation will only happen for 3-letter ISO codes // that are non-canonical BCP 47 language identifiers.return [0:1] + [2:4]}// IsPrivateUse reports whether this language code is reserved for private use.func ( Language) () bool {returnlangPrivateStart <= && <= langPrivateEnd}// SuppressScript returns the script marked as SuppressScript in the IANA// language tag repository, or 0 if there is no such script.func ( Language) () Script {if < langNoIndexOffset {returnScript(suppressScript[]) }return0}typeRegionuint16// getRegionID returns the region id for s if s is a valid 2-letter region code// or unknownRegion.func getRegionID( []byte) (Region, error) {iflen() == 3 {ifisAlpha([0]) {returngetRegionISO3() }if , := strconv.ParseUint(string(), 10, 10); == nil {returngetRegionM49(int()) } }returngetRegionISO2()}// getRegionISO2 returns the regionID for the given 2-letter ISO country code// or unknownRegion if this does not exist.func getRegionISO2( []byte) (Region, error) { , := findIndex(regionISO, , "ZZ")if != nil {return0, }returnRegion() + isoRegionOffset, nil}// getRegionISO3 returns the regionID for the given 3-letter ISO country code// or unknownRegion if this does not exist.func getRegionISO3( []byte) (Region, error) {iftag.FixCase("ZZZ", ) {for := regionISO.Index([:1]); != -1; = regionISO.Next([:1], ) {if := regionISO.Elem(); [2] == [1] && [3] == [2] {returnRegion() + isoRegionOffset, nil } }for := 0; < len(altRegionISO3); += 3 {iftag.Compare(altRegionISO3[:+3], ) == 0 {returnRegion(altRegionIDs[/3]), nil } }return0, NewValueError() }return0, ErrSyntax}func getRegionM49( int) (Region, error) {if0 < && <= 999 {const ( = 7 = 9 = 1<< - 1 ) := >> := fromM49[m49Index[]:m49Index[+1]] := uint16() << // we rely on bits shifting out := sort.Search(len(), func( int) bool {return [] >= })if := fromM49[int(m49Index[])+]; &^ == {returnRegion( & ), nil } }varValueErrorfmt.Fprint(bytes.NewBuffer([]byte(.v[:])), )return0, }// normRegion returns a region if r is deprecated or 0 otherwise.// TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ).// TODO: consider mapping split up regions to new most populous one (like CLDR).func normRegion( Region) Region { := regionOldMap := sort.Search(len(), func( int) bool {return [].From >= uint16() })if < len() && [].From == uint16() {returnRegion([].To) }return0}const ( iso3166UserAssigned = 1 << iota ccTLD bcp47Region)func ( Region) () byte {returnregionTypes[]}// String returns the BCP 47 representation for the region.// It returns "ZZ" for an unspecified region.func ( Region) () string {if < isoRegionOffset {if == 0 {return"ZZ" }returnfmt.Sprintf("%03d", .M49()) } -= isoRegionOffsetreturnregionISO.Elem(int())[:2]}// ISO3 returns the 3-letter ISO code of r.// Note that not all regions have a 3-letter ISO code.// In such cases this method returns "ZZZ".func ( Region) () string {if < isoRegionOffset {return"ZZZ" } -= isoRegionOffset := regionISO.Elem(int())switch [2] {case0:returnaltRegionISO3[[3]:][:3]case' ':return"ZZZ" }return [0:1] + [2:4]}// M49 returns the UN M.49 encoding of r, or 0 if this encoding// is not defined for r.func ( Region) () int {returnint(m49[])}// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This// may include private-use tags that are assigned by CLDR and used in this// implementation. So IsPrivateUse and IsCountry can be simultaneously true.func ( Region) () bool {return .typ()&iso3166UserAssigned != 0}typeScriptuint16// getScriptID returns the script id for string s. It assumes that s// is of the format [A-Z][a-z]{3}.func getScriptID( tag.Index, []byte) (Script, error) { , := findIndex(, , "Zzzz")returnScript(), }// String returns the script code in title case.// It returns "Zzzz" for an unspecified script.func ( Script) () string {if == 0 {return"Zzzz" }returnscript.Elem(int())}// IsPrivateUse reports whether this script code is reserved for private use.func ( Script) () bool {return_Qaaa <= && <= _Qabx}const ( maxAltTaglen = len("en-US-POSIX") maxLen = maxAltTaglen)var (// grandfatheredMap holds a mapping from legacy and grandfathered tags to // their base language or index to more elaborate tag. grandfatheredMap = map[[maxLen]byte]int16{ [maxLen]byte{'a', 'r', 't', '-', 'l', 'o', 'j', 'b', 'a', 'n'}: _jbo, // art-lojban [maxLen]byte{'i', '-', 'a', 'm', 'i'}: _ami, // i-ami [maxLen]byte{'i', '-', 'b', 'n', 'n'}: _bnn, // i-bnn [maxLen]byte{'i', '-', 'h', 'a', 'k'}: _hak, // i-hak [maxLen]byte{'i', '-', 'k', 'l', 'i', 'n', 'g', 'o', 'n'}: _tlh, // i-klingon [maxLen]byte{'i', '-', 'l', 'u', 'x'}: _lb, // i-lux [maxLen]byte{'i', '-', 'n', 'a', 'v', 'a', 'j', 'o'}: _nv, // i-navajo [maxLen]byte{'i', '-', 'p', 'w', 'n'}: _pwn, // i-pwn [maxLen]byte{'i', '-', 't', 'a', 'o'}: _tao, // i-tao [maxLen]byte{'i', '-', 't', 'a', 'y'}: _tay, // i-tay [maxLen]byte{'i', '-', 't', 's', 'u'}: _tsu, // i-tsu [maxLen]byte{'n', 'o', '-', 'b', 'o', 'k'}: _nb, // no-bok [maxLen]byte{'n', 'o', '-', 'n', 'y', 'n'}: _nn, // no-nyn [maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'f', 'r'}: _sfb, // sgn-BE-FR [maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'n', 'l'}: _vgt, // sgn-BE-NL [maxLen]byte{'s', 'g', 'n', '-', 'c', 'h', '-', 'd', 'e'}: _sgg, // sgn-CH-DE [maxLen]byte{'z', 'h', '-', 'g', 'u', 'o', 'y', 'u'}: _cmn, // zh-guoyu [maxLen]byte{'z', 'h', '-', 'h', 'a', 'k', 'k', 'a'}: _hak, // zh-hakka [maxLen]byte{'z', 'h', '-', 'm', 'i', 'n', '-', 'n', 'a', 'n'}: _nan, // zh-min-nan [maxLen]byte{'z', 'h', '-', 'x', 'i', 'a', 'n', 'g'}: _hsn, // zh-xiang// Grandfathered tags with no modern replacement will be converted as // follows: [maxLen]byte{'c', 'e', 'l', '-', 'g', 'a', 'u', 'l', 'i', 's', 'h'}: -1, // cel-gaulish [maxLen]byte{'e', 'n', '-', 'g', 'b', '-', 'o', 'e', 'd'}: -2, // en-GB-oed [maxLen]byte{'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}: -3, // i-default [maxLen]byte{'i', '-', 'e', 'n', 'o', 'c', 'h', 'i', 'a', 'n'}: -4, // i-enochian [maxLen]byte{'i', '-', 'm', 'i', 'n', 'g', 'o'}: -5, // i-mingo [maxLen]byte{'z', 'h', '-', 'm', 'i', 'n'}: -6, // zh-min// CLDR-specific tag. [maxLen]byte{'r', 'o', 'o', 't'}: 0, // root [maxLen]byte{'e', 'n', '-', 'u', 's', '-', 'p', 'o', 's', 'i', 'x'}: -7, // en_US_POSIX" } altTagIndex = [...]uint8{0, 17, 31, 45, 61, 74, 86, 102} altTags = "xtg-x-cel-gaulishen-GB-oxendicten-x-i-defaultund-x-i-enochiansee-x-i-mingonan-x-zh-minen-US-u-va-posix")func grandfathered( [maxAltTaglen]byte) ( Tag, bool) {if , := grandfatheredMap[]; {if < 0 {returnMake(altTags[altTagIndex[--1]:altTagIndex[-]]), true } .LangID = Language()return , true }return , false}
The pages are generated with Goldsv0.6.7. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @Go100and1 (reachable from the left QR code) to get the latest news of Golds.