// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package language

import (
	"bytes"
	"errors"
	"fmt"
	"sort"

	"golang.org/x/text/internal/tag"
)

// isAlpha reports whether b is a letter rather than a digit.
// b must be an ASCII letter or digit; under that precondition every letter
// compares greater than '9', so a single comparison is enough.
func isAlpha(b byte) bool {
	return b > '9'
}

// isAlphaNum reports whether s contains only ASCII letters or digits.
// It returns true for an empty slice.
func isAlphaNum(s []byte) bool {
	for _, c := range s {
		if !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9') {
			return false
		}
	}
	return true
}

// ErrSyntax is returned by any of the parsing functions when the
// input is not well-formed, according to BCP 47. When the scanner records
// errors, ErrSyntax takes precedence over an earlier error of another kind.
// TODO: return the position at which the syntax error occurred?
var ErrSyntax = errors.New("language: tag is not well-formed")

// ErrDuplicateKey is returned when a tag contains the same key twice with
// different values in the -u section. It is recorded while the keys of a
// -u extension are being reordered and de-duplicated.
var ErrDuplicateKey = errors.New("language: different values for same key in -u extension")

// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
type ValueError struct {
	// v holds the offending subtag, padded with zero bytes when it is
	// shorter than 8 bytes.
	v [8]byte
}

// NewValueError creates a new ValueError for the given subtag.
// At most the first 8 bytes of tag are retained.
func NewValueError(tag []byte) ValueError {
	var e ValueError
	copy(e.v[:], tag)
	return e
}

// tag returns the stored subtag without its zero padding.
func (e ValueError) tag() []byte {
	n := bytes.IndexByte(e.v[:], 0)
	if n == -1 {
		// No padding byte: the subtag fills the whole array.
		n = 8
	}
	return e.v[:n]
}

// Error implements the error interface.
func (e ValueError) Error() string {
	return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
}

// Subtag returns the subtag for which the error occurred.
func (e ValueError) Subtag() string {
	return string(e.tag())
}

// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
// The scanner rewrites its buffer in place while scanning: underscores are
// turned into hyphens and malformed tokens are removed.
type scanner struct {
	b     []byte                      // buffer being scanned; modified in place by the scanner routines
	bytes [max99thPercentileSize]byte // inline storage that makeScannerString uses for b when the input fits
	token []byte                      // current token, a subslice of b; nil when scan found none
	start int                         // start position of the current token
	end   int                         // end position of the current token
	next  int                         // next point for scan
	err   error                       // error recorded by setError, if any
	done  bool                        // set by scan once the input is exhausted
}

func makeScannerString( string) scanner {
	 := scanner{}
	if len() <= len(.bytes) {
		.b = .bytes[:copy(.bytes[:], )]
	} else {
		.b = []byte()
	}
	.init()
	return 
}

// makeScanner returns a scanner using b as the input buffer.
// b is not copied and may be modified by the scanner routines.
func makeScanner( []byte) scanner {
	 := scanner{b: }
	.init()
	return 
}

func ( *scanner) () {
	for ,  := range .b {
		if  == '_' {
			.b[] = '-'
		}
	}
	.scan()
}

// restToLower converts the string between start and end to lower case.
func ( *scanner) (,  int) {
	for  := ;  < ; ++ {
		 := .b[]
		if 'A' <=  &&  <= 'Z' {
			.b[] += 'a' - 'A'
		}
	}
}

func ( *scanner) ( error) {
	if .err == nil || ( == ErrSyntax && .err != ErrSyntax) {
		.err = 
	}
}

// resizeRange shrinks or grows the array at position oldStart such that
// a new string of size newSize can fit between oldStart and oldEnd.
// Sets the scan point to after the resized range.
func ( *scanner) (, ,  int) {
	.start = 
	if  :=  + ;  !=  {
		 :=  - 
		var  []byte
		if  := len(.b) + ;  > cap(.b) {
			 = make([]byte, )
			copy(, .b[:])
		} else {
			 = .b[:]
		}
		copy([:], .b[:])
		.b = 
		.next =  + (.next - .end)
		.end = 
	}
}

// replace replaces the current token with repl.
func ( *scanner) ( string) {
	.resizeRange(.start, .end, len())
	copy(.b[.start:], )
}

// gobble removes the current token from the input.
// Caller must call scan after calling gobble.
func ( *scanner) ( error) {
	.setError()
	if .start == 0 {
		.b = .b[:+copy(.b, .b[.next:])]
		.end = 0
	} else {
		.b = .b[:.start-1+copy(.b[.start-1:], .b[.end:])]
		.end = .start - 1
	}
	.next = .start
}

// deleteRange removes the given range from s.b before the current token.
func ( *scanner) (,  int) {
	.b = .b[:+copy(.b[:], .b[:])]
	 :=  - 
	.next -= 
	.start -= 
	.end -= 
}

// scan parses the next token of a BCP 47 string.  Tokens that are larger
// than 8 characters or include non-alphanumeric characters result in an error
// and are gobbled and removed from the output.
// It returns the end position of the last token consumed.
func ( *scanner) () ( int) {
	 = .end
	.token = nil
	for .start = .next; .next < len(.b); {
		 := bytes.IndexByte(.b[.next:], '-')
		if  == -1 {
			.end = len(.b)
			.next = len(.b)
			 = .end - .start
		} else {
			.end = .next + 
			.next = .end + 1
		}
		 := .b[.start:.end]
		if  < 1 ||  > 8 || !isAlphaNum() {
			.gobble(ErrSyntax)
			continue
		}
		.token = 
		return 
	}
	if  := len(.b);  > 0 && .b[-1] == '-' {
		.setError(ErrSyntax)
		.b = .b[:len(.b)-1]
	}
	.done = true
	return 
}

// acceptMinSize parses multiple tokens of the given size or greater.
// It returns the end position of the last token consumed.
func ( *scanner) ( int) ( int) {
	 = .end
	.scan()
	for ; len(.token) >= ; .scan() {
		 = .end
	}
	return 
}

// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
// failed it returns an error and any part of the tag that could be parsed.
// If parsing succeeded but an unknown value was found, it returns
// ValueError. The Tag returned in this case is just stripped of the unknown
// value. All other values are preserved. It accepts tags in the BCP 47 format
// and extensions to this standard defined in
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
func ( string) ( Tag,  error) {
	// TODO: consider supporting old-style locale key-value pairs.
	if  == "" {
		return Und, ErrSyntax
	}
	defer func() {
		if recover() != nil {
			 = Und
			 = ErrSyntax
			return
		}
	}()
	if len() <= maxAltTaglen {
		 := [maxAltTaglen]byte{}
		for ,  := range  {
			// Generating invalid UTF-8 is okay as it won't match.
			if 'A' <=  &&  <= 'Z' {
				 += 'a' - 'A'
			} else if  == '_' {
				 = '-'
			}
			[] = byte()
		}
		if ,  := grandfathered();  {
			return , nil
		}
	}
	 := makeScannerString()
	return parse(&, )
}

func parse( *scanner,  string) ( Tag,  error) {
	 = Und
	var  int
	if  := len(.token);  <= 1 {
		.toLower(0, len(.b))
		if  == 0 || .token[0] != 'x' {
			return , ErrSyntax
		}
		 = parseExtensions()
	} else if  >= 4 {
		return Und, ErrSyntax
	} else { // the usual case
		,  = parseTag(, true)
		if  := len(.token);  == 1 {
			.pExt = uint16()
			 = parseExtensions()
		} else if  < len(.b) {
			.setError(ErrSyntax)
			.b = .b[:]
		}
	}
	if int(.pVariant) < len(.b) {
		if  < len() {
			 = [:]
		}
		if len() > 0 && tag.Compare(, .b) == 0 {
			.str = 
		} else {
			.str = string(.b)
		}
	} else {
		.pVariant, .pExt = 0, 0
	}
	return , .err
}

// parseTag parses language, script, region and variants.
// It returns a Tag and the end position in the input that was parsed.
// If doNorm is true, then <lang>-<extlang> will be normalized to <extlang>.
func parseTag( *scanner,  bool) ( Tag,  int) {
	var  error
	// TODO: set an error if an unknown lang, script or region is encountered.
	.LangID,  = getLangID(.token)
	.setError()
	.replace(.LangID.String())
	 := .start
	 = .scan()
	for len(.token) == 3 && isAlpha(.token[0]) {
		// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
		// to a tag of the form <extlang>.
		if  {
			,  := getLangID(.token)
			if  != 0 {
				.LangID = 
				 := .String()
				copy(.b[:], )
				.b[+len()] = '-'
				.start =  + len() + 1
			}
			.gobble()
		}
		 = .scan()
	}
	if len(.token) == 4 && isAlpha(.token[0]) {
		.ScriptID,  = getScriptID(script, .token)
		if .ScriptID == 0 {
			.gobble()
		}
		 = .scan()
	}
	if  := len(.token);  >= 2 &&  <= 3 {
		.RegionID,  = getRegionID(.token)
		if .RegionID == 0 {
			.gobble()
		} else {
			.replace(.RegionID.String())
		}
		 = .scan()
	}
	.toLower(.start, len(.b))
	.pVariant = byte()
	 = parseVariants(, , )
	.pExt = uint16()
	return , 
}

// separator is the subtag delimiter passed to bytes.Join when reordered
// subtags are joined back together.
var separator = []byte{'-'}

// parseVariants scans tokens as long as each token is a valid variant string.
// Duplicate variants are removed.
func parseVariants( *scanner,  int,  Tag) int {
	 := .start
	 := [4]uint8{}
	 := [4][]byte{}
	 := [:0]
	 := [:0]
	 := -1
	 := false
	for ; len(.token) >= 4; .scan() {
		// TODO: measure the impact of needing this conversion and redesign
		// the data structure if there is an issue.
		,  := variantIndex[string(.token)]
		if ! {
			// unknown variant
			// TODO: allow user-defined variants?
			.gobble(NewValueError(.token))
			continue
		}
		 = append(, )
		 = append(, .token)
		if ! {
			if  < int() {
				 = int()
			} else {
				 = true
				// There is no legal combinations of more than 7 variants
				// (and this is by no means a useful sequence).
				const  = 8
				if len() >  {
					break
				}
			}
		}
		 = .end
	}
	if  {
		sort.Sort(variantsSort{, })
		,  := 0, -1
		for ,  := range  {
			 := int()
			if  ==  {
				// Remove duplicates.
				continue
			}
			[] = []
			[] = []
			++
			 = 
		}
		if  := bytes.Join([:], separator); len() == 0 {
			 =  - 1
		} else {
			.resizeRange(, , len())
			copy(.b[.start:], )
			 = .end
		}
	}
	return 
}

// variantsSort implements sort.Interface over two parallel slices, ordering
// the elements of v by the corresponding values in i.
type variantsSort struct {
	i []uint8  // sort keys
	v [][]byte // values, kept in step with i
}

// Len returns the number of elements.
func (s variantsSort) Len() int {
	return len(s.i)
}

// Swap exchanges elements i and j in both slices.
func (s variantsSort) Swap(i, j int) {
	s.i[i], s.i[j] = s.i[j], s.i[i]
	s.v[i], s.v[j] = s.v[j], s.v[i]
}

// Less reports whether the key of element i is smaller than that of j.
func (s variantsSort) Less(i, j int) bool {
	return s.i[i] < s.i[j]
}

// bytesSort implements sort.Interface for byte slices that are compared on
// their first n bytes only. Every element must be at least n bytes long.
type bytesSort struct {
	b [][]byte
	n int // first n bytes to compare
}

// Len returns the number of elements.
func (b bytesSort) Len() int {
	return len(b.b)
}

// Swap exchanges elements i and j.
func (b bytesSort) Swap(i, j int) {
	b.b[i], b.b[j] = b.b[j], b.b[i]
}

// Less reports whether the first n bytes of element i sort before the first
// n bytes of element j. Elements with equal prefixes are not less than each
// other.
func (b bytesSort) Less(i, j int) bool {
	for k := 0; k < b.n; k++ {
		if b.b[i][k] == b.b[j][k] {
			continue
		}
		return b.b[i][k] < b.b[j][k]
	}
	return false
}

// parseExtensions parses and normalizes the extensions in the buffer.
// It returns the last position of scan.b that is part of any extension.
// It also trims scan.b to remove excess parts accordingly.
func parseExtensions( *scanner) int {
	 := .start
	 := [][]byte{}
	 := []byte{}
	 := .end
	for len(.token) == 1 {
		 := .start
		 := .token[0]
		 = parseExtension()
		 := .b[:]
		if len() < 3 || ( != 'x' && len() < 4) {
			.setError(ErrSyntax)
			 = 
			continue
		} else if  ==  && ( == 'x' || .start == len(.b)) {
			.b = .b[:]
			return 
		} else if  == 'x' {
			 = 
			break
		}
		 = append(, )
	}
	sort.Sort(bytesSort{, 1})
	if len() > 0 {
		 = append(, )
	}
	.b = .b[:]
	if len() > 0 {
		.b = append(.b, bytes.Join(, separator)...)
	} else if  > 0 {
		// Strip trailing '-'.
		.b = .b[:-1]
	}
	return 
}

// parseExtension parses a single extension and returns the position of
// the extension end.
func parseExtension( *scanner) int {
	,  := .start, .end
	switch .token[0] {
	case 'u': // https://www.ietf.org/rfc/rfc6067.txt
		 := 
		.scan()
		for  := []byte{}; len(.token) > 2; .scan() {
			if bytes.Compare(.token, ) != -1 {
				// Attributes are unsorted. Start over from scratch.
				 :=  + 1
				.next = 
				 := [][]byte{}
				for .scan(); len(.token) > 2; .scan() {
					 = append(, .token)
					 = .end
				}
				sort.Sort(bytesSort{, 3})
				copy(.b[:], bytes.Join(, separator))
				break
			}
			 = .token
			 = .end
		}
		// Scan key-type sequences. A key is of length 2 and may be followed
		// by 0 or more "type" subtags from 3 to the maximum of 8 letters.
		var ,  []byte
		for  := ; len(.token) == 2;  =  {
			 = .token
			 = .end
			for .scan();  < .end && len(.token) > 2; .scan() {
				 = .end
			}
			// TODO: check key value validity
			if bytes.Compare(, ) != 1 || .err != nil {
				// We have an invalid key or the keys are not sorted.
				// Start scanning keys from scratch and reorder.
				 :=  + 1
				.next = 
				 := [][]byte{}
				for .scan(); len(.token) == 2; {
					 := .start
					 = .end
					for .scan();  < .end && len(.token) > 2; .scan() {
						 = .end
					}
					 = append(, .b[:])
				}
				sort.Stable(bytesSort{, 2})
				if  := len();  > 0 {
					 := 0
					for  := 1;  < ; ++ {
						if !bytes.Equal([][:2], [][:2]) {
							++
							[] = []
						} else if !bytes.Equal([], []) {
							.setError(ErrDuplicateKey)
						}
					}
					 = [:+1]
				}
				 := bytes.Join(, separator)
				if  :=  + len();  <  {
					.deleteRange(, )
					 = 
				}
				copy(.b[:], )
				break
			}
		}
	case 't': // https://www.ietf.org/rfc/rfc6497.txt
		.scan()
		if  := len(.token);  >= 2 &&  <= 3 && isAlpha(.token[1]) {
			_,  = parseTag(, false)
			.toLower(, )
		}
		for len(.token) == 2 && !isAlpha(.token[1]) {
			 = .acceptMinSize(3)
		}
	case 'x':
		 = .acceptMinSize(1)
	default:
		 = .acceptMinSize(2)
	}
	return 
}

// getExtension returns the name, body and end position of the extension.
func getExtension( string,  int) ( int,  string) {
	if [] == '-' {
		++
	}
	if [] == 'x' {
		return len(), [:]
	}
	 = nextExtension(, )
	return , [:]
}

// nextExtension finds the next extension within the string, searching
// for the -<char>- pattern from position p.
// It returns the position of the pattern's first '-', or len(s) when no
// further extension is found.
// In the vast majority of cases, language tags will have at most
// one extension and extensions tend to be small.
func nextExtension(s string, p int) int {
	for n := len(s) - 3; p < n; {
		if s[p] == '-' {
			if s[p+2] == '-' {
				return p
			}
			p += 3
		} else {
			p++
		}
	}
	return len(s)
}