package uniseg

import "unicode/utf8"

// The states of the word break parser. A state records what the parser has
// consumed most recently so that the next code point can be judged against the
// word boundary rules (WBx) named in the transition table.
const (
	// Catch-all state; also used as the wildcard when looking up transitions.
	wbAny = iota
	// After a code point with the CR property.
	wbCR
	// After a code point with the LF property.
	wbLF
	// After a code point with the Newline property.
	wbNewline
	// After a code point with the WSegSpace property.
	wbWSegSpace
	// After a code point with the Hebrew_Letter property.
	wbHebrewLetter
	// After a code point with the ALetter property.
	wbALetter
	// Entered by transitionWordBreakState() when WB6 applies; the WB7
	// transitions leave it.
	wbWB7
	// Entered by transitionWordBreakState() when WB7b applies; the WB7c
	// transition leaves it.
	wbWB7c
	// After a code point with the Numeric property.
	wbNumeric
	// Entered by transitionWordBreakState() when WB12 applies; the WB11
	// transition leaves it.
	wbWB11
	// After a code point with the Katakana property.
	wbKatakana
	// After a code point with the ExtendNumLet property.
	wbExtendNumLet
	// After an odd number of consecutive Regional Indicators (WB15/WB16).
	wbOddRI
	// After an even number of consecutive Regional Indicators (WB15/WB16).
	wbEvenRI
)

// wbZWJBit is a flag rather than a state of its own: this bit is set for any
// states followed by at least one zero-width joiner (see WB4 and WB3c). Its
// value lies above the highest state number, so it can be OR-ed onto a state
// and cleared again without losing the state.
const wbZWJBit = 16

// The word break parser's breaking instructions. They are stored as the second
// element of each wbTransitions value; transitionWordBreakState() compares that
// element against wbBreak to produce its boolean result.
const (
	// No word boundary is reported for the transition.
	wbDontBreak = iota
	// A word boundary is reported for the transition.
	wbBreak
)

// The word break parser's state transitions. It's analogous to grTransitions,
// see comments there for details. Unicode version 14.0.0.
//
// Each key is a {state, property} pair and each value is a
// {new state, breaking instruction, rule number} triple. The rule numbers
// follow the WB rule names in the comments below (e.g. 30 for WB3, 131 for
// WB13a, 9990 for WB999). When no exact key exists,
// transitionWordBreakState() combines the {state, prAny} and
// {wbAny, property} entries and lets the entry with the lower rule number
// decide whether to break.
var wbTransitions = map[[2]int][3]int{
	// WB3b.
	{wbAny, prNewline}: {wbNewline, wbBreak, 32},
	{wbAny, prCR}:      {wbCR, wbBreak, 32},
	{wbAny, prLF}:      {wbLF, wbBreak, 32},

	// WB3a.
	{wbNewline, prAny}: {wbAny, wbBreak, 31},
	{wbCR, prAny}:      {wbAny, wbBreak, 31},
	{wbLF, prAny}:      {wbAny, wbBreak, 31},

	// WB3.
	{wbCR, prLF}: {wbLF, wbDontBreak, 30},

	// WB3d.
	{wbAny, prWSegSpace}:       {wbWSegSpace, wbBreak, 9990},
	{wbWSegSpace, prWSegSpace}: {wbWSegSpace, wbDontBreak, 34},

	// WB5.
	{wbAny, prALetter}:               {wbALetter, wbBreak, 9990},
	{wbAny, prHebrewLetter}:          {wbHebrewLetter, wbBreak, 9990},
	{wbALetter, prALetter}:           {wbALetter, wbDontBreak, 50},
	{wbALetter, prHebrewLetter}:      {wbHebrewLetter, wbDontBreak, 50},
	{wbHebrewLetter, prALetter}:      {wbALetter, wbDontBreak, 50},
	{wbHebrewLetter, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 50},

	// WB7. Transitions to wbWB7 handled by transitionWordBreakState().
	{wbWB7, prALetter}:      {wbALetter, wbDontBreak, 70},
	{wbWB7, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 70},

	// WB7a.
	{wbHebrewLetter, prSingleQuote}: {wbAny, wbDontBreak, 71},

	// WB7c. Transitions to wbWB7c handled by transitionWordBreakState().
	{wbWB7c, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 73},

	// WB8.
	{wbAny, prNumeric}:     {wbNumeric, wbBreak, 9990},
	{wbNumeric, prNumeric}: {wbNumeric, wbDontBreak, 80},

	// WB9.
	{wbALetter, prNumeric}:      {wbNumeric, wbDontBreak, 90},
	{wbHebrewLetter, prNumeric}: {wbNumeric, wbDontBreak, 90},

	// WB10.
	{wbNumeric, prALetter}:      {wbALetter, wbDontBreak, 100},
	{wbNumeric, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 100},

	// WB11. Transitions to wbWB11 handled by transitionWordBreakState().
	{wbWB11, prNumeric}: {wbNumeric, wbDontBreak, 110},

	// WB13.
	{wbAny, prKatakana}:      {wbKatakana, wbBreak, 9990},
	{wbKatakana, prKatakana}: {wbKatakana, wbDontBreak, 130},

	// WB13a.
	{wbAny, prExtendNumLet}:          {wbExtendNumLet, wbBreak, 9990},
	{wbALetter, prExtendNumLet}:      {wbExtendNumLet, wbDontBreak, 131},
	{wbHebrewLetter, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131},
	{wbNumeric, prExtendNumLet}:      {wbExtendNumLet, wbDontBreak, 131},
	{wbKatakana, prExtendNumLet}:     {wbExtendNumLet, wbDontBreak, 131},
	{wbExtendNumLet, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131},

	// WB13b. The new state must be a parser state (wb*), not a property (pr*).
	{wbExtendNumLet, prALetter}:      {wbALetter, wbDontBreak, 132},
	{wbExtendNumLet, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 132},
	{wbExtendNumLet, prNumeric}:      {wbNumeric, wbDontBreak, 132},
	{wbExtendNumLet, prKatakana}:     {wbKatakana, wbDontBreak, 132},
}

// transitionWordBreakState determines the new state of the word break parser
// given the current state and the next code point. It also returns whether a
// word boundary was detected. If more than one code point is needed to
// determine the new state, the byte slice or the string starting after rune "r"
// can be used (whichever is not nil or empty) for further lookups.
func transitionWordBreakState( int,  rune,  []byte,  string) ( int,  bool) {
	// Determine the property of the next character.
	 := property(workBreakCodePoints, )

	// "Replacing Ignore Rules".
	if  == prZWJ {
		// WB4 (for zero-width joiners).
		if  == wbNewline ||  == wbCR ||  == wbLF {
			return wbAny | wbZWJBit, true // Make sure we don't apply WB4 to WB3a.
		}
		if  < 0 {
			return wbAny | wbZWJBit, false
		}
		return  | wbZWJBit, false
	} else if  == prExtend ||  == prFormat {
		// WB4 (for Extend and Format).
		if  == wbNewline ||  == wbCR ||  == wbLF {
			return wbAny, true // Make sure we don't apply WB4 to WB3a.
		}
		if  == wbWSegSpace ||  == wbAny|wbZWJBit {
			return wbAny, false // We don't break but this is also not WB3d or WB3c.
		}
		if  < 0 {
			return wbAny, false
		}
		return , false
	} else if  == prExtendedPictographic &&  >= 0 && &wbZWJBit != 0 {
		// WB3c.
		return wbAny, false
	}
	if  >= 0 {
		 =  &^ wbZWJBit
	}

	// Find the applicable transition in the table.
	var  int
	,  := wbTransitions[[2]int{, }]
	if  {
		// We have a specific transition. We'll use it.
		, ,  = [0], [1] == wbBreak, [2]
	} else {
		// No specific transition found. Try the less specific ones.
		,  := wbTransitions[[2]int{, prAny}]
		,  := wbTransitions[[2]int{wbAny, }]
		if  &&  {
			// Both apply. We'll use a mix (see comments for grTransitions).
			, ,  = [0], [1] == wbBreak, [2]
			if [2] < [2] {
				,  = [1] == wbBreak, [2]
			}
		} else if  {
			// We only have a specific state.
			, ,  = [0], [1] == wbBreak, [2]
			// This branch will probably never be reached because okAnyState will
			// always be true given the current transition map. But we keep it here
			// for future modifications to the transition map where this may not be
			// true anymore.
		} else if  {
			// We only have a specific property.
			, ,  = [0], [1] == wbBreak, [2]
		} else {
			// No known transition. WB999: Any รท Any.
			, ,  = wbAny, true, 9990
		}
	}

	// For those rules that need to look up runes further in the string, we
	// determine the property after nextProperty, skipping over Format, Extend,
	// and ZWJ (according to WB4). It's -1 if not needed, if such a rune cannot
	// be determined (because the text ends or the rune is faulty).
	 := -1
	if  > 60 &&
		( == wbALetter ||  == wbHebrewLetter ||  == wbNumeric) &&
		( == prMidLetter ||  == prMidNumLet ||  == prSingleQuote || // WB6.
			 == prDoubleQuote || // WB7b.
			 == prMidNum) { // WB12.
		for {
			var (
				      rune
				 int
			)
			if  != nil { // Byte slice version.
				,  = utf8.DecodeRune()
				 = [:]
			} else { // String version.
				,  = utf8.DecodeRuneInString()
				 = [:]
			}
			if  == utf8.RuneError {
				break
			}
			 := property(workBreakCodePoints, )
			if  == prExtend ||  == prFormat ||  == prZWJ {
				continue
			}
			 = 
			break
		}
	}

	// WB6.
	if  > 60 &&
		( == wbALetter ||  == wbHebrewLetter) &&
		( == prMidLetter ||  == prMidNumLet ||  == prSingleQuote) &&
		( == prALetter ||  == prHebrewLetter) {
		return wbWB7, false
	}

	// WB7b.
	if  > 72 &&
		 == wbHebrewLetter &&
		 == prDoubleQuote &&
		 == prHebrewLetter {
		return wbWB7c, false
	}

	// WB12.
	if  > 120 &&
		 == wbNumeric &&
		( == prMidNum ||  == prMidNumLet ||  == prSingleQuote) &&
		 == prNumeric {
		return wbWB11, false
	}

	// WB15 and WB16.
	if  == wbAny &&  == prRegionalIndicator {
		if  != wbOddRI &&  != wbEvenRI { // Includes state == -1.
			// Transition into the first RI.
			return wbOddRI, true
		}
		if  == wbOddRI {
			// Don't break pairs of Regional Indicators.
			return wbEvenRI, false
		}
		return wbOddRI, true // We can break after a pair.
	}

	return
}