package uniseg
import "unicode/utf8"
// States of the word break parser driven by transitionWordBreakState. They
// appear as the first element of both the keys and the values of
// wbTransitions.
const (
// wbAny is the generic state. It is also used as the wildcard state when
// transitionWordBreakState falls back to a less specific table lookup.
wbAny = iota
wbCR
wbLF
wbNewline
wbWSegSpace
wbHebrewLetter
wbALetter
// wbWB7 and wbWB7c are returned by the look-ahead checks in
// transitionWordBreakState; in wbTransitions they only occur as source states.
wbWB7
wbWB7c
wbNumeric
// wbWB11 is likewise returned only by a look-ahead check in
// transitionWordBreakState.
wbWB11
wbKatakana
wbExtendNumLet
// wbOddRI and wbEvenRI are alternated by transitionWordBreakState while it
// consumes consecutive regional indicators.
wbOddRI
wbEvenRI
// wbZWJBit is OR-ed into a state when the next code point is a zero-width
// joiner (prZWJ) and is cleared again before the transition table is
// consulted. Its value lies above every state value declared in this block,
// so it never collides with one of them.
wbZWJBit = 16
)
// Breaking instructions of the word break parser. They are stored as the
// second element of wbTransitions values; transitionWordBreakState compares
// that element against wbBreak to produce its boolean result.
const (
wbDontBreak = iota
wbBreak
)
// wbTransitions is the state transition table of the word break parser.
//
// Keys are {current state, property of the next code point}. Values are
// {new state, breaking instruction, rule number}. wbAny and prAny act as
// wildcards: when no exact key exists, transitionWordBreakState looks up
// {state, prAny} and {wbAny, property} and, if both exist, takes the new state
// from the latter and the breaking instruction from whichever has the lower
// rule number.
//
// The rule numbers follow the numbering of the word boundary rules in Unicode
// Standard Annex #29 (e.g. 30 for WB3, 131 for WB13a, 9990 for WB999).
// Transitions that require look-ahead (into wbWB7, wbWB7c and wbWB11) are not
// in this table; transitionWordBreakState produces them directly.
var wbTransitions = map[[2]int][3]int{
	// WB3b.
	{wbAny, prNewline}: {wbNewline, wbBreak, 32},
	{wbAny, prCR}:      {wbCR, wbBreak, 32},
	{wbAny, prLF}:      {wbLF, wbBreak, 32},

	// WB3a.
	{wbNewline, prAny}: {wbAny, wbBreak, 31},
	{wbCR, prAny}:      {wbAny, wbBreak, 31},
	{wbLF, prAny}:      {wbAny, wbBreak, 31},

	// WB3.
	{wbCR, prLF}: {wbLF, wbDontBreak, 30},

	// WB3d.
	{wbAny, prWSegSpace}:       {wbWSegSpace, wbBreak, 9990},
	{wbWSegSpace, prWSegSpace}: {wbWSegSpace, wbDontBreak, 34},

	// WB5.
	{wbAny, prALetter}:               {wbALetter, wbBreak, 9990},
	{wbAny, prHebrewLetter}:          {wbHebrewLetter, wbBreak, 9990},
	{wbALetter, prALetter}:           {wbALetter, wbDontBreak, 50},
	{wbALetter, prHebrewLetter}:      {wbHebrewLetter, wbDontBreak, 50},
	{wbHebrewLetter, prALetter}:      {wbALetter, wbDontBreak, 50},
	{wbHebrewLetter, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 50},

	// WB7. Transitions into wbWB7 are made by transitionWordBreakState.
	{wbWB7, prALetter}:      {wbALetter, wbDontBreak, 70},
	{wbWB7, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 70},

	// WB7a.
	{wbHebrewLetter, prSingleQuote}: {wbAny, wbDontBreak, 71},

	// WB7c. Transitions into wbWB7c are made by transitionWordBreakState.
	{wbWB7c, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 73},

	// WB8.
	{wbAny, prNumeric}:     {wbNumeric, wbBreak, 9990},
	{wbNumeric, prNumeric}: {wbNumeric, wbDontBreak, 80},

	// WB9.
	{wbALetter, prNumeric}:      {wbNumeric, wbDontBreak, 90},
	{wbHebrewLetter, prNumeric}: {wbNumeric, wbDontBreak, 90},

	// WB10.
	{wbNumeric, prALetter}:      {wbALetter, wbDontBreak, 100},
	{wbNumeric, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 100},

	// WB11. Transitions into wbWB11 are made by transitionWordBreakState.
	{wbWB11, prNumeric}: {wbNumeric, wbDontBreak, 110},

	// WB13.
	{wbAny, prKatakana}:      {wbKatakana, wbBreak, 9990},
	{wbKatakana, prKatakana}: {wbKatakana, wbDontBreak, 130},

	// WB13a.
	{wbAny, prExtendNumLet}:          {wbExtendNumLet, wbBreak, 9990},
	{wbALetter, prExtendNumLet}:      {wbExtendNumLet, wbDontBreak, 131},
	{wbHebrewLetter, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131},
	{wbNumeric, prExtendNumLet}:      {wbExtendNumLet, wbDontBreak, 131},
	{wbKatakana, prExtendNumLet}:     {wbExtendNumLet, wbDontBreak, 131},
	{wbExtendNumLet, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131},

	// WB13b.
	{wbExtendNumLet, prALetter}:      {wbALetter, wbDontBreak, 132},
	{wbExtendNumLet, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 132},
	{wbExtendNumLet, prNumeric}:      {wbNumeric, wbDontBreak, 132},
	// The new state must be the parser state wbKatakana, not the property
	// constant prKatakana: the first value element is always a wb* state.
	{wbExtendNumLet, prKatakana}: {wbKatakana, wbDontBreak, 132},
}
// transitionWordBreakState computes the word break parser's next state from
// the current state and the next code point r, and reports whether a word
// break was detected.
//
// Some rules need to inspect the code point that follows r. For those, the
// text after r is read from b if b is non-nil, and from str otherwise.
//
// A negative state is treated like wbAny by the ignore rules and is passed to
// the table lookup unmodified (presumably it marks the parser's initial state;
// confirm with the callers).
func transitionWordBreakState(state int, r rune, b []byte, str string) (newState int, wordBreak bool) {
	nextProperty := property(workBreakCodePoints, r)

	// Zero-width joiners, Extend and Format code points are absorbed into the
	// current state, except directly after a line ending, where a break is
	// reported.
	afterLineEnd := state == wbNewline || state == wbCR || state == wbLF
	switch {
	case nextProperty == prZWJ:
		switch {
		case afterLineEnd:
			return wbAny | wbZWJBit, true
		case state < 0:
			return wbAny | wbZWJBit, false
		default:
			// Remember that a joiner was seen; checked below for pictographs.
			return state | wbZWJBit, false
		}
	case nextProperty == prExtend || nextProperty == prFormat:
		switch {
		case afterLineEnd:
			return wbAny, true
		case state == wbWSegSpace, state == wbAny|wbZWJBit, state < 0:
			return wbAny, false
		default:
			return state, false
		}
	case nextProperty == prExtendedPictographic && state >= 0 && state&wbZWJBit != 0:
		// A pictograph directly after a joiner never starts a new word.
		return wbAny, false
	}

	// The joiner marker is only relevant to the checks above.
	if state >= 0 {
		state &^= wbZWJBit
	}

	// Look up the transition: exact match first, then the wildcard entries.
	var rule int
	if exact, found := wbTransitions[[2]int{state, nextProperty}]; found {
		newState, wordBreak, rule = exact[0], exact[1] == wbBreak, exact[2]
	} else {
		fromState, hasFromState := wbTransitions[[2]int{state, prAny}]
		toProperty, hasToProperty := wbTransitions[[2]int{wbAny, nextProperty}]
		switch {
		case hasFromState && hasToProperty:
			// Both wildcards match: the new state comes from the property
			// entry, the break decision from the lower-numbered rule.
			newState, wordBreak, rule = toProperty[0], toProperty[1] == wbBreak, toProperty[2]
			if fromState[2] < toProperty[2] {
				wordBreak, rule = fromState[1] == wbBreak, fromState[2]
			}
		case hasFromState:
			newState, wordBreak, rule = fromState[0], fromState[1] == wbBreak, fromState[2]
		case hasToProperty:
			newState, wordBreak, rule = toProperty[0], toProperty[1] == wbBreak, toProperty[2]
		default:
			// Nothing matches: break.
			newState, wordBreak, rule = wbAny, true, 9990
		}
	}

	// Predicates shared by the look-ahead rules below.
	letterState := state == wbALetter || state == wbHebrewLetter
	midLetterLike := nextProperty == prMidLetter || nextProperty == prMidNumLet || nextProperty == prSingleQuote
	midNumLike := nextProperty == prMidNum || nextProperty == prMidNumLet || nextProperty == prSingleQuote

	// Determine the property of the first code point after r that is not
	// Extend, Format or ZWJ. It stays -1 when no look-ahead rule can apply or
	// when decoding yields utf8.RuneError (which includes running out of text).
	farProperty := -1
	if rule > 60 &&
		(letterState || state == wbNumeric) &&
		(midLetterLike || nextProperty == prDoubleQuote || nextProperty == prMidNum) {
		for {
			var (
				ahead rune
				size  int
			)
			if b != nil {
				ahead, size = utf8.DecodeRune(b)
				b = b[size:]
			} else {
				ahead, size = utf8.DecodeRuneInString(str)
				str = str[size:]
			}
			if ahead == utf8.RuneError {
				break
			}
			if p := property(workBreakCodePoints, ahead); p != prExtend && p != prFormat && p != prZWJ {
				farProperty = p
				break
			}
		}
	}

	// Look-ahead rules. Each one applies only if no lower-numbered rule was
	// selected by the table lookup above.
	switch {
	case rule > 60 && letterState && midLetterLike &&
		(farProperty == prALetter || farProperty == prHebrewLetter):
		return wbWB7, false
	case rule > 72 && state == wbHebrewLetter && nextProperty == prDoubleQuote &&
		farProperty == prHebrewLetter:
		return wbWB7c, false
	case rule > 120 && state == wbNumeric && midNumLike && farProperty == prNumeric:
		return wbWB11, false
	}

	// Regional indicators are grouped in pairs: no break inside a pair, a
	// break before every odd-numbered indicator.
	if newState == wbAny && nextProperty == prRegionalIndicator {
		if state == wbOddRI {
			return wbEvenRI, false
		}
		return wbOddRI, true
	}

	return newState, wordBreak
}
The pages are generated with Golds v0.6.7. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PRs and bug reports are welcome and can be submitted to the issue list.
Please follow @Go100and1 (reachable from the left QR code) to get the latest news of Golds.