package uniseg

import "unicode/utf8"

// NOTE(review): the local variable names, the "str" parameter name, and the
// "newState" result name below were restored from a listing that had lost its
// identifiers; confirm them against the upstream file.

// FirstWord returns the first word found in the given byte slice according to
// the rules of [Unicode Standard Annex #29, Word Boundaries]. This function can
// be called continuously to extract all words from a byte slice, as illustrated
// in the example below.
//
// If you don't know the current state, for example when calling the function
// for the first time, you must pass -1. For consecutive calls, pass the state
// and rest slice returned by the previous call.
//
// The "rest" slice is the sub-slice of the original byte slice "b" starting
// after the last byte of the identified word. If the length of the "rest" slice
// is 0, the entire byte slice "b" has been processed. The "word" byte slice is
// the sub-slice of the input slice containing the identified word.
//
// Given an empty byte slice "b", the function returns nil values.
//
// [Unicode Standard Annex #29, Word Boundaries]: http://unicode.org/reports/tr29/#Word_Boundaries
func FirstWord(b []byte, state int) (word, rest []byte, newState int) {
	// An empty byte slice returns nothing.
	if len(b) == 0 {
		return
	}

	// Extract the first rune.
	r, length := utf8.DecodeRune(b)
	if len(b) <= length { // If we're already past the end, there is nothing else to parse.
		return b, nil, wbAny
	}

	// If we don't know the state, determine it now.
	if state < 0 {
		state, _ = transitionWordBreakState(state, r, b[length:], "")
	}

	// Transition until we find a boundary.
	var boundary bool
	for {
		// Decode the rune that follows the word collected so far and hand the
		// state machine the bytes that come after that rune.
		r, l := utf8.DecodeRune(b[length:])
		state, boundary = transitionWordBreakState(state, r, b[length+l:], "")

		if boundary {
			return b[:length], b[length:], state
		}

		// No boundary before this rune: it belongs to the current word.
		length += l
		if len(b) <= length {
			// The input is exhausted, so the whole slice is one word.
			return b, nil, wbAny
		}
	}
}

// FirstWordInString is like [FirstWord] but its input and outputs are strings.
func FirstWordInString(str string, state int) (word, rest string, newState int) {
	// An empty string returns nothing.
	if len(str) == 0 {
		return
	}

	// Extract the first rune.
	r, length := utf8.DecodeRuneInString(str)
	if len(str) <= length { // If we're already past the end, there is nothing else to parse.
		return str, "", wbAny
	}

	// If we don't know the state, determine it now.
	if state < 0 {
		state, _ = transitionWordBreakState(state, r, nil, str[length:])
	}

	// Transition until we find a boundary.
	var boundary bool
	for {
		// Decode the rune that follows the word collected so far and hand the
		// state machine the text that comes after that rune.
		r, l := utf8.DecodeRuneInString(str[length:])
		state, boundary = transitionWordBreakState(state, r, nil, str[length+l:])

		if boundary {
			return str[:length], str[length:], state
		}

		// No boundary before this rune: it belongs to the current word.
		length += l
		if len(str) <= length {
			// The input is exhausted, so the whole string is one word.
			return str, "", wbAny
		}
	}
}
The pages are generated with Golds v0.6.7. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @Go100and1 (reachable from the left QR code) to get the latest news of Golds.