package uniseg

import "unicode/utf8"

// FirstSentence returns the first sentence found in the given byte slice
// according to the rules of [Unicode Standard Annex #29, Sentence Boundaries].
// This function can be called continuously to extract all sentences from a byte
// slice, as illustrated in the example below.
//
// If you don't know the current state, for example when calling the function
// for the first time, you must pass -1. For consecutive calls, pass the state
// and rest slice returned by the previous call.
//
// The "rest" slice is the sub-slice of the original byte slice "b" starting
// after the last byte of the identified sentence. If the length of the "rest"
// slice is 0, the entire byte slice "b" has been processed. The "sentence" byte
// slice is the sub-slice of the input slice containing the identified sentence.
//
// Given an empty byte slice "b", the function returns nil values.
//
// [Unicode Standard Annex #29, Sentence Boundaries]: http://unicode.org/reports/tr29/#Sentence_Boundaries
func FirstSentence(b []byte, state int) (sentence, rest []byte, newState int) {
	// An empty byte slice returns nothing: the named results are still at
	// their zero values here.
	if len(b) == 0 {
		return
	}

	// Extract the first rune. "length" counts the bytes consumed so far and
	// therefore marks the end of the sentence candidate.
	r, length := utf8.DecodeRune(b)
	if len(b) <= length { // If we're already past the end, there is nothing else to parse.
		return b, nil, sbAny
	}

	// If we don't know the state, determine it now. The boundary flag of this
	// first transition is deliberately discarded; only the state is needed.
	if state < 0 {
		state, _ = transitionSentenceBreakState(state, r, b[length:], "")
	}

	// Transition until we find a boundary.
	var boundary bool
	for {
		next, size := utf8.DecodeRune(b[length:])
		// The transition function receives the bytes following the rune that
		// was just decoded.
		state, boundary = transitionSentenceBreakState(state, next, b[length+size:], "")

		// The boundary lies in front of the rune that was just decoded, so
		// that rune belongs to the rest, not to the sentence.
		if boundary {
			return b[:length], b[length:], state
		}

		length += size
		if len(b) <= length {
			// Input exhausted without a boundary: everything is one sentence.
			return b, nil, sbAny
		}
	}
}

// FirstSentenceInString is like [FirstSentence] but its input and outputs are
// strings.
func FirstSentenceInString(str string, state int) (sentence, rest string, newState int) {
	// An empty string returns nothing: the named results are still at their
	// zero values here.
	if len(str) == 0 {
		return
	}

	// Extract the first rune. "length" counts the bytes consumed so far and
	// therefore marks the end of the sentence candidate.
	r, length := utf8.DecodeRuneInString(str)
	if len(str) <= length { // If we're already past the end, there is nothing else to parse.
		return str, "", sbAny
	}

	// If we don't know the state, determine it now. The boundary flag of this
	// first transition is deliberately discarded; only the state is needed.
	if state < 0 {
		state, _ = transitionSentenceBreakState(state, r, nil, str[length:])
	}

	// Transition until we find a boundary.
	var boundary bool
	for {
		next, size := utf8.DecodeRuneInString(str[length:])
		// The transition function receives the text following the rune that
		// was just decoded, passed as a string instead of a byte slice.
		state, boundary = transitionSentenceBreakState(state, next, nil, str[length+size:])

		// The boundary lies in front of the rune that was just decoded, so
		// that rune belongs to the rest, not to the sentence.
		if boundary {
			return str[:length], str[length:], state
		}

		length += size
		if len(str) <= length {
			// Input exhausted without a boundary: everything is one sentence.
			return str, "", sbAny
		}
	}
}
The pages are generated with Golds v0.6.7. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @Go100and1 (reachable from the left QR code) to get the latest news of Golds.