// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package transform provides reader and writer wrappers that transform the // bytes passing through as well as various transformations. Example // transformations provided by other packages include normalization and // conversion between character sets.
package transform // import "golang.org/x/text/transform" import ( ) var ( // ErrShortDst means that the destination buffer was too short to // receive all of the transformed bytes. ErrShortDst = errors.New("transform: short destination buffer") // ErrShortSrc means that the source buffer has insufficient data to // complete the transformation. ErrShortSrc = errors.New("transform: short source buffer") // ErrEndOfSpan means that the input and output (the transformed input) // are not identical. ErrEndOfSpan = errors.New("transform: input and output are not identical") // errInconsistentByteCount means that Transform returned success (nil // error) but also returned nSrc inconsistent with the src argument. errInconsistentByteCount = errors.New("transform: inconsistent byte count returned") // errShortInternal means that an internal buffer is not large enough // to make progress and the Transform operation must be aborted. errShortInternal = errors.New("transform: short internal buffer") ) // Transformer transforms bytes. type Transformer interface { // Transform writes to dst the transformed bytes read from src, and // returns the number of dst bytes written and src bytes read. The // atEOF argument tells whether src represents the last bytes of the // input. // // Callers should always process the nDst bytes produced and account // for the nSrc bytes consumed before considering the error err. // // A nil error means that all of the transformed bytes (whether freshly // transformed from src or left over from previous Transform calls) // were written to dst. A nil error can be returned regardless of // whether atEOF is true. If err is nil then nSrc must equal len(src); // the converse is not necessarily true. // // ErrShortDst means that dst was too short to receive all of the // transformed bytes. ErrShortSrc means that src had insufficient data // to complete the transformation. If both conditions apply, then // either error may be returned. Other than the error conditions listed // here, implementations are free to report other errors that arise. Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) // Reset resets the state and allows a Transformer to be reused. Reset() } // SpanningTransformer extends the Transformer interface with a Span method // that determines how much of the input already conforms to the Transformer. type SpanningTransformer interface { Transformer // Span returns a position in src such that transforming src[:n] results in // identical output src[:n] for these bytes. It does not necessarily return // the largest such n. The atEOF argument tells whether src represents the // last bytes of the input. // // Callers should always account for the n bytes consumed before // considering the error err. // // A nil error means that all input bytes are known to be identical to the // output produced by the Transformer. A nil error can be returned // regardless of whether atEOF is true. If err is nil, then n must // equal len(src); the converse is not necessarily true. // // ErrEndOfSpan means that the Transformer output may differ from the // input after n bytes. Note that n may be len(src), meaning that the output // would contain additional bytes after otherwise identical output. // ErrShortSrc means that src had insufficient data to determine whether the // remaining bytes would change. Other than the error conditions listed // here, implementations are free to report other errors that arise. // // Calling Span can modify the Transformer state as a side effect. In // effect, it does the transformation just as calling Transform would, only // without copying to a destination buffer and only up to a point it can // determine the input and output bytes are the same. This is obviously more // limited than calling Transform, but can be more efficient in terms of // copying and allocating buffers. Calls to Span and Transform may be // interleaved. Span(src []byte, atEOF bool) (n int, err error) } // NopResetter can be embedded by implementations of Transformer to add a nop // Reset method. type NopResetter struct{} // Reset implements the Reset method of the Transformer interface. func (NopResetter) () {} // Reader wraps another io.Reader by transforming the bytes read. type Reader struct { r io.Reader t Transformer err error // dst[dst0:dst1] contains bytes that have been transformed by t but // not yet copied out via Read. dst []byte dst0, dst1 int // src[src0:src1] contains bytes that have been read from r but not // yet transformed through t. src []byte src0, src1 int // transformComplete is whether the transformation is complete, // regardless of whether or not it was successful. transformComplete bool } const defaultBufSize = 4096 // NewReader returns a new Reader that wraps r by transforming the bytes read // via t. It calls Reset on t. func ( io.Reader, Transformer) *Reader { .Reset() return &Reader{ r: , t: , dst: make([]byte, defaultBufSize), src: make([]byte, defaultBufSize), } } // Read implements the io.Reader interface. func ( *Reader) ( []byte) (int, error) { , := 0, error(nil) for { // Copy out any transformed bytes and return the final error if we are done. if .dst0 != .dst1 { = copy(, .dst[.dst0:.dst1]) .dst0 += if .dst0 == .dst1 && .transformComplete { return , .err } return , nil } else if .transformComplete { return 0, .err } // Try to transform some source bytes, or to flush the transformer if we // are out of source bytes. We do this even if r.r.Read returned an error. // As the io.Reader documentation says, "process the n > 0 bytes returned // before considering the error". if .src0 != .src1 || .err != nil { .dst0 = 0 .dst1, , = .t.Transform(.dst, .src[.src0:.src1], .err == io.EOF) .src0 += switch { case == nil: if .src0 != .src1 { .err = errInconsistentByteCount } // The Transform call was successful; we are complete if we // cannot read more bytes into src. .transformComplete = .err != nil continue case == ErrShortDst && (.dst1 != 0 || != 0): // Make room in dst by copying out, and try again. continue case == ErrShortSrc && .src1-.src0 != len(.src) && .err == nil: // Read more bytes into src via the code below, and try again. default: .transformComplete = true // The reader error (r.err) takes precedence over the // transformer error (err) unless r.err is nil or io.EOF. if .err == nil || .err == io.EOF { .err = } continue } } // Move any untransformed source bytes to the start of the buffer // and read more bytes. if .src0 != 0 { .src0, .src1 = 0, copy(.src, .src[.src0:.src1]) } , .err = .r.Read(.src[.src1:]) .src1 += } } // TODO: implement ReadByte (and ReadRune??). // Writer wraps another io.Writer by transforming the bytes read. // The user needs to call Close to flush unwritten bytes that may // be buffered. type Writer struct { w io.Writer t Transformer dst []byte // src[:n] contains bytes that have not yet passed through t. src []byte n int } // NewWriter returns a new Writer that wraps w by transforming the bytes written // via t. It calls Reset on t. func ( io.Writer, Transformer) *Writer { .Reset() return &Writer{ w: , t: , dst: make([]byte, defaultBufSize), src: make([]byte, defaultBufSize), } } // Write implements the io.Writer interface. If there are not enough // bytes available to complete a Transform, the bytes will be buffered // for the next write. Call Close to convert the remaining bytes. func ( *Writer) ( []byte) ( int, error) { := if .n > 0 { // Append bytes from data to the last remainder. // TODO: limit the amount copied on first try. = copy(.src[.n:], ) .n += = .src[:.n] } for { , , := .t.Transform(.dst, , false) if , := .w.Write(.dst[:]); != nil { return , } = [:] if .n == 0 { += } else if len() <= { // Enough bytes from w.src have been consumed. We make src point // to data instead to reduce the copying. .n = 0 -= len() = [:] if < len() && ( == nil || == ErrShortSrc) { continue } } switch { case ErrShortDst: // This error is okay as long as we are making progress. if > 0 || > 0 { continue } case ErrShortSrc: if len() < len(.src) { := copy(.src, ) // If w.n > 0, bytes from data were already copied to w.src and n // was already set to the number of bytes consumed. if .n == 0 { += } .n = = nil } else if > 0 || > 0 { // Not enough buffer to store the remainder. Keep processing as // long as there is progress. Without this case, transforms that // require a lookahead larger than the buffer may result in an // error. This is not something one may expect to be common in // practice, but it may occur when buffers are set to small // sizes during testing. continue } case nil: if .n > 0 { = errInconsistentByteCount } } return , } } // Close implements the io.Closer interface. func ( *Writer) () error { := .src[:.n] for { , , := .t.Transform(.dst, , true) if , := .w.Write(.dst[:]); != nil { return } if != ErrShortDst { return } = [:] } } type nop struct{ NopResetter } func (nop) (, []byte, bool) (, int, error) { := copy(, ) if < len() { = ErrShortDst } return , , } func (nop) ( []byte, bool) ( int, error) { return len(), nil } type discard struct{ NopResetter } func (discard) (, []byte, bool) (, int, error) { return 0, len(), nil } var ( // Discard is a Transformer for which all Transform calls succeed // by consuming all bytes and writing nothing. Discard Transformer = discard{} // Nop is a SpanningTransformer that copies src to dst. Nop SpanningTransformer = nop{} ) // chain is a sequence of links. A chain with N Transformers has N+1 links and // N+1 buffers. Of those N+1 buffers, the first and last are the src and dst // buffers given to chain.Transform and the middle N-1 buffers are intermediate // buffers owned by the chain. The i'th link transforms bytes from the i'th // buffer chain.link[i].b at read offset chain.link[i].p to the i+1'th buffer // chain.link[i+1].b at write offset chain.link[i+1].n, for i in [0, N). type chain struct { link []link err error // errStart is the index at which the error occurred plus 1. Processing // errStart at this level at the next call to Transform. As long as // errStart > 0, chain will not consume any more source bytes. errStart int } func ( *chain) ( int, error) { if := + 1; > .errStart { .errStart = .err = } } type link struct { t Transformer // b[p:n] holds the bytes to be transformed by t. b []byte p int n int } func ( *link) () []byte { return .b[.p:.n] } func ( *link) () []byte { return .b[.n:] } // Chain returns a Transformer that applies t in sequence. func ( ...Transformer) Transformer { if len() == 0 { return nop{} } := &chain{link: make([]link, len()+1)} for , := range { .link[].t = } // Allocate intermediate buffers. := make([][defaultBufSize]byte, len()-1) for := range { .link[+1].b = [][:] } return } // Reset resets the state of Chain. It calls Reset on all the Transformers. func ( *chain) () { for , := range .link { if .t != nil { .t.Reset() } .link[].p, .link[].n = 0, 0 } } // TODO: make chain use Span (is going to be fun to implement!) // Transform applies the transformers of c in sequence. func ( *chain) (, []byte, bool) (, int, error) { // Set up src and dst in the chain. := &.link[0] := &.link[len(.link)-1] .b, .p, .n = , 0, len() .b, .n = , 0 var , bool // for detecting progress // i is the index of the next Transformer to apply, for i in [low, high]. // low is the lowest index for which c.link[low] may still produce bytes. // high is the highest index for which c.link[high] has a Transformer. // The error returned by Transform determines whether to increase or // decrease i. We try to completely fill a buffer before converting it. for , , := .errStart, .errStart, len(.link)-2; <= && <= ; { , := &.link[], &.link[+1] , , := .t.Transform(.dst(), .src(), && == ) .n += .p += if > 0 && .p == .n { .p, .n = 0, 0 } , = , false switch { case ErrShortDst: // Process the destination buffer next. Return if we are already // at the high index. if == { return .n, .p, ErrShortDst } if .n != 0 { ++ // If the Transformer at the next index is not able to process any // source bytes there is nothing that can be done to make progress // and the bytes will remain unprocessed. lastFull is used to // detect this and break out of the loop with a fatal error. = true continue } // The destination buffer was too small, but is completely empty. // Return a fatal error as this transformation can never complete. .fatalError(, errShortInternal) case ErrShortSrc: if == 0 { // Save ErrShortSrc in err. All other errors take precedence. = ErrShortSrc break } // Source bytes were depleted before filling up the destination buffer. // Verify we made some progress, move the remaining bytes to the errStart // and try to get more source bytes. if && == 0 || .n-.p == len(.b) { // There were not enough source bytes to proceed while the source // buffer cannot hold any more bytes. Return a fatal error as this // transformation can never complete. .fatalError(, errShortInternal) break } // in.b is an internal buffer and we can make progress. .p, .n = 0, copy(.b, .src()) fallthrough case nil: // if i == low, we have depleted the bytes at index i or any lower levels. // In that case we increase low and i. In all other cases we decrease i to // fetch more bytes before proceeding to the next index. if > { -- continue } default: .fatalError(, ) } // Exhausted level low or fatal error: increase low and continue // to process the bytes accepted so far. ++ = } // If c.errStart > 0, this means we found a fatal error. We will clear // all upstream buffers. At this point, no more progress can be made // downstream, as Transform would have bailed while handling ErrShortDst. if .errStart > 0 { for := 1; < .errStart; ++ { .link[].p, .link[].n = 0, 0 } , .errStart, .err = .err, 0, nil } return .n, .p, } // Deprecated: Use runes.Remove instead. func ( func( rune) bool) Transformer { return removeF() } type removeF func(r rune) bool func (removeF) () {} // Transform implements the Transformer interface. func ( removeF) (, []byte, bool) (, int, error) { for , := rune(0), 0; len() > 0; = [:] { if = rune([0]); < utf8.RuneSelf { = 1 } else { , = utf8.DecodeRune() if == 1 { // Invalid rune. if ! && !utf8.FullRune() { = ErrShortSrc break } // We replace illegal bytes with RuneError. Not doing so might // otherwise turn a sequence of invalid UTF-8 into valid UTF-8. // The resulting byte sequence may subsequently contain runes // for which t(r) is true that were passed unnoticed. if !() { if +3 > len() { = ErrShortDst break } += copy([:], "\uFFFD") } ++ continue } } if !() { if + > len() { = ErrShortDst break } += copy([:], [:]) } += } return } // grow returns a new []byte that is longer than b, and copies the first n bytes // of b to the start of the new slice. func grow( []byte, int) []byte { := len() if <= 32 { = 64 } else if <= 256 { *= 2 } else { += >> 1 } := make([]byte, ) copy(, [:]) return } const initialBufSize = 128 // String returns a string with the result of converting s[:n] using t, where // n <= len(s). If err == nil, n will be len(s). It calls Reset on t. func ( Transformer, string) ( string, int, error) { .Reset() if == "" { // Fast path for the common case for empty input. Results in about a // 86% reduction of running time for BenchmarkStringLowerEmpty. if , , := .Transform(nil, nil, true); == nil { return "", 0, nil } } // Allocate only once. Note that both dst and src escape when passed to // Transform. := [2 * initialBufSize]byte{} := [:initialBufSize:initialBufSize] := [initialBufSize : 2*initialBufSize] // The input string s is transformed in multiple chunks (starting with a // chunk size of initialBufSize). nDst and nSrc are per-chunk (or // per-Transform-call) indexes, pDst and pSrc are overall indexes. , := 0, 0 , := 0, 0 // pPrefix is the length of a common prefix: the first pPrefix bytes of the // result will equal the first pPrefix bytes of s. It is not guaranteed to // be the largest such value, but if pPrefix, len(result) and len(s) are // all equal after the final transform (i.e. calling Transform with atEOF // being true returned nil error) then we don't need to allocate a new // result string. := 0 for { // Invariant: pDst == pPrefix && pSrc == pPrefix. := copy(, [:]) , , = .Transform(, [:], + == len()) += += // TODO: let transformers implement an optional Spanner interface, akin // to norm's QuickSpan. This would even allow us to avoid any allocation. if !bytes.Equal([:], [:]) { break } = if == ErrShortDst { // A buffer can only be short if a transformer modifies its input. break } else if == ErrShortSrc { if == 0 { // No progress was made. break } // Equal so far and !atEOF, so continue checking. } else if != nil || == len() { return string([:]), , } } // Post-condition: pDst == pPrefix + nDst && pSrc == pPrefix + nSrc. // We have transformed the first pSrc bytes of the input s to become pDst // transformed bytes. Those transformed bytes are discontiguous: the first // pPrefix of them equal s[:pPrefix] and the last nDst of them equal // dst[:nDst]. We copy them around, into a new dst buffer if necessary, so // that they become one contiguous slice: dst[:pDst]. if != 0 { := if > len() { = make([]byte, len()+-) } copy([:], [:]) copy([:], [:]) = } // Prevent duplicate Transform calls with atEOF being true at the end of // the input. Also return if we have an unrecoverable error. if ( == nil && == len()) || ( != nil && != ErrShortDst && != ErrShortSrc) { return string([:]), , } // Transform the remaining input, growing dst and src buffers as necessary. for { := copy(, [:]) := + == len() , , := .Transform([:], [:], ) += += // If we got ErrShortDst or ErrShortSrc, do not grow as long as we can // make progress. This may avoid excessive allocations. if == ErrShortDst { if == 0 { = grow(, ) } } else if == ErrShortSrc { if { return string([:]), , } if == 0 { = grow(, 0) } } else if != nil || == len() { return string([:]), , } } } // Bytes returns a new byte slice with the result of converting b[:n] using t, // where n <= len(b). If err == nil, n will be len(b). It calls Reset on t. func ( Transformer, []byte) ( []byte, int, error) { return doAppend(, 0, make([]byte, len()), ) } // Append appends the result of converting src[:n] using t to dst, where // n <= len(src), If err == nil, n will be len(src). It calls Reset on t. func ( Transformer, , []byte) ( []byte, int, error) { if len() == cap() { := len() + len() // It is okay for this to be 0. := make([]byte, ) = [:copy(, )] } return doAppend(, len(), [:cap()], ) } func doAppend( Transformer, int, , []byte) ( []byte, int, error) { .Reset() := 0 for { , , := .Transform([:], [:], true) += += if != ErrShortDst { return [:], , } // Grow the destination buffer, but do not grow as long as we can make // progress. This may avoid excessive allocations. if == 0 { = grow(, ) } } }