// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package encoding defines an interface for character encodings, such as Shift
// JIS and Windows 1252, that can convert to and from UTF-8.
//
// Encoding implementations are provided in other packages, such as
// golang.org/x/text/encoding/charmap and
// golang.org/x/text/encoding/japanese.
package encoding // import "golang.org/x/text/encoding"

import (
	"errors"
	"io"
	"strconv"
	"unicode/utf8"

	"golang.org/x/text/encoding/internal/identifier"
	"golang.org/x/text/transform"
)

// TODO:
// - There seems to be some inconsistency in when decoders return errors
//   and when not. Also documentation seems to suggest they shouldn't return
//   errors at all (except for UTF-16).
// - Encoders seem to rely on or at least benefit from the input being in NFC
//   normal form. Perhaps add an example how users could prepare their output.

// Encoding is a character set encoding that can be transformed to and from
// UTF-8.
type Encoding interface {
	// NewDecoder returns a Decoder.
	NewDecoder() *Decoder

	// NewEncoder returns an Encoder.
	NewEncoder() *Encoder
}

// A Decoder converts bytes to UTF-8. It implements transform.Transformer.
//
// Transforming source bytes that are not of that encoding will not result in an
// error per se. Each byte that cannot be transcoded will be represented in the
// output by the UTF-8 encoding of '\uFFFD', the replacement rune.
type Decoder struct {
	transform.Transformer

	// This forces external creators of Decoders to use names in struct
	// initializers, allowing for future extendibility without having to break
	// code.
	_ struct{}
}

// Bytes converts the given encoded bytes to UTF-8. It returns the converted
// bytes or nil, err if any error occurred.
func (d *Decoder) Bytes(b []byte) ([]byte, error) {
	b, _, err := transform.Bytes(d, b)
	if err != nil {
		return nil, err
	}
	return b, nil
}

// String converts the given encoded string to UTF-8. It returns the converted
// string or "", err if any error occurred.
func (d *Decoder) String(s string) (string, error) {
	s, _, err := transform.String(d, s)
	if err != nil {
		return "", err
	}
	return s, nil
}

// Reader wraps another Reader to decode its bytes.
//
// The Decoder may not be used for any other operation as long as the returned
// Reader is in use.
func (d *Decoder) Reader(r io.Reader) io.Reader {
	return transform.NewReader(r, d)
}

// An Encoder converts bytes from UTF-8. It implements transform.Transformer.
//
// Each rune that cannot be transcoded will result in an error. In this case,
// the transform will consume all source bytes up to, not including the
// offending rune. Source bytes that are not valid UTF-8 are replaced by
// `\uFFFD`. To return early with an error instead, use transform.Chain to
// preprocess the data with a UTF8Validator.
type Encoder struct {
	transform.Transformer

	// This forces external creators of Encoders to use names in struct
	// initializers, allowing for future extendibility without having to break
	// code.
	_ struct{}
}

// Bytes converts bytes from UTF-8. It returns the converted bytes or nil, err if
// any error occurred.
func (e *Encoder) Bytes(b []byte) ([]byte, error) {
	b, _, err := transform.Bytes(e, b)
	if err != nil {
		return nil, err
	}
	return b, nil
}

// String converts a string from UTF-8. It returns the converted string or
// "", err if any error occurred.
func (e *Encoder) String(s string) (string, error) {
	s, _, err := transform.String(e, s)
	if err != nil {
		return "", err
	}
	return s, nil
}

// Writer wraps another Writer to encode its UTF-8 output.
//
// The Encoder may not be used for any other operation as long as the returned
// Writer is in use.
func (e *Encoder) Writer(w io.Writer) io.Writer {
	return transform.NewWriter(w, e)
}

// ASCIISub is the ASCII substitute character, as recommended by
// https://unicode.org/reports/tr36/#Text_Comparison
const ASCIISub = '\x1a'

// Nop is the nop encoding. Its transformed bytes are the same as the source
// bytes; it does not replace invalid UTF-8 sequences.
var Nop Encoding = nop{}

// nop implements Encoding by handing out transform.Nop in both directions.
type nop struct{}

// NewDecoder returns a Decoder backed by transform.Nop.
func (nop) NewDecoder() *Decoder {
	return &Decoder{Transformer: transform.Nop}
}

// NewEncoder returns an Encoder backed by transform.Nop.
func (nop) NewEncoder() *Encoder {
	return &Encoder{Transformer: transform.Nop}
}

// Replacement is the replacement encoding. Decoding from the replacement
// encoding yields a single '\uFFFD' replacement rune. Encoding from UTF-8 to
// the replacement encoding yields the same as the source bytes except that
// invalid UTF-8 is converted to '\uFFFD'.
//
// It is defined at https://encoding.spec.whatwg.org/#replacement
var Replacement Encoding = replacement{}

// replacement implements Encoding for the replacement encoding.
type replacement struct{}

// NewDecoder returns a Decoder backed by replacementDecoder.
func (replacement) NewDecoder() *Decoder {
	return &Decoder{Transformer: replacementDecoder{}}
}

// NewEncoder returns an Encoder backed by replacementEncoder.
func (replacement) NewEncoder() *Encoder {
	return &Encoder{Transformer: replacementEncoder{}}
}

// ID reports identifier.Replacement as the MIB and an empty alternative name.
func (replacement) ID() (mib identifier.MIB, other string) {
	return identifier.Replacement, ""
}

// replacementDecoder discards its input and emits one '\uFFFD' at end of input.
type replacementDecoder struct{ transform.NopResetter }

// Transform consumes all of src. It writes the three-byte UTF-8 encoding of
// '\uFFFD' to dst only when atEOF is set; otherwise it writes nothing. It
// returns transform.ErrShortDst, consuming nothing, whenever dst has room for
// fewer than three bytes, regardless of atEOF.
func (replacementDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	if len(dst) < 3 {
		return 0, 0, transform.ErrShortDst
	}
	if atEOF {
		const fffd = "\ufffd"
		dst[0] = fffd[0]
		dst[1] = fffd[1]
		dst[2] = fffd[2]
		nDst = 3
	}
	return nDst, len(src), nil
}

// replacementEncoder copies valid UTF-8 through unchanged and substitutes
// '\uFFFD' for invalid input.
type replacementEncoder struct{ transform.NopResetter }

// Transform copies src to dst rune by rune, replacing each invalid byte with
// '\uFFFD'. It stops with transform.ErrShortSrc when src ends in an incomplete
// rune and atEOF is false, and with transform.ErrShortDst when the next rune
// does not fit in the remainder of dst.
func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	r, size := rune(0), 0

	for ; nSrc < len(src); nSrc += size {
		r = rune(src[nSrc])

		// Decode a 1-byte rune.
		if r < utf8.RuneSelf {
			size = 1

		} else {
			// Decode a multi-byte rune.
			r, size = utf8.DecodeRune(src[nSrc:])
			if size == 1 {
				// All valid runes of size 1 (those below utf8.RuneSelf) were
				// handled above. We have invalid UTF-8 or we haven't seen the
				// full character yet.
				if !atEOF && !utf8.FullRune(src[nSrc:]) {
					err = transform.ErrShortSrc
					break
				}
				r = '\ufffd'
			}
		}

		if nDst+utf8.RuneLen(r) > len(dst) {
			err = transform.ErrShortDst
			break
		}
		nDst += utf8.EncodeRune(dst[nDst:], r)
	}
	return nDst, nSrc, err
}

// HTMLEscapeUnsupported wraps encoders to replace source runes outside the
// repertoire of the destination encoding with HTML escape sequences.
//
// This wrapper exists to comply with URL and HTML forms requiring a
// non-terminating legacy encoder. The produced sequences may lead to data
// loss as they are indistinguishable from legitimate input. To avoid this
// issue, use UTF-8 encodings whenever possible.
func HTMLEscapeUnsupported(e *Encoder) *Encoder {
	return &Encoder{Transformer: &errorHandler{e, errorToHTML}}
}

// ReplaceUnsupported wraps encoders to replace source runes outside the
// repertoire of the destination encoding with an encoding-specific
// replacement.
//
// This wrapper is only provided for backwards compatibility and legacy
// handling. Its use is strongly discouraged. Use UTF-8 whenever possible.
func ReplaceUnsupported(e *Encoder) *Encoder {
	return &Encoder{Transformer: &errorHandler{e, errorToReplacement}}
}

// errorHandler wraps an Encoder and recovers from repertoire errors by
// letting handler write a substitute for the offending rune.
type errorHandler struct {
	*Encoder
	handler func(dst []byte, r rune, err repertoireError) (n int, ok bool)
}

// TODO: consider making this error public in some form.
type repertoireError interface {
	Replacement() byte
}

// Transform runs the wrapped Transformer. Each time it stops with a
// repertoireError, the rune at the current source position is passed to
// h.handler, and the transform resumes after that rune. Any other error is
// returned unchanged. If the handler reports that dst is too small, Transform
// returns transform.ErrShortDst with the offending rune still unconsumed.
func (h errorHandler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	nDst, nSrc, err = h.Transformer.Transform(dst, src, atEOF)
	for err != nil {
		rerr, ok := err.(repertoireError)
		if !ok {
			return nDst, nSrc, err
		}
		r, sz := utf8.DecodeRune(src[nSrc:])
		n, ok := h.handler(dst[nDst:], r, rerr)
		if !ok {
			return nDst, nSrc, transform.ErrShortDst
		}
		err = nil
		nDst += n
		// Skip the substituted rune and continue with the rest of src, if any.
		if nSrc += sz; nSrc < len(src) {
			var dn, sn int
			dn, sn, err = h.Transformer.Transform(dst[nDst:], src[nSrc:], atEOF)
			nDst += dn
			nSrc += sn
		}
	}
	return nDst, nSrc, err
}

// errorToHTML writes r to dst as a decimal HTML numeric character reference
// ("&#N;") and returns the number of bytes written. It returns 0, false when
// dst is not strictly longer than the escape sequence.
func errorToHTML(dst []byte, r rune, err repertoireError) (n int, ok bool) {
	buf := [8]byte{}
	b := strconv.AppendUint(buf[:0], uint64(r), 10)
	if n = len(b) + len("&#;"); n >= len(dst) {
		return 0, false
	}
	dst[0] = '&'
	dst[1] = '#'
	dst[copy(dst[2:], b)+2] = ';'
	return n, true
}

// errorToReplacement writes the single replacement byte supplied by err to
// dst. It returns 0, false when dst is empty.
func errorToReplacement(dst []byte, r rune, err repertoireError) (n int, ok bool) {
	if len(dst) == 0 {
		return 0, false
	}
	dst[0] = err.Replacement()
	return 1, true
}

// ErrInvalidUTF8 means that a transformer encountered invalid UTF-8.
var ErrInvalidUTF8 = errors.New("encoding: invalid UTF-8")

// UTF8Validator is a transformer that returns ErrInvalidUTF8 on the first
// input byte that is not valid UTF-8.
var UTF8Validator transform.Transformer = utf8Validator{}

// utf8Validator copies valid UTF-8 through unchanged and stops at the first
// invalid byte.
type utf8Validator struct{ transform.NopResetter }

// Transform copies src to dst while it is valid UTF-8. It returns
// ErrInvalidUTF8 at the first invalid byte, transform.ErrShortSrc when src
// ends in an incomplete rune and atEOF is false, and transform.ErrShortDst
// when dst cannot hold the next rune or all of src. nDst and nSrc are always
// equal because bytes are copied one for one.
func (utf8Validator) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	n := len(src)
	if n > len(dst) {
		n = len(dst)
	}
	for i := 0; i < n; {
		if c := src[i]; c < utf8.RuneSelf {
			dst[i] = c
			i++
			continue
		}
		_, size := utf8.DecodeRune(src[i:])
		if size == 1 {
			// All valid runes of size 1 (those below utf8.RuneSelf) were
			// handled above. We have invalid UTF-8 or we haven't seen the
			// full character yet.
			err = ErrInvalidUTF8
			if !atEOF && !utf8.FullRune(src[i:]) {
				err = transform.ErrShortSrc
			}
			return i, i, err
		}
		if i+size > len(dst) {
			return i, i, transform.ErrShortDst
		}
		for ; size > 0; size-- {
			dst[i] = src[i]
			i++
		}
	}
	if len(src) > len(dst) {
		err = transform.ErrShortDst
	}
	return n, n, err
}
The pages are generated with Golds v0.6.7. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @Go100and1 (reachable from the left QR code) to get the latest news of Golds.