// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate go run maketables.go

// Package charmap provides simple character encodings such as IBM Code Page 437
// and Windows 1252.
package charmap // import "golang.org/x/text/encoding/charmap"

import (
	"unicode/utf8"

	"golang.org/x/text/encoding"
	"golang.org/x/text/encoding/internal"
	"golang.org/x/text/encoding/internal/identifier"
	"golang.org/x/text/transform"
)

// These encodings vary only in the way clients should interpret them. Their
// coded character set is identical and a single implementation can be shared.
var (
	// ISO8859_6E is the ISO 8859-6E encoding.
	ISO8859_6E encoding.Encoding = &iso8859_6E

	// ISO8859_6I is the ISO 8859-6I encoding.
	ISO8859_6I encoding.Encoding = &iso8859_6I

	// ISO8859_8E is the ISO 8859-8E encoding.
	ISO8859_8E encoding.Encoding = &iso8859_8E

	// ISO8859_8I is the ISO 8859-8I encoding.
	ISO8859_8I encoding.Encoding = &iso8859_8I

	iso8859_6E = internal.Encoding{
		Encoding: ISO8859_6,
		Name:     "ISO-8859-6E",
		MIB:      identifier.ISO88596E,
	}

	iso8859_6I = internal.Encoding{
		Encoding: ISO8859_6,
		Name:     "ISO-8859-6I",
		MIB:      identifier.ISO88596I,
	}

	iso8859_8E = internal.Encoding{
		Encoding: ISO8859_8,
		Name:     "ISO-8859-8E",
		MIB:      identifier.ISO88598E,
	}

	iso8859_8I = internal.Encoding{
		Encoding: ISO8859_8,
		Name:     "ISO-8859-8I",
		MIB:      identifier.ISO88598I,
	}
)

// All is a list of all defined encodings in this package.
var All []encoding.Encoding = listAll

// TODO: implement these encodings, in order of importance.
//	ASCII, ISO8859_1: Rather common. Close to Windows 1252.
//	ISO8859_9: Close to Windows 1254.

// utf8Enc holds a rune's UTF-8 encoding in data[:len].
type utf8Enc struct {
	len  uint8
	data [3]byte
}

// Charmap is an 8-bit character set encoding.
type Charmap struct {
	// name is the encoding's name.
	name string
	// mib is the encoding type of this encoder.
	mib identifier.MIB
	// asciiSuperset states whether the encoding is a superset of ASCII.
	asciiSuperset bool
	// low is the lower bound of the encoded byte for a non-ASCII rune. If
	// Charmap.asciiSuperset is true then this will be 0x80, otherwise 0x00.
	low uint8
	// replacement is the encoded replacement character.
	replacement byte
	// decode is the map from encoded byte to UTF-8.
	decode [256]utf8Enc
	// encode is the map from runes to encoded bytes. Each entry is a
	// uint32: the high 8 bits are the encoded byte and the low 24 bits are
	// the rune. The table entries are sorted by ascending rune.
	encode [256]uint32
}

// NewDecoder implements the encoding.Encoding interface.
func (m *Charmap) NewDecoder() *encoding.Decoder {
	return &encoding.Decoder{Transformer: charmapDecoder{charmap: m}}
}

// NewEncoder implements the encoding.Encoding interface.
func (m *Charmap) NewEncoder() *encoding.Encoder {
	return &encoding.Encoder{Transformer: charmapEncoder{charmap: m}}
}

// String returns the Charmap's name.
func (m *Charmap) String() string {
	return m.name
}

// ID implements an internal interface.
func (m *Charmap) ID() (mib identifier.MIB, other string) {
	return m.mib, ""
}

// charmapDecoder implements transform.Transformer by decoding to UTF-8.
type charmapDecoder struct {
	transform.NopResetter
	charmap *Charmap
}

// Transform decodes the bytes of src into UTF-8 in dst using the Charmap's
// decode table. It returns transform.ErrShortDst as soon as dst cannot hold
// the next decoded character; nSrc then counts only the source bytes whose
// decoding was written completely. atEOF is not consulted, because every
// source byte is decoded on its own.
func (m charmapDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	for i, c := range src {
		// ASCII bytes map to themselves when the charmap is an ASCII superset.
		if m.charmap.asciiSuperset && c < utf8.RuneSelf {
			if nDst >= len(dst) {
				err = transform.ErrShortDst
				break
			}
			dst[nDst] = c
			nDst++
			nSrc = i + 1
			continue
		}

		decode := &m.charmap.decode[c]
		n := int(decode.len)
		if nDst+n > len(dst) {
			err = transform.ErrShortDst
			break
		}
		// It's 15% faster to avoid calling copy for these tiny slices.
		for j := 0; j < n; j++ {
			dst[nDst] = decode.data[j]
			nDst++
		}
		nSrc = i + 1
	}
	return nDst, nSrc, err
}

// DecodeByte returns the Charmap's rune decoding of the byte b.
func (m *Charmap) DecodeByte(b byte) rune {
	// The decode table stores UTF-8 of at most three bytes, so the rune is
	// reassembled by hand from the payload bits of each byte.
	switch x := &m.decode[b]; x.len {
	case 1:
		return rune(x.data[0])
	case 2:
		return rune(x.data[0]&0x1f)<<6 | rune(x.data[1]&0x3f)
	default:
		return rune(x.data[0]&0x0f)<<12 | rune(x.data[1]&0x3f)<<6 | rune(x.data[2]&0x3f)
	}
}

// charmapEncoder implements transform.Transformer by encoding from UTF-8.
type charmapEncoder struct {
	transform.NopResetter
	charmap *Charmap
}

// Transform encodes the UTF-8 in src into the Charmap's 8-bit encoding in
// dst. It stops with:
//   - transform.ErrShortDst when dst has no room for another byte;
//   - transform.ErrShortSrc when src ends in an incomplete rune and atEOF is
//     false;
//   - internal.RepertoireError(replacement) when the input is invalid UTF-8
//     or the rune is not found in the encode table.
//
// In every error case the offending input is left unconsumed: nSrc is only
// advanced past runes that were written to dst.
func (m charmapEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	r, size := rune(0), 0
loop:
	for nSrc < len(src) {
		if nDst >= len(dst) {
			err = transform.ErrShortDst
			break
		}
		r = rune(src[nSrc])

		// Decode a 1-byte rune.
		if r < utf8.RuneSelf {
			if m.charmap.asciiSuperset {
				nSrc++
				dst[nDst] = uint8(r)
				nDst++
				continue
			}
			size = 1
		} else {
			// Decode a multi-byte rune.
			r, size = utf8.DecodeRune(src[nSrc:])
			if size == 1 {
				// All valid runes of size 1 (those below utf8.RuneSelf) were
				// handled above. We have invalid UTF-8 or we haven't seen the
				// full character yet.
				if !atEOF && !utf8.FullRune(src[nSrc:]) {
					err = transform.ErrShortSrc
				} else {
					err = internal.RepertoireError(m.charmap.replacement)
				}
				break
			}
		}

		// Binary search in [low, high) for that rune in the m.charmap.encode table.
		for low, high := int(m.charmap.low), 0x100; ; {
			if low >= high {
				err = internal.RepertoireError(m.charmap.replacement)
				// Leave the outer loop so the unencodable rune is not consumed.
				break loop
			}
			mid := (low + high) / 2
			got := m.charmap.encode[mid]
			gotRune := rune(got & (1<<24 - 1))
			if gotRune < r {
				low = mid + 1
			} else if gotRune > r {
				high = mid
			} else {
				dst[nDst] = byte(got >> 24)
				nDst++
				break
			}
		}
		nSrc += size
	}
	return nDst, nSrc, err
}

// EncodeRune returns the Charmap's byte encoding of the rune r. ok is whether
// r is in the Charmap's repertoire. If not, b is set to the Charmap's
// replacement byte. This is often the ASCII substitute character '\x1a'.
func (m *Charmap) EncodeRune(r rune) (b byte, ok bool) {
	if r < utf8.RuneSelf && m.asciiSuperset {
		return byte(r), true
	}
	// Binary search in [low, high) for r in the encode table, which is sorted
	// by ascending rune (low 24 bits of each entry).
	for low, high := int(m.low), 0x100; ; {
		if low >= high {
			return m.replacement, false
		}
		mid := (low + high) / 2
		got := m.encode[mid]
		gotRune := rune(got & (1<<24 - 1))
		if gotRune < r {
			low = mid + 1
		} else if gotRune > r {
			high = mid
		} else {
			return byte(got >> 24), true
		}
	}
}