// Copyright 2013 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package simplifiedchineseimport ()var (// GB18030 is the GB18030 encoding.GB18030encoding.Encoding = &gbk18030// GBK is the GBK encoding. It encodes an extension of the GB2312 character set // and is also known as Code Page 936.GBKencoding.Encoding = &gbk)var gbk = internal.Encoding{ &internal.SimpleEncoding{gbkDecoder{gb18030: false},gbkEncoder{gb18030: false}, },"GBK",identifier.GBK,}var gbk18030 = internal.Encoding{ &internal.SimpleEncoding{gbkDecoder{gb18030: true},gbkEncoder{gb18030: true}, },"GB18030",identifier.GB18030,}type gbkDecoder struct {transform.NopResetter gb18030 bool}func ( gbkDecoder) (, []byte, bool) (, int, error) { , := rune(0), 0:for ; < len(); += {switch := []; {case < utf8.RuneSelf: , = rune(), 1// Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC // as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk // says to treat "gbk" as Code Page 936. // GBK’s decoder is gb18030’s decoder. https://encoding.spec.whatwg.org/#gbk-decoder // If byte is 0x80, return code point U+20AC. https://encoding.spec.whatwg.org/#gb18030-decodercase == 0x80: , = '€', 1case < 0xff:if +1 >= len() {if ! { = transform.ErrShortSrcbreak } , = utf8.RuneError, 1goto } := [+1]switch {case0x40 <= && < 0x7f: -= 0x40case0x80 <= && < 0xff: -= 0x41case .gb18030 && 0x30 <= && < 0x40:if +3 >= len() {if ! { = transform.ErrShortSrcbreak }// The second byte here is always ASCII, so we can set size // to 1 in all cases. 
, = utf8.RuneError, 1goto } := [+2]if < 0x81 || 0xff <= { , = utf8.RuneError, 1goto } := [+3]if < 0x30 || 0x3a <= { , = utf8.RuneError, 1goto } = 4 = ((rune(-0x81)*10+rune(-0x30))*126+rune(-0x81))*10 + rune(-0x30)if < 39420 { , := 0, len(gb18030)for < { := + (-)/2if >= rune(gb18030[][0]) { = + 1 } else { = } } := &gb18030[-1] += rune([1]) - rune([0])goto } -= 189000if0 <= && < 0x100000 { += 0x10000 } else { , = utf8.RuneError, 1 }gotodefault: , = utf8.RuneError, 1goto } , = '\ufffd', 2if := int(-0x81)*190 + int(); < len(decode) { = rune(decode[])if == 0 { = '\ufffd' } }default: , = utf8.RuneError, 1 } :if +utf8.RuneLen() > len() { = transform.ErrShortDstbreak } += utf8.EncodeRune([:], ) }return , , }type gbkEncoder struct {transform.NopResetter gb18030 bool}func ( gbkEncoder) (, []byte, bool) (, int, error) { , , := rune(0), rune(0), 0for ; < len(); += { = rune([])// Decode a 1-byte rune.if < utf8.RuneSelf { = 1 } else {// Decode a multi-byte rune. , = utf8.DecodeRune([:])if == 1 {// All valid runes of size 1 (those below utf8.RuneSelf) were // handled above. We have invalid UTF-8 or we haven't seen the // full character yet.if ! && !utf8.FullRune([:]) { = transform.ErrShortSrcbreak } }// func init checks that the switch covers all tables.switch {caseencode0Low <= && < encode0High:if = rune(encode0[-encode0Low]); != 0 {goto }caseencode1Low <= && < encode1High:// Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC // as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk // says to treat "gbk" as Code Page 936. // GBK’s encoder is gb18030’s encoder with its _is GBK_ set to true. https://encoding.spec.whatwg.org/#gbk-encoder // If _is GBK_ is true and code point is U+20AC, return byte 0x80. 
https://encoding.spec.whatwg.org/#gb18030-encoderif !.gb18030 && == '€' { = 0x80goto }if = rune(encode1[-encode1Low]); != 0 {goto }caseencode2Low <= && < encode2High:if = rune(encode2[-encode2Low]); != 0 {goto }caseencode3Low <= && < encode3High:if = rune(encode3[-encode3Low]); != 0 {goto }caseencode4Low <= && < encode4High:if = rune(encode4[-encode4Low]); != 0 {goto } }if .gb18030 {if < 0x10000 { , := 0, len(gb18030)for < { := + (-)/2if >= rune(gb18030[][1]) { = + 1 } else { = } } := &gb18030[-1] += rune([0]) - rune([1])goto } elseif < 0x110000 { += 189000 - 0x10000goto } } = internal.ErrASCIIReplacementbreak } :if >= len() { = transform.ErrShortDstbreak } [] = uint8() ++continue :if +2 > len() { = transform.ErrShortDstbreak } [+0] = uint8( >> 8) [+1] = uint8() += 2continue :if +4 > len() { = transform.ErrShortDstbreak } [+3] = uint8(%10 + 0x30) /= 10 [+2] = uint8(%126 + 0x81) /= 126 [+1] = uint8(%10 + 0x30) /= 10 [+0] = uint8( + 0x81) += 4continue }return , , }func init() {// Check that the hard-coded encode switch covers all tables.ifnumEncodeTables != 5 {panic("bad numEncodeTables") }}
The pages are generated with Golds v0.6.7. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PRs and bug reports are welcome and can be submitted to the issue list.
Please follow @Go100and1 (reachable from the QR code on the left) to get the latest news about Golds.