// Copyright 2015 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package unicodeimport ()// BOMOverride returns a new decoder transformer that is identical to fallback,// except that the presence of a Byte Order Mark at the start of the input// causes it to switch to the corresponding Unicode decoding. It will only// consider BOMs for UTF-8, UTF-16BE, and UTF-16LE.//// This differs from using ExpectBOM by allowing a BOM to switch to UTF-8, not// just UTF-16 variants, and allowing falling back to any encoding scheme.//// This technique is recommended by the W3C for use in HTML 5: "For// compatibility with deployed content, the byte order mark (also known as BOM)// is considered more authoritative than anything else."// http://www.w3.org/TR/encoding/#specification-hooks//// Using BOMOverride is mostly intended for use cases where the first characters// of a fallback encoding are known to not be a BOM, for example, for valid HTML// and most encodings.func ( transform.Transformer) transform.Transformer {// TODO: possibly allow a variadic argument of unicode encodings to allow // specifying details of which fallbacks are supported as well as // specifying the details of the implementations. This would also allow for // support for UTF-32, which should not be supported by default.return &bomOverride{fallback: }}type bomOverride struct { fallback transform.Transformer current transform.Transformer}func ( *bomOverride) () { .current = nil .fallback.Reset()}var (// TODO: we could use decode functions here, instead of allocating a new // decoder on every NewDecoder as IgnoreBOM decoders can be stateless. utf16le = UTF16(LittleEndian, IgnoreBOM) utf16be = UTF16(BigEndian, IgnoreBOM))const utf8BOM = "\ufeff"func ( *bomOverride) (, []byte, bool) (, int, error) {if .current != nil {return .current.Transform(, , ) }iflen() < 3 && ! 
{return0, 0, transform.ErrShortSrc } .current = .fallback := 0iflen() >= 2 {if [0] == 0xFF && [1] == 0xFE { .current = utf16le.NewDecoder() = 2 } elseif [0] == 0xFE && [1] == 0xFF { .current = utf16be.NewDecoder() = 2 } elseiflen() >= 3 && [0] == utf8BOM[0] && [1] == utf8BOM[1] && [2] == utf8BOM[2] { .current = transform.Nop = 3 } }if < len() { , , = .current.Transform(, [:], ) }return , + , }
The pages are generated with Golds v0.6.7. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @Go100and1 (reachable from the left QR code) to get the latest news of Golds.