package brotliimport/* Copyright 2010 Google Inc. All Rights Reserved. Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT*//* Entropy encoding (Huffman) utilities. *//* A node of a Huffman tree. */type huffmanTree struct { total_count_ uint32 index_left_ int16 index_right_or_value_ int16}func initHuffmanTree( *huffmanTree, uint32, int16, int16) { .total_count_ = .index_left_ = .index_right_or_value_ = }/* Input size optimized Shell sort. */type huffmanTreeComparator func(huffmanTree, huffmanTree) boolvar sortHuffmanTreeItems_gaps = []uint{132, 57, 23, 10, 4, 1}func sortHuffmanTreeItems( []huffmanTree, uint, huffmanTreeComparator) {if < 13 {/* Insertion sort. */varuintfor = 1; < ; ++ {varhuffmanTree = []varuint = varuint = - 1for (, []) { [] = [] = if == 0 {break } -- } [] = }return } else {varintif < 57 { = 2 } else { = 0 }for ; < 6; ++ {varuint = sortHuffmanTreeItems_gaps[]varuintfor = ; < ; ++ {varuint = varhuffmanTree = []for ; >= && (, [-]); -= { [] = [-] } [] = } } }}/* Returns 1 if assignment of depths succeeded, otherwise 0. */func setDepth( int, []huffmanTree, []byte, int) bool {var [16]intvarint = 0varint = assert( <= 15) [0] = -1for {if [].index_left_ >= 0 { ++if > {returnfalse } [] = int([].index_right_or_value_) = int([].index_left_)continue } else { [[].index_right_or_value_] = byte() }for >= 0 && [] == -1 { -- }if < 0 {returntrue } = [] [] = -1 }}/* Sort the root nodes, least popular first. */func sortHuffmanTree( huffmanTree, huffmanTree) bool {if .total_count_ != .total_count_ {return .total_count_ < .total_count_ }return .index_right_or_value_ > .index_right_or_value_}/* This function will create a Huffman tree. The catch here is that the tree cannot be arbitrarily deep. Brotli specifies a maximum depth of 15 bits for "code trees" and 7 bits for "code length code trees." 
count_limit is the value that is to be faked as the minimum value and this minimum value is raised until the tree matches the maximum length requirement. This algorithm is not of excellent performance for very long data blocks, especially when population counts are longer than 2**tree_limit, but we are not planning to use this with extremely long blocks. See http://en.wikipedia.org/wiki/Huffman_coding */func createHuffmanTree( []uint32, uint, int, []huffmanTree, []byte) {varuint32varhuffmanTreeinitHuffmanTree(&, math.MaxUint32, -1, -1)/* For block sizes below 64 kB, we never need to do a second iteration of this loop. Probably all of our block sizes will be smaller than that, so this loop is mostly of academic interest. If we actually would need this, we would be better off with the Katajainen algorithm. */for = 1; ; *= 2 {varuint = 0varuintvaruintvaruintfor = ; != 0; { --if [] != 0 {varuint32 = brotli_max_uint32_t([], )initHuffmanTree(&[], , -1, int16()) ++ } }if == 1 { [[0].index_right_or_value_] = 1/* Only one element. */break }sortHuffmanTreeItems(, , huffmanTreeComparator(sortHuffmanTree))/* The nodes are: [0, n): the sorted leaf nodes that we start with. [n]: we add a sentinel here. [n + 1, 2n): new parent nodes are added here, starting from (n+1). These are naturally in ascending order. [2n]: we add a sentinel at the end as well. There will be (2n+1) elements at the end. */ [] = [+1] = = 0/* Points to the next leaf node. */ = + 1/* Points to the next non-leaf node. */for = - 1; != 0; -- {varuintvaruintif [].total_count_ <= [].total_count_ { = ++ } else { = ++ }if [].total_count_ <= [].total_count_ { = ++ } else { = ++ } {/* The sentinel node becomes the parent node. */varuint = 2* - [].total_count_ = [].total_count_ + [].total_count_ [].index_left_ = int16() [].index_right_or_value_ = int16()/* Add back the last sentinel node. */ [+1] = } }ifsetDepth(int(2*-1), [0:], , ) {/* We need to pack the Huffman tree in tree_limit bits. 
If this was not successful, add fake entities to the lowest values and retry. */break } }}func reverse( []byte, uint, uint) { --for < {varbyte = [] [] = [] [] = ++ -- }}func writeHuffmanTreeRepetitions( byte, byte, uint, *uint, []byte, []byte) {assert( > 0)if != { [*] = [*] = 0 (*)++ -- }if == 7 { [*] = [*] = 0 (*)++ -- }if < 3 {varuintfor = 0; < ; ++ { [*] = [*] = 0 (*)++ } } else {varuint = * -= 3for { [*] = repeatPreviousCodeLength [*] = byte( & 0x3) (*)++ >>= 2if == 0 {break } -- }reverse(, , *)reverse(, , *) }}func writeHuffmanTreeRepetitionsZeros( uint, *uint, []byte, []byte) {if == 11 { [*] = 0 [*] = 0 (*)++ -- }if < 3 {varuintfor = 0; < ; ++ { [*] = 0 [*] = 0 (*)++ } } else {varuint = * -= 3for { [*] = repeatZeroCodeLength [*] = byte( & 0x7) (*)++ >>= 3if == 0 {break } -- }reverse(, , *)reverse(, , *) }}/* Change the population counts in a way that the consequent Huffman tree compression, especially its RLE-part will be more likely to compress this data more efficiently. length contains the size of the histogram. counts contains the population counts. good_for_rle is a buffer of at least length size */func optimizeHuffmanCountsForRLE( uint, []uint32, []byte) {varuint = 0varuintvaruintvaruintvaruint = 1240varuint/* Let's make the Huffman code more compatible with RLE encoding. */for = 0; < ; ++ {if [] != 0 { ++ } }if < 16 {return }for != 0 && [-1] == 0 { -- }if == 0 {return/* All zeros. */ }/* Now counts[0..length - 1] does not have trailing zeros. */ {varuint = 0varuint32 = 1 << 30for = 0; < ; ++ {if [] != 0 { ++if > [] { = [] } } }if < 5 {/* Small histogram will model it well. */return }if < 4 {varuint = - if < 6 {for = 1; < -1; ++ {if [-1] != 0 && [] == 0 && [+1] != 0 { [] = 1 } } } }if < 28 {return } }/* 2) Let's mark all population counts that already can be encoded with an RLE code. */for := 0; < int(); ++ { [] = 0 } {varuint32 = [0]/* Let's not spoil any of the existing good RLE codes. Mark any seq of 0's that is longer as 5 as a good_for_rle. 
Mark any seq of non-0's that is longer as 7 as a good_for_rle. */varuint = 0for = 0; <= ; ++ {if == || [] != {if ( == 0 && >= 5) || ( != 0 && >= 7) {varuintfor = 0; < ; ++ { [--1] = 1 } } = 1if != { = [] } } else { ++ } } }/* 3) Let's replace those population counts that lead to more RLE codes. Math here is in 24.8 fixed point representation. */ = 0 = uint(256*([0]+[1]+[2])/3 + 420) = 0for = 0; <= ; ++ {if == || [] != 0 || ( != 0 && [-1] != 0) || (256*[]-uint32()+uint32()) >= uint32(2*) {if >= 4 || ( >= 3 && == 0) {varuintvaruint = ( + /2) / /* The stride must end, collapse what we have, if we have enough (4). */if == 0 { = 1 }if == 0 {/* Don't make an all zeros stride to be upgraded to ones. */ = 0 }for = 0; < ; ++ {/* We don't want to change value at counts[i], that is already belonging to the next stride. Thus - 1. */ [--1] = uint32() } } = 0 = 0if < -2 {/* All interesting strides have a count of at least 4, */ /* at least when non-zeros. */ = uint(256*([]+[+1]+[+2])/3 + 420) } elseif < { = uint(256 * []) } else { = 0 } } ++if != { += uint([])if >= 4 { = (256* + /2) / }if == 4 { += 120 } } }}func decideOverRLEUse( []byte, uint, *bool, *bool) {varuint = 0varuint = 0varuint = 1varuint = 1varuintfor = 0; < ; {varbyte = []varuint = 1varuintfor = + 1; < && [] == ; ++ { ++ }if >= 3 && == 0 { += ++ }if >= 4 && != 0 { += ++ } += } * = > *2 * = > *2}/* Write a Huffman tree from bit depths into the bit-stream representation of a Huffman tree. The generated Huffman tree is to be compressed once more using a Huffman tree */func writeHuffmanTree( []byte, uint, *uint, []byte, []byte) {varbyte = initialRepeatedCodeLengthvaruintvarbool = falsevarbool = falsevaruint = /* Throw away trailing zeros. */for = 0; < ; ++ {if [--1] == 0 { -- } else {break } }/* First gather statistics on if it is a good idea to do RLE. */if > 50 {/* Find RLE coding for longer codes. Shorter codes seem not to benefit from RLE. */decideOverRLEUse(, , &, &) }/* Actual RLE coding. 
*/for = 0; < ; {varbyte = []varuint = 1if ( != 0 && ) || ( == 0 && ) {varuintfor = + 1; < && [] == ; ++ { ++ } }if == 0 {writeHuffmanTreeRepetitionsZeros(, , , ) } else {writeHuffmanTreeRepetitions(, , , , , ) = } += }}var reverseBits_kLut = [16]uint{0x00,0x08,0x04,0x0C,0x02,0x0A,0x06,0x0E,0x01,0x09,0x05,0x0D,0x03,0x0B,0x07,0x0F,}func reverseBits( uint, uint16) uint16 {varuint = reverseBits_kLut[&0x0F]varuintfor = 4; < ; += 4 { <<= 4 = uint16( >> 4) |= reverseBits_kLut[&0x0F] } >>= ((0 - ) & 0x03)returnuint16()}/* 0..15 are values for bits */const maxHuffmanBits = 16/* Get the actual bit values for a tree of bit depths. */func convertBitDepthsToSymbols( []byte, uint, []uint16) {var = [maxHuffmanBits]uint16{0}var [maxHuffmanBits]uint16varuint/* In Brotli, all bit depths are [1..15] 0 bit depth means that the symbol does not exist. */varint = 0for = 0; < ; ++ { [[]]++ } [0] = 0 [0] = 0for = 1; < maxHuffmanBits; ++ { = ( + int([-1])) << 1 [] = uint16() }for = 0; < ; ++ {if [] != 0 { [] = reverseBits(uint([]), [[]]) [[]]++ } }}
The pages are generated with Golds v0.6.7. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @Go100and1 (reachable from the left QR code) to get the latest news of Golds.