package brotli
import "encoding/binary"
const maxStaticDictionaryMatchLen = 37
const kInvalidMatch uint32 = 0xFFFFFFF
func hash(data []byte ) uint32 {
var h uint32 = binary .LittleEndian .Uint32 (data ) * kDictHashMul32
return h >> uint (32 -kDictNumBits )
}
func addMatch(distance uint , len uint , len_code uint , matches []uint32 ) {
var match uint32 = uint32 ((distance << 5 ) + len_code )
matches [len ] = brotli_min_uint32_t (matches [len ], match )
}
func dictMatchLength(dict *dictionary , data []byte , id uint , len uint , maxlen uint ) uint {
var offset uint = uint (dict .offsets_by_length [len ]) + len *id
return findMatchLengthWithLimit (dict .data [offset :], data , brotli_min_size_t (uint (len ), maxlen ))
}
func isMatch(d *dictionary , w dictWord , data []byte , max_length uint ) bool {
if uint (w .len ) > max_length {
return false
} else {
var offset uint = uint (d .offsets_by_length [w .len ]) + uint (w .len )*uint (w .idx )
var dict []byte = d .data [offset :]
if w .transform == 0 {
return findMatchLengthWithLimit (dict , data , uint (w .len )) == uint (w .len )
} else if w .transform == 10 {
return dict [0 ] >= 'a' && dict [0 ] <= 'z' && (dict [0 ]^32 ) == data [0 ] && findMatchLengthWithLimit (dict [1 :], data [1 :], uint (w .len )-1 ) == uint (w .len -1 )
} else {
var i uint
for i = 0 ; i < uint (w .len ); i ++ {
if dict [i ] >= 'a' && dict [i ] <= 'z' {
if (dict [i ] ^ 32 ) != data [i ] {
return false
}
} else {
if dict [i ] != data [i ] {
return false
}
}
}
return true
}
}
}
func findAllStaticDictionaryMatches(dict *encoderDictionary , data []byte , min_length uint , max_length uint , matches []uint32 ) bool {
var has_found_match bool = false
{
var offset uint = uint (dict .buckets [hash (data )])
var end bool = offset == 0
for !end {
w := dict .dict_words [offset ]
offset ++
var l uint = uint (w .len ) & 0x1F
var n uint = uint (1 ) << dict .words .size_bits_by_length [l ]
var id uint = uint (w .idx )
end = !(w .len &0x80 == 0 )
w .len = byte (l )
if w .transform == 0 {
var matchlen uint = dictMatchLength (dict .words , data , id , l , max_length )
var s []byte
var minlen uint
var maxlen uint
var len uint
if matchlen == l {
addMatch (id , l , l , matches )
has_found_match = true
}
if matchlen >= l -1 {
addMatch (id +12 *n , l -1 , l , matches )
if l +2 < max_length && data [l -1 ] == 'i' && data [l ] == 'n' && data [l +1 ] == 'g' && data [l +2 ] == ' ' {
addMatch (id +49 *n , l +3 , l , matches )
}
has_found_match = true
}
minlen = min_length
if l > 9 {
minlen = brotli_max_size_t (minlen , l -9 )
}
maxlen = brotli_min_size_t (matchlen , l -2 )
for len = minlen ; len <= maxlen ; len ++ {
var cut uint = l - len
var transform_id uint = (cut << 2 ) + uint ((dict .cutoffTransforms >>(cut *6 ))&0x3F )
addMatch (id +transform_id *n , uint (len ), l , matches )
has_found_match = true
}
if matchlen < l || l +6 >= max_length {
continue
}
s = data [l :]
if s [0 ] == ' ' {
addMatch (id +n , l +1 , l , matches )
if s [1 ] == 'a' {
if s [2 ] == ' ' {
addMatch (id +28 *n , l +3 , l , matches )
} else if s [2 ] == 's' {
if s [3 ] == ' ' {
addMatch (id +46 *n , l +4 , l , matches )
}
} else if s [2 ] == 't' {
if s [3 ] == ' ' {
addMatch (id +60 *n , l +4 , l , matches )
}
} else if s [2 ] == 'n' {
if s [3 ] == 'd' && s [4 ] == ' ' {
addMatch (id +10 *n , l +5 , l , matches )
}
}
} else if s [1 ] == 'b' {
if s [2 ] == 'y' && s [3 ] == ' ' {
addMatch (id +38 *n , l +4 , l , matches )
}
} else if s [1 ] == 'i' {
if s [2 ] == 'n' {
if s [3 ] == ' ' {
addMatch (id +16 *n , l +4 , l , matches )
}
} else if s [2 ] == 's' {
if s [3 ] == ' ' {
addMatch (id +47 *n , l +4 , l , matches )
}
}
} else if s [1 ] == 'f' {
if s [2 ] == 'o' {
if s [3 ] == 'r' && s [4 ] == ' ' {
addMatch (id +25 *n , l +5 , l , matches )
}
} else if s [2 ] == 'r' {
if s [3 ] == 'o' && s [4 ] == 'm' && s [5 ] == ' ' {
addMatch (id +37 *n , l +6 , l , matches )
}
}
} else if s [1 ] == 'o' {
if s [2 ] == 'f' {
if s [3 ] == ' ' {
addMatch (id +8 *n , l +4 , l , matches )
}
} else if s [2 ] == 'n' {
if s [3 ] == ' ' {
addMatch (id +45 *n , l +4 , l , matches )
}
}
} else if s [1 ] == 'n' {
if s [2 ] == 'o' && s [3 ] == 't' && s [4 ] == ' ' {
addMatch (id +80 *n , l +5 , l , matches )
}
} else if s [1 ] == 't' {
if s [2 ] == 'h' {
if s [3 ] == 'e' {
if s [4 ] == ' ' {
addMatch (id +5 *n , l +5 , l , matches )
}
} else if s [3 ] == 'a' {
if s [4 ] == 't' && s [5 ] == ' ' {
addMatch (id +29 *n , l +6 , l , matches )
}
}
} else if s [2 ] == 'o' {
if s [3 ] == ' ' {
addMatch (id +17 *n , l +4 , l , matches )
}
}
} else if s [1 ] == 'w' {
if s [2 ] == 'i' && s [3 ] == 't' && s [4 ] == 'h' && s [5 ] == ' ' {
addMatch (id +35 *n , l +6 , l , matches )
}
}
} else if s [0 ] == '"' {
addMatch (id +19 *n , l +1 , l , matches )
if s [1 ] == '>' {
addMatch (id +21 *n , l +2 , l , matches )
}
} else if s [0 ] == '.' {
addMatch (id +20 *n , l +1 , l , matches )
if s [1 ] == ' ' {
addMatch (id +31 *n , l +2 , l , matches )
if s [2 ] == 'T' && s [3 ] == 'h' {
if s [4 ] == 'e' {
if s [5 ] == ' ' {
addMatch (id +43 *n , l +6 , l , matches )
}
} else if s [4 ] == 'i' {
if s [5 ] == 's' && s [6 ] == ' ' {
addMatch (id +75 *n , l +7 , l , matches )
}
}
}
}
} else if s [0 ] == ',' {
addMatch (id +76 *n , l +1 , l , matches )
if s [1 ] == ' ' {
addMatch (id +14 *n , l +2 , l , matches )
}
} else if s [0 ] == '\n' {
addMatch (id +22 *n , l +1 , l , matches )
if s [1 ] == '\t' {
addMatch (id +50 *n , l +2 , l , matches )
}
} else if s [0 ] == ']' {
addMatch (id +24 *n , l +1 , l , matches )
} else if s [0 ] == '\'' {
addMatch (id +36 *n , l +1 , l , matches )
} else if s [0 ] == ':' {
addMatch (id +51 *n , l +1 , l , matches )
} else if s [0 ] == '(' {
addMatch (id +57 *n , l +1 , l , matches )
} else if s [0 ] == '=' {
if s [1 ] == '"' {
addMatch (id +70 *n , l +2 , l , matches )
} else if s [1 ] == '\'' {
addMatch (id +86 *n , l +2 , l , matches )
}
} else if s [0 ] == 'a' {
if s [1 ] == 'l' && s [2 ] == ' ' {
addMatch (id +84 *n , l +3 , l , matches )
}
} else if s [0 ] == 'e' {
if s [1 ] == 'd' {
if s [2 ] == ' ' {
addMatch (id +53 *n , l +3 , l , matches )
}
} else if s [1 ] == 'r' {
if s [2 ] == ' ' {
addMatch (id +82 *n , l +3 , l , matches )
}
} else if s [1 ] == 's' {
if s [2 ] == 't' && s [3 ] == ' ' {
addMatch (id +95 *n , l +4 , l , matches )
}
}
} else if s [0 ] == 'f' {
if s [1 ] == 'u' && s [2 ] == 'l' && s [3 ] == ' ' {
addMatch (id +90 *n , l +4 , l , matches )
}
} else if s [0 ] == 'i' {
if s [1 ] == 'v' {
if s [2 ] == 'e' && s [3 ] == ' ' {
addMatch (id +92 *n , l +4 , l , matches )
}
} else if s [1 ] == 'z' {
if s [2 ] == 'e' && s [3 ] == ' ' {
addMatch (id +100 *n , l +4 , l , matches )
}
}
} else if s [0 ] == 'l' {
if s [1 ] == 'e' {
if s [2 ] == 's' && s [3 ] == 's' && s [4 ] == ' ' {
addMatch (id +93 *n , l +5 , l , matches )
}
} else if s [1 ] == 'y' {
if s [2 ] == ' ' {
addMatch (id +61 *n , l +3 , l , matches )
}
}
} else if s [0 ] == 'o' {
if s [1 ] == 'u' && s [2 ] == 's' && s [3 ] == ' ' {
addMatch (id +106 *n , l +4 , l , matches )
}
}
} else {
var is_all_caps bool = (w .transform != transformUppercaseFirst )
var s []byte
if !isMatch (dict .words , w , data , max_length ) {
continue
}
var tmp int
if is_all_caps {
tmp = 44
} else {
tmp = 9
}
addMatch (id +uint (tmp )*n , l , l , matches )
has_found_match = true
if l +1 >= max_length {
continue
}
s = data [l :]
if s [0 ] == ' ' {
var tmp int
if is_all_caps {
tmp = 68
} else {
tmp = 4
}
addMatch (id +uint (tmp )*n , l +1 , l , matches )
} else if s [0 ] == '"' {
var tmp int
if is_all_caps {
tmp = 87
} else {
tmp = 66
}
addMatch (id +uint (tmp )*n , l +1 , l , matches )
if s [1 ] == '>' {
var tmp int
if is_all_caps {
tmp = 97
} else {
tmp = 69
}
addMatch (id +uint (tmp )*n , l +2 , l , matches )
}
} else if s [0 ] == '.' {
var tmp int
if is_all_caps {
tmp = 101
} else {
tmp = 79
}
addMatch (id +uint (tmp )*n , l +1 , l , matches )
if s [1 ] == ' ' {
var tmp int
if is_all_caps {
tmp = 114
} else {
tmp = 88
}
addMatch (id +uint (tmp )*n , l +2 , l , matches )
}
} else if s [0 ] == ',' {
var tmp int
if is_all_caps {
tmp = 112
} else {
tmp = 99
}
addMatch (id +uint (tmp )*n , l +1 , l , matches )
if s [1 ] == ' ' {
var tmp int
if is_all_caps {
tmp = 107
} else {
tmp = 58
}
addMatch (id +uint (tmp )*n , l +2 , l , matches )
}
} else if s [0 ] == '\'' {
var tmp int
if is_all_caps {
tmp = 94
} else {
tmp = 74
}
addMatch (id +uint (tmp )*n , l +1 , l , matches )
} else if s [0 ] == '(' {
var tmp int
if is_all_caps {
tmp = 113
} else {
tmp = 78
}
addMatch (id +uint (tmp )*n , l +1 , l , matches )
} else if s [0 ] == '=' {
if s [1 ] == '"' {
var tmp int
if is_all_caps {
tmp = 105
} else {
tmp = 104
}
addMatch (id +uint (tmp )*n , l +2 , l , matches )
} else if s [1 ] == '\'' {
var tmp int
if is_all_caps {
tmp = 116
} else {
tmp = 108
}
addMatch (id +uint (tmp )*n , l +2 , l , matches )
}
}
}
}
}
if max_length >= 5 && (data [0 ] == ' ' || data [0 ] == '.' ) {
var is_space bool = (data [0 ] == ' ' )
var offset uint = uint (dict .buckets [hash (data [1 :])])
var end bool = offset == 0
for !end {
w := dict .dict_words [offset ]
offset ++
var l uint = uint (w .len ) & 0x1F
var n uint = uint (1 ) << dict .words .size_bits_by_length [l ]
var id uint = uint (w .idx )
end = !(w .len &0x80 == 0 )
w .len = byte (l )
if w .transform == 0 {
var s []byte
if !isMatch (dict .words , w , data [1 :], max_length -1 ) {
continue
}
var tmp int
if is_space {
tmp = 6
} else {
tmp = 32
}
addMatch (id +uint (tmp )*n , l +1 , l , matches )
has_found_match = true
if l +2 >= max_length {
continue
}
s = data [l +1 :]
if s [0 ] == ' ' {
var tmp int
if is_space {
tmp = 2
} else {
tmp = 77
}
addMatch (id +uint (tmp )*n , l +2 , l , matches )
} else if s [0 ] == '(' {
var tmp int
if is_space {
tmp = 89
} else {
tmp = 67
}
addMatch (id +uint (tmp )*n , l +2 , l , matches )
} else if is_space {
if s [0 ] == ',' {
addMatch (id +103 *n , l +2 , l , matches )
if s [1 ] == ' ' {
addMatch (id +33 *n , l +3 , l , matches )
}
} else if s [0 ] == '.' {
addMatch (id +71 *n , l +2 , l , matches )
if s [1 ] == ' ' {
addMatch (id +52 *n , l +3 , l , matches )
}
} else if s [0 ] == '=' {
if s [1 ] == '"' {
addMatch (id +81 *n , l +3 , l , matches )
} else if s [1 ] == '\'' {
addMatch (id +98 *n , l +3 , l , matches )
}
}
}
} else if is_space {
var is_all_caps bool = (w .transform != transformUppercaseFirst )
var s []byte
if !isMatch (dict .words , w , data [1 :], max_length -1 ) {
continue
}
var tmp int
if is_all_caps {
tmp = 85
} else {
tmp = 30
}
addMatch (id +uint (tmp )*n , l +1 , l , matches )
has_found_match = true
if l +2 >= max_length {
continue
}
s = data [l +1 :]
if s [0 ] == ' ' {
var tmp int
if is_all_caps {
tmp = 83
} else {
tmp = 15
}
addMatch (id +uint (tmp )*n , l +2 , l , matches )
} else if s [0 ] == ',' {
if !is_all_caps {
addMatch (id +109 *n , l +2 , l , matches )
}
if s [1 ] == ' ' {
var tmp int
if is_all_caps {
tmp = 111
} else {
tmp = 65
}
addMatch (id +uint (tmp )*n , l +3 , l , matches )
}
} else if s [0 ] == '.' {
var tmp int
if is_all_caps {
tmp = 115
} else {
tmp = 96
}
addMatch (id +uint (tmp )*n , l +2 , l , matches )
if s [1 ] == ' ' {
var tmp int
if is_all_caps {
tmp = 117
} else {
tmp = 91
}
addMatch (id +uint (tmp )*n , l +3 , l , matches )
}
} else if s [0 ] == '=' {
if s [1 ] == '"' {
var tmp int
if is_all_caps {
tmp = 110
} else {
tmp = 118
}
addMatch (id +uint (tmp )*n , l +3 , l , matches )
} else if s [1 ] == '\'' {
var tmp int
if is_all_caps {
tmp = 119
} else {
tmp = 120
}
addMatch (id +uint (tmp )*n , l +3 , l , matches )
}
}
}
}
}
if max_length >= 6 {
if (data [1 ] == ' ' && (data [0 ] == 'e' || data [0 ] == 's' || data [0 ] == ',' )) || (data [0 ] == 0xC2 && data [1 ] == 0xA0 ) {
var offset uint = uint (dict .buckets [hash (data [2 :])])
var end bool = offset == 0
for !end {
w := dict .dict_words [offset ]
offset ++
var l uint = uint (w .len ) & 0x1F
var n uint = uint (1 ) << dict .words .size_bits_by_length [l ]
var id uint = uint (w .idx )
end = !(w .len &0x80 == 0 )
w .len = byte (l )
if w .transform == 0 && isMatch (dict .words , w , data [2 :], max_length -2 ) {
if data [0 ] == 0xC2 {
addMatch (id +102 *n , l +2 , l , matches )
has_found_match = true
} else if l +2 < max_length && data [l +2 ] == ' ' {
var t uint = 13
if data [0 ] == 'e' {
t = 18
} else if data [0 ] == 's' {
t = 7
}
addMatch (id +t *n , l +3 , l , matches )
has_found_match = true
}
}
}
}
}
if max_length >= 9 {
if (data [0 ] == ' ' && data [1 ] == 't' && data [2 ] == 'h' && data [3 ] == 'e' && data [4 ] == ' ' ) || (data [0 ] == '.' && data [1 ] == 'c' && data [2 ] == 'o' && data [3 ] == 'm' && data [4 ] == '/' ) {
var offset uint = uint (dict .buckets [hash (data [5 :])])
var end bool = offset == 0
for !end {
w := dict .dict_words [offset ]
offset ++
var l uint = uint (w .len ) & 0x1F
var n uint = uint (1 ) << dict .words .size_bits_by_length [l ]
var id uint = uint (w .idx )
end = !(w .len &0x80 == 0 )
w .len = byte (l )
if w .transform == 0 && isMatch (dict .words , w , data [5 :], max_length -5 ) {
var tmp int
if data [0 ] == ' ' {
tmp = 41
} else {
tmp = 72
}
addMatch (id +uint (tmp )*n , l +5 , l , matches )
has_found_match = true
if l +5 < max_length {
var s []byte = data [l +5 :]
if data [0 ] == ' ' {
if l +8 < max_length && s [0 ] == ' ' && s [1 ] == 'o' && s [2 ] == 'f' && s [3 ] == ' ' {
addMatch (id +62 *n , l +9 , l , matches )
if l +12 < max_length && s [4 ] == 't' && s [5 ] == 'h' && s [6 ] == 'e' && s [7 ] == ' ' {
addMatch (id +73 *n , l +13 , l , matches )
}
}
}
}
}
}
}
}
return has_found_match
}
The pages are generated with Golds v0.6.7 . (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu .
PR and bug reports are welcome and can be submitted to the issue list .
Please follow @Go100and1 (reachable from the left QR code) to get the latest news of Golds .