Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
344 lines
9.2 KiB
344 lines
9.2 KiB
package brotli |
|
|
|
import ( |
|
"encoding/binary" |
|
"fmt" |
|
) |
|
|
|
type hasherCommon struct { |
|
params hasherParams |
|
is_prepared_ bool |
|
dict_num_lookups uint |
|
dict_num_matches uint |
|
} |
|
|
|
func (h *hasherCommon) Common() *hasherCommon { |
|
return h |
|
} |
|
|
|
type hasherHandle interface { |
|
Common() *hasherCommon |
|
Initialize(params *encoderParams) |
|
Prepare(one_shot bool, input_size uint, data []byte) |
|
StitchToPreviousBlock(num_bytes uint, position uint, ringbuffer []byte, ringbuffer_mask uint) |
|
HashTypeLength() uint |
|
StoreLookahead() uint |
|
PrepareDistanceCache(distance_cache []int) |
|
FindLongestMatch(dictionary *encoderDictionary, data []byte, ring_buffer_mask uint, distance_cache []int, cur_ix uint, max_length uint, max_backward uint, gap uint, max_distance uint, out *hasherSearchResult) |
|
StoreRange(data []byte, mask uint, ix_start uint, ix_end uint) |
|
Store(data []byte, mask uint, ix uint) |
|
} |
|
|
|
/* score_t is an unsigned integer type for match scores.
   NOTE(review): the scoring helpers visible in this file return plain uint,
   not score_t; confirm whether the type is still used elsewhere. */
type score_t uint
|
|
|
const (
	/* Number of entries packed into kCutoffTransforms. */
	kCutoffTransformsCount uint32 = 10

	/* Ten 6-bit values packed into one word, entry i occupying bits
	   [6*i, 6*i+6). Adding 4*i to entry i yields the transform ids:

	   0, 12, 27, 23, 42, 63, 56, 48, 59, 64
	   0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
	kCutoffTransforms uint64 = 0x071B520ADA2D3200
)
|
|
|
/* hasherSearchResult carries the best match found so far. Search functions
   read score as the threshold to beat and overwrite all fields when they
   find a match that is at least as good. */
type hasherSearchResult struct {
	/* Number of matched bytes. */
	len uint

	/* Backward distance of the match. */
	distance uint

	/* Score of the match; see backwardReferenceScore. */
	score uint

	/* For static-dictionary matches: dictionary word length minus the
	   matched length (set by testStaticDictionaryItem). */
	len_code_delta int
}
|
|
|
/* kHashMul32 multiplier has these properties:
   * The multiplier must be odd. Otherwise we may lose the highest bit.
   * No long streaks of ones or zeros.
   * There is no effort to ensure that it is a prime, the oddity is enough
     for this use.
   * The number has been tuned heuristically against compression benchmarks. */
const (
	kHashMul32 uint32 = 0x1E35A7BD

	/* kHashMul32 repeated in both 32-bit halves. */
	kHashMul64 uint64 = 0x1E35A7BD1E35A7BD

	kHashMul64Long uint64 = 0x1FE35A7BD3579BD3
)
|
|
|
func hash14(data []byte) uint32 { |
|
var h uint32 = binary.LittleEndian.Uint32(data) * kHashMul32 |
|
|
|
/* The higher bits contain more mixture from the multiplication, |
|
so we take our results from there. */ |
|
return h >> (32 - 14) |
|
} |
|
|
|
/* prepareDistanceCache fills the derived entries of distance_cache.

   When num_distances > 4, entries 4..9 are set to the last distance
   (distance_cache[0]) -1, +1, -2, +2, -3, +3. When num_distances > 10,
   entries 10..15 are set the same way from the next-to-last distance
   (distance_cache[1]). Nothing is written when num_distances <= 4.

   distance_cache must be long enough for the entries being written
   (10 or 16 elements respectively). */
func prepareDistanceCache(distance_cache []int, num_distances int) {
	if num_distances <= 4 {
		return
	}

	last := distance_cache[0]
	distance_cache[4] = last - 1
	distance_cache[5] = last + 1
	distance_cache[6] = last - 2
	distance_cache[7] = last + 2
	distance_cache[8] = last - 3
	distance_cache[9] = last + 3

	if num_distances <= 10 {
		return
	}

	nextLast := distance_cache[1]
	distance_cache[10] = nextLast - 1
	distance_cache[11] = nextLast + 1
	distance_cache[12] = nextLast - 2
	distance_cache[13] = nextLast + 2
	distance_cache[14] = nextLast - 3
	distance_cache[15] = nextLast + 3
}
|
|
|
const (
	/* Score contribution of every byte covered by a match. */
	literalByteScore = 135

	/* Score deducted per bit of floor(log2(distance)). */
	distanceBitPenalty = 30

	/* Score must be positive after applying maximal penalty. */
	scoreBase = (distanceBitPenalty * 8 * 8)
)
|
|
|
/* Usually, we always choose the longest backward reference. This function |
|
allows for the exception of that rule. |
|
|
|
If we choose a backward reference that is further away, it will |
|
usually be coded with more bits. We approximate this by assuming |
|
log2(distance). If the distance can be expressed in terms of the |
|
last four distances, we use some heuristic constants to estimate |
|
the bits cost. For the first up to four literals we use the bit |
|
cost of the literals from the literal cost model, after that we |
|
use the average bit cost of the cost model. |
|
|
|
This function is used to sometimes discard a longer backward reference |
|
when it is not much longer and the bit cost for encoding it is more |
|
than the saved literals. |
|
|
|
backward_reference_offset MUST be positive. */ |
|
func backwardReferenceScore(copy_length uint, backward_reference_offset uint) uint { |
|
return scoreBase + literalByteScore*uint(copy_length) - distanceBitPenalty*uint(log2FloorNonZero(backward_reference_offset)) |
|
} |
|
|
|
func backwardReferenceScoreUsingLastDistance(copy_length uint) uint { |
|
return literalByteScore*uint(copy_length) + scoreBase + 15 |
|
} |
|
|
|
/* backwardReferencePenaltyUsingLastDistance returns the penalty for
   distance_short_code: 39 plus an even value in 0..14 looked up in a packed
   bit table. The lowest bit of distance_short_code is masked off, so codes
   2k and 2k+1 share a penalty. */
func backwardReferencePenaltyUsingLastDistance(distance_short_code uint) uint {
	/* Lookup table read as overlapping 4-bit windows at even bit offsets. */
	const packedPenalties uint = 0x1CA10

	shift := distance_short_code & 0xE
	return 39 + ((packedPenalties >> shift) & 0xE)
}
|
|
|
func testStaticDictionaryItem(dictionary *encoderDictionary, item uint, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult) bool { |
|
var len uint |
|
var word_idx uint |
|
var offset uint |
|
var matchlen uint |
|
var backward uint |
|
var score uint |
|
len = item & 0x1F |
|
word_idx = item >> 5 |
|
offset = uint(dictionary.words.offsets_by_length[len]) + len*word_idx |
|
if len > max_length { |
|
return false |
|
} |
|
|
|
matchlen = findMatchLengthWithLimit(data, dictionary.words.data[offset:], uint(len)) |
|
if matchlen+uint(dictionary.cutoffTransformsCount) <= len || matchlen == 0 { |
|
return false |
|
} |
|
{ |
|
var cut uint = len - matchlen |
|
var transform_id uint = (cut << 2) + uint((dictionary.cutoffTransforms>>(cut*6))&0x3F) |
|
backward = max_backward + 1 + word_idx + (transform_id << dictionary.words.size_bits_by_length[len]) |
|
} |
|
|
|
if backward > max_distance { |
|
return false |
|
} |
|
|
|
score = backwardReferenceScore(matchlen, backward) |
|
if score < out.score { |
|
return false |
|
} |
|
|
|
out.len = matchlen |
|
out.len_code_delta = int(len) - int(matchlen) |
|
out.distance = backward |
|
out.score = score |
|
return true |
|
} |
|
|
|
func searchInStaticDictionary(dictionary *encoderDictionary, handle hasherHandle, data []byte, max_length uint, max_backward uint, max_distance uint, out *hasherSearchResult, shallow bool) { |
|
var key uint |
|
var i uint |
|
var self *hasherCommon = handle.Common() |
|
if self.dict_num_matches < self.dict_num_lookups>>7 { |
|
return |
|
} |
|
|
|
key = uint(hash14(data) << 1) |
|
for i = 0; ; (func() { i++; key++ })() { |
|
var tmp uint |
|
if shallow { |
|
tmp = 1 |
|
} else { |
|
tmp = 2 |
|
} |
|
if i >= tmp { |
|
break |
|
} |
|
var item uint = uint(dictionary.hash_table[key]) |
|
self.dict_num_lookups++ |
|
if item != 0 { |
|
var item_matches bool = testStaticDictionaryItem(dictionary, item, data, max_length, max_backward, max_distance, out) |
|
if item_matches { |
|
self.dict_num_matches++ |
|
} |
|
} |
|
} |
|
} |
|
|
|
/* backwardMatch is a compact record of one match. */
type backwardMatch struct {
	/* Backward distance of the match. */
	distance uint32

	/* Match length in bits 5 and up; the low 5 bits hold the length code,
	   or 0 when the length code equals the length. */
	length_and_code uint32
}
|
|
|
func initBackwardMatch(self *backwardMatch, dist uint, len uint) { |
|
self.distance = uint32(dist) |
|
self.length_and_code = uint32(len << 5) |
|
} |
|
|
|
func initDictionaryBackwardMatch(self *backwardMatch, dist uint, len uint, len_code uint) { |
|
self.distance = uint32(dist) |
|
var tmp uint |
|
if len == len_code { |
|
tmp = 0 |
|
} else { |
|
tmp = len_code |
|
} |
|
self.length_and_code = uint32(len<<5 | tmp) |
|
} |
|
|
|
func backwardMatchLength(self *backwardMatch) uint { |
|
return uint(self.length_and_code >> 5) |
|
} |
|
|
|
func backwardMatchLengthCode(self *backwardMatch) uint { |
|
var code uint = uint(self.length_and_code) & 31 |
|
if code != 0 { |
|
return code |
|
} else { |
|
return backwardMatchLength(self) |
|
} |
|
} |
|
|
|
func hasherReset(handle hasherHandle) { |
|
if handle == nil { |
|
return |
|
} |
|
handle.Common().is_prepared_ = false |
|
} |
|
|
|
func newHasher(typ int) hasherHandle { |
|
switch typ { |
|
case 2: |
|
return &hashLongestMatchQuickly{ |
|
bucketBits: 16, |
|
bucketSweep: 1, |
|
hashLen: 5, |
|
useDictionary: true, |
|
} |
|
case 3: |
|
return &hashLongestMatchQuickly{ |
|
bucketBits: 16, |
|
bucketSweep: 2, |
|
hashLen: 5, |
|
useDictionary: false, |
|
} |
|
case 4: |
|
return &hashLongestMatchQuickly{ |
|
bucketBits: 17, |
|
bucketSweep: 4, |
|
hashLen: 5, |
|
useDictionary: true, |
|
} |
|
case 5: |
|
return new(h5) |
|
case 6: |
|
return new(h6) |
|
case 10: |
|
return new(h10) |
|
case 35: |
|
return &hashComposite{ |
|
ha: newHasher(3), |
|
hb: &hashRolling{jump: 4}, |
|
} |
|
case 40: |
|
return &hashForgetfulChain{ |
|
bucketBits: 15, |
|
numBanks: 1, |
|
bankBits: 16, |
|
numLastDistancesToCheck: 4, |
|
} |
|
case 41: |
|
return &hashForgetfulChain{ |
|
bucketBits: 15, |
|
numBanks: 1, |
|
bankBits: 16, |
|
numLastDistancesToCheck: 10, |
|
} |
|
case 42: |
|
return &hashForgetfulChain{ |
|
bucketBits: 15, |
|
numBanks: 512, |
|
bankBits: 9, |
|
numLastDistancesToCheck: 16, |
|
} |
|
case 54: |
|
return &hashLongestMatchQuickly{ |
|
bucketBits: 20, |
|
bucketSweep: 4, |
|
hashLen: 7, |
|
useDictionary: false, |
|
} |
|
case 55: |
|
return &hashComposite{ |
|
ha: newHasher(54), |
|
hb: &hashRolling{jump: 4}, |
|
} |
|
case 65: |
|
return &hashComposite{ |
|
ha: newHasher(6), |
|
hb: &hashRolling{jump: 1}, |
|
} |
|
} |
|
|
|
panic(fmt.Sprintf("unknown hasher type: %d", typ)) |
|
} |
|
|
|
func hasherSetup(handle *hasherHandle, params *encoderParams, data []byte, position uint, input_size uint, is_last bool) { |
|
var self hasherHandle = nil |
|
var common *hasherCommon = nil |
|
var one_shot bool = (position == 0 && is_last) |
|
if *handle == nil { |
|
chooseHasher(params, ¶ms.hasher) |
|
self = newHasher(params.hasher.type_) |
|
|
|
*handle = self |
|
common = self.Common() |
|
common.params = params.hasher |
|
self.Initialize(params) |
|
} |
|
|
|
self = *handle |
|
common = self.Common() |
|
if !common.is_prepared_ { |
|
self.Prepare(one_shot, input_size, data) |
|
|
|
if position == 0 { |
|
common.dict_num_lookups = 0 |
|
common.dict_num_matches = 0 |
|
} |
|
|
|
common.is_prepared_ = true |
|
} |
|
} |
|
|
|
func initOrStitchToPreviousBlock(handle *hasherHandle, data []byte, mask uint, params *encoderParams, position uint, input_size uint, is_last bool) { |
|
var self hasherHandle |
|
hasherSetup(handle, params, data, position, input_size, is_last) |
|
self = *handle |
|
self.StitchToPreviousBlock(input_size, position, data, mask) |
|
}
|
|
|