Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
162 lines
6.0 KiB
162 lines
6.0 KiB
package brotli |
|
|
|
/* Copyright 2015 Google Inc. All Rights Reserved. |
|
|
|
Distributed under MIT license. |
|
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
|
*/ |
|
|
|
/* Greedy block splitter for one block category (literal, command or distance). |
|
*/ |
|
type blockSplitterLiteral struct { |
|
alphabet_size_ uint |
|
min_block_size_ uint |
|
split_threshold_ float64 |
|
num_blocks_ uint |
|
split_ *blockSplit |
|
histograms_ []histogramLiteral |
|
histograms_size_ *uint |
|
target_block_size_ uint |
|
block_size_ uint |
|
curr_histogram_ix_ uint |
|
last_histogram_ix_ [2]uint |
|
last_entropy_ [2]float64 |
|
merge_last_count_ uint |
|
} |
|
|
|
func initBlockSplitterLiteral(self *blockSplitterLiteral, alphabet_size uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramLiteral, histograms_size *uint) { |
|
var max_num_blocks uint = num_symbols/min_block_size + 1 |
|
var max_num_types uint = brotli_min_size_t(max_num_blocks, maxNumberOfBlockTypes+1) |
|
/* We have to allocate one more histogram than the maximum number of block |
|
types for the current histogram when the meta-block is too big. */ |
|
self.alphabet_size_ = alphabet_size |
|
|
|
self.min_block_size_ = min_block_size |
|
self.split_threshold_ = split_threshold |
|
self.num_blocks_ = 0 |
|
self.split_ = split |
|
self.histograms_size_ = histograms_size |
|
self.target_block_size_ = min_block_size |
|
self.block_size_ = 0 |
|
self.curr_histogram_ix_ = 0 |
|
self.merge_last_count_ = 0 |
|
brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks) |
|
brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks) |
|
self.split_.num_blocks = max_num_blocks |
|
assert(*histograms == nil) |
|
*histograms_size = max_num_types |
|
*histograms = make([]histogramLiteral, (*histograms_size)) |
|
self.histograms_ = *histograms |
|
|
|
/* Clear only current histogram. */ |
|
histogramClearLiteral(&self.histograms_[0]) |
|
|
|
self.last_histogram_ix_[1] = 0 |
|
self.last_histogram_ix_[0] = self.last_histogram_ix_[1] |
|
} |
|
|
|
/* Does either of three things: |
|
(1) emits the current block with a new block type; |
|
(2) emits the current block with the type of the second last block; |
|
(3) merges the current block with the last block. */ |
|
func blockSplitterFinishBlockLiteral(self *blockSplitterLiteral, is_final bool) { |
|
var split *blockSplit = self.split_ |
|
var last_entropy []float64 = self.last_entropy_[:] |
|
var histograms []histogramLiteral = self.histograms_ |
|
self.block_size_ = brotli_max_size_t(self.block_size_, self.min_block_size_) |
|
if self.num_blocks_ == 0 { |
|
/* Create first block. */ |
|
split.lengths[0] = uint32(self.block_size_) |
|
|
|
split.types[0] = 0 |
|
last_entropy[0] = bitsEntropy(histograms[0].data_[:], self.alphabet_size_) |
|
last_entropy[1] = last_entropy[0] |
|
self.num_blocks_++ |
|
split.num_types++ |
|
self.curr_histogram_ix_++ |
|
if self.curr_histogram_ix_ < *self.histograms_size_ { |
|
histogramClearLiteral(&histograms[self.curr_histogram_ix_]) |
|
} |
|
self.block_size_ = 0 |
|
} else if self.block_size_ > 0 { |
|
var entropy float64 = bitsEntropy(histograms[self.curr_histogram_ix_].data_[:], self.alphabet_size_) |
|
var combined_histo [2]histogramLiteral |
|
var combined_entropy [2]float64 |
|
var diff [2]float64 |
|
var j uint |
|
for j = 0; j < 2; j++ { |
|
var last_histogram_ix uint = self.last_histogram_ix_[j] |
|
combined_histo[j] = histograms[self.curr_histogram_ix_] |
|
histogramAddHistogramLiteral(&combined_histo[j], &histograms[last_histogram_ix]) |
|
combined_entropy[j] = bitsEntropy(combined_histo[j].data_[0:], self.alphabet_size_) |
|
diff[j] = combined_entropy[j] - entropy - last_entropy[j] |
|
} |
|
|
|
if split.num_types < maxNumberOfBlockTypes && diff[0] > self.split_threshold_ && diff[1] > self.split_threshold_ { |
|
/* Create new block. */ |
|
split.lengths[self.num_blocks_] = uint32(self.block_size_) |
|
|
|
split.types[self.num_blocks_] = byte(split.num_types) |
|
self.last_histogram_ix_[1] = self.last_histogram_ix_[0] |
|
self.last_histogram_ix_[0] = uint(byte(split.num_types)) |
|
last_entropy[1] = last_entropy[0] |
|
last_entropy[0] = entropy |
|
self.num_blocks_++ |
|
split.num_types++ |
|
self.curr_histogram_ix_++ |
|
if self.curr_histogram_ix_ < *self.histograms_size_ { |
|
histogramClearLiteral(&histograms[self.curr_histogram_ix_]) |
|
} |
|
self.block_size_ = 0 |
|
self.merge_last_count_ = 0 |
|
self.target_block_size_ = self.min_block_size_ |
|
} else if diff[1] < diff[0]-20.0 { |
|
split.lengths[self.num_blocks_] = uint32(self.block_size_) |
|
split.types[self.num_blocks_] = split.types[self.num_blocks_-2] |
|
/* Combine this block with second last block. */ |
|
|
|
var tmp uint = self.last_histogram_ix_[0] |
|
self.last_histogram_ix_[0] = self.last_histogram_ix_[1] |
|
self.last_histogram_ix_[1] = tmp |
|
histograms[self.last_histogram_ix_[0]] = combined_histo[1] |
|
last_entropy[1] = last_entropy[0] |
|
last_entropy[0] = combined_entropy[1] |
|
self.num_blocks_++ |
|
self.block_size_ = 0 |
|
histogramClearLiteral(&histograms[self.curr_histogram_ix_]) |
|
self.merge_last_count_ = 0 |
|
self.target_block_size_ = self.min_block_size_ |
|
} else { |
|
/* Combine this block with last block. */ |
|
split.lengths[self.num_blocks_-1] += uint32(self.block_size_) |
|
|
|
histograms[self.last_histogram_ix_[0]] = combined_histo[0] |
|
last_entropy[0] = combined_entropy[0] |
|
if split.num_types == 1 { |
|
last_entropy[1] = last_entropy[0] |
|
} |
|
|
|
self.block_size_ = 0 |
|
histogramClearLiteral(&histograms[self.curr_histogram_ix_]) |
|
self.merge_last_count_++ |
|
if self.merge_last_count_ > 1 { |
|
self.target_block_size_ += self.min_block_size_ |
|
} |
|
} |
|
} |
|
|
|
if is_final { |
|
*self.histograms_size_ = split.num_types |
|
split.num_blocks = self.num_blocks_ |
|
} |
|
} |
|
|
|
/* Adds the next symbol to the current histogram. When the current histogram |
|
reaches the target size, decides on merging the block. */ |
|
func blockSplitterAddSymbolLiteral(self *blockSplitterLiteral, symbol uint) { |
|
histogramAddLiteral(&self.histograms_[self.curr_histogram_ix_], symbol) |
|
self.block_size_++ |
|
if self.block_size_ == self.target_block_size_ { |
|
blockSplitterFinishBlockLiteral(self, false) /* is_final = */ |
|
} |
|
}
|
|
|