Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
436 lines
11 KiB
436 lines
11 KiB
package brotli |
|
|
|
/* Copyright 2013 Google Inc. All Rights Reserved. |
|
|
|
Distributed under MIT license. |
|
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
|
*/ |
|
|
|
/* Functions to estimate the bit cost of Huffman trees. */ |
|
func shannonEntropy(population []uint32, size uint, total *uint) float64 { |
|
var sum uint = 0 |
|
var retval float64 = 0 |
|
var population_end []uint32 = population[size:] |
|
var p uint |
|
for -cap(population) < -cap(population_end) { |
|
p = uint(population[0]) |
|
population = population[1:] |
|
sum += p |
|
retval -= float64(p) * fastLog2(p) |
|
} |
|
|
|
if sum != 0 { |
|
retval += float64(sum) * fastLog2(sum) |
|
} |
|
*total = sum |
|
return retval |
|
} |
|
|
|
func bitsEntropy(population []uint32, size uint) float64 { |
|
var sum uint |
|
var retval float64 = shannonEntropy(population, size, &sum) |
|
if retval < float64(sum) { |
|
/* At least one bit per literal is needed. */ |
|
retval = float64(sum) |
|
} |
|
|
|
return retval |
|
} |
|
|
|
const kOneSymbolHistogramCost float64 = 12 |
|
const kTwoSymbolHistogramCost float64 = 20 |
|
const kThreeSymbolHistogramCost float64 = 28 |
|
const kFourSymbolHistogramCost float64 = 37 |
|
|
|
func populationCostLiteral(histogram *histogramLiteral) float64 { |
|
var data_size uint = histogramDataSizeLiteral() |
|
var count int = 0 |
|
var s [5]uint |
|
var bits float64 = 0.0 |
|
var i uint |
|
if histogram.total_count_ == 0 { |
|
return kOneSymbolHistogramCost |
|
} |
|
|
|
for i = 0; i < data_size; i++ { |
|
if histogram.data_[i] > 0 { |
|
s[count] = i |
|
count++ |
|
if count > 4 { |
|
break |
|
} |
|
} |
|
} |
|
|
|
if count == 1 { |
|
return kOneSymbolHistogramCost |
|
} |
|
|
|
if count == 2 { |
|
return kTwoSymbolHistogramCost + float64(histogram.total_count_) |
|
} |
|
|
|
if count == 3 { |
|
var histo0 uint32 = histogram.data_[s[0]] |
|
var histo1 uint32 = histogram.data_[s[1]] |
|
var histo2 uint32 = histogram.data_[s[2]] |
|
var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2)) |
|
return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax) |
|
} |
|
|
|
if count == 4 { |
|
var histo [4]uint32 |
|
var h23 uint32 |
|
var histomax uint32 |
|
for i = 0; i < 4; i++ { |
|
histo[i] = histogram.data_[s[i]] |
|
} |
|
|
|
/* Sort */ |
|
for i = 0; i < 4; i++ { |
|
var j uint |
|
for j = i + 1; j < 4; j++ { |
|
if histo[j] > histo[i] { |
|
var tmp uint32 = histo[j] |
|
histo[j] = histo[i] |
|
histo[i] = tmp |
|
} |
|
} |
|
} |
|
|
|
h23 = histo[2] + histo[3] |
|
histomax = brotli_max_uint32_t(h23, histo[0]) |
|
return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax) |
|
} |
|
{ |
|
var max_depth uint = 1 |
|
var depth_histo = [codeLengthCodes]uint32{0} |
|
/* In this loop we compute the entropy of the histogram and simultaneously |
|
build a simplified histogram of the code length codes where we use the |
|
zero repeat code 17, but we don't use the non-zero repeat code 16. */ |
|
|
|
var log2total float64 = fastLog2(histogram.total_count_) |
|
for i = 0; i < data_size; { |
|
if histogram.data_[i] > 0 { |
|
var log2p float64 = log2total - fastLog2(uint(histogram.data_[i])) |
|
/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) = |
|
= log2(total_count) - log2(count(symbol)) */ |
|
|
|
var depth uint = uint(log2p + 0.5) |
|
/* Approximate the bit depth by round(-log2(P(symbol))) */ |
|
bits += float64(histogram.data_[i]) * log2p |
|
|
|
if depth > 15 { |
|
depth = 15 |
|
} |
|
|
|
if depth > max_depth { |
|
max_depth = depth |
|
} |
|
|
|
depth_histo[depth]++ |
|
i++ |
|
} else { |
|
var reps uint32 = 1 |
|
/* Compute the run length of zeros and add the appropriate number of 0 |
|
and 17 code length codes to the code length code histogram. */ |
|
|
|
var k uint |
|
for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ { |
|
reps++ |
|
} |
|
|
|
i += uint(reps) |
|
if i == data_size { |
|
/* Don't add any cost for the last zero run, since these are encoded |
|
only implicitly. */ |
|
break |
|
} |
|
|
|
if reps < 3 { |
|
depth_histo[0] += reps |
|
} else { |
|
reps -= 2 |
|
for reps > 0 { |
|
depth_histo[repeatZeroCodeLength]++ |
|
|
|
/* Add the 3 extra bits for the 17 code length code. */ |
|
bits += 3 |
|
|
|
reps >>= 3 |
|
} |
|
} |
|
} |
|
} |
|
|
|
/* Add the estimated encoding cost of the code length code histogram. */ |
|
bits += float64(18 + 2*max_depth) |
|
|
|
/* Add the entropy of the code length code histogram. */ |
|
bits += bitsEntropy(depth_histo[:], codeLengthCodes) |
|
} |
|
|
|
return bits |
|
} |
|
|
|
func populationCostCommand(histogram *histogramCommand) float64 { |
|
var data_size uint = histogramDataSizeCommand() |
|
var count int = 0 |
|
var s [5]uint |
|
var bits float64 = 0.0 |
|
var i uint |
|
if histogram.total_count_ == 0 { |
|
return kOneSymbolHistogramCost |
|
} |
|
|
|
for i = 0; i < data_size; i++ { |
|
if histogram.data_[i] > 0 { |
|
s[count] = i |
|
count++ |
|
if count > 4 { |
|
break |
|
} |
|
} |
|
} |
|
|
|
if count == 1 { |
|
return kOneSymbolHistogramCost |
|
} |
|
|
|
if count == 2 { |
|
return kTwoSymbolHistogramCost + float64(histogram.total_count_) |
|
} |
|
|
|
if count == 3 { |
|
var histo0 uint32 = histogram.data_[s[0]] |
|
var histo1 uint32 = histogram.data_[s[1]] |
|
var histo2 uint32 = histogram.data_[s[2]] |
|
var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2)) |
|
return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax) |
|
} |
|
|
|
if count == 4 { |
|
var histo [4]uint32 |
|
var h23 uint32 |
|
var histomax uint32 |
|
for i = 0; i < 4; i++ { |
|
histo[i] = histogram.data_[s[i]] |
|
} |
|
|
|
/* Sort */ |
|
for i = 0; i < 4; i++ { |
|
var j uint |
|
for j = i + 1; j < 4; j++ { |
|
if histo[j] > histo[i] { |
|
var tmp uint32 = histo[j] |
|
histo[j] = histo[i] |
|
histo[i] = tmp |
|
} |
|
} |
|
} |
|
|
|
h23 = histo[2] + histo[3] |
|
histomax = brotli_max_uint32_t(h23, histo[0]) |
|
return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax) |
|
} |
|
{ |
|
var max_depth uint = 1 |
|
var depth_histo = [codeLengthCodes]uint32{0} |
|
/* In this loop we compute the entropy of the histogram and simultaneously |
|
build a simplified histogram of the code length codes where we use the |
|
zero repeat code 17, but we don't use the non-zero repeat code 16. */ |
|
|
|
var log2total float64 = fastLog2(histogram.total_count_) |
|
for i = 0; i < data_size; { |
|
if histogram.data_[i] > 0 { |
|
var log2p float64 = log2total - fastLog2(uint(histogram.data_[i])) |
|
/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) = |
|
= log2(total_count) - log2(count(symbol)) */ |
|
|
|
var depth uint = uint(log2p + 0.5) |
|
/* Approximate the bit depth by round(-log2(P(symbol))) */ |
|
bits += float64(histogram.data_[i]) * log2p |
|
|
|
if depth > 15 { |
|
depth = 15 |
|
} |
|
|
|
if depth > max_depth { |
|
max_depth = depth |
|
} |
|
|
|
depth_histo[depth]++ |
|
i++ |
|
} else { |
|
var reps uint32 = 1 |
|
/* Compute the run length of zeros and add the appropriate number of 0 |
|
and 17 code length codes to the code length code histogram. */ |
|
|
|
var k uint |
|
for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ { |
|
reps++ |
|
} |
|
|
|
i += uint(reps) |
|
if i == data_size { |
|
/* Don't add any cost for the last zero run, since these are encoded |
|
only implicitly. */ |
|
break |
|
} |
|
|
|
if reps < 3 { |
|
depth_histo[0] += reps |
|
} else { |
|
reps -= 2 |
|
for reps > 0 { |
|
depth_histo[repeatZeroCodeLength]++ |
|
|
|
/* Add the 3 extra bits for the 17 code length code. */ |
|
bits += 3 |
|
|
|
reps >>= 3 |
|
} |
|
} |
|
} |
|
} |
|
|
|
/* Add the estimated encoding cost of the code length code histogram. */ |
|
bits += float64(18 + 2*max_depth) |
|
|
|
/* Add the entropy of the code length code histogram. */ |
|
bits += bitsEntropy(depth_histo[:], codeLengthCodes) |
|
} |
|
|
|
return bits |
|
} |
|
|
|
func populationCostDistance(histogram *histogramDistance) float64 { |
|
var data_size uint = histogramDataSizeDistance() |
|
var count int = 0 |
|
var s [5]uint |
|
var bits float64 = 0.0 |
|
var i uint |
|
if histogram.total_count_ == 0 { |
|
return kOneSymbolHistogramCost |
|
} |
|
|
|
for i = 0; i < data_size; i++ { |
|
if histogram.data_[i] > 0 { |
|
s[count] = i |
|
count++ |
|
if count > 4 { |
|
break |
|
} |
|
} |
|
} |
|
|
|
if count == 1 { |
|
return kOneSymbolHistogramCost |
|
} |
|
|
|
if count == 2 { |
|
return kTwoSymbolHistogramCost + float64(histogram.total_count_) |
|
} |
|
|
|
if count == 3 { |
|
var histo0 uint32 = histogram.data_[s[0]] |
|
var histo1 uint32 = histogram.data_[s[1]] |
|
var histo2 uint32 = histogram.data_[s[2]] |
|
var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2)) |
|
return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax) |
|
} |
|
|
|
if count == 4 { |
|
var histo [4]uint32 |
|
var h23 uint32 |
|
var histomax uint32 |
|
for i = 0; i < 4; i++ { |
|
histo[i] = histogram.data_[s[i]] |
|
} |
|
|
|
/* Sort */ |
|
for i = 0; i < 4; i++ { |
|
var j uint |
|
for j = i + 1; j < 4; j++ { |
|
if histo[j] > histo[i] { |
|
var tmp uint32 = histo[j] |
|
histo[j] = histo[i] |
|
histo[i] = tmp |
|
} |
|
} |
|
} |
|
|
|
h23 = histo[2] + histo[3] |
|
histomax = brotli_max_uint32_t(h23, histo[0]) |
|
return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax) |
|
} |
|
{ |
|
var max_depth uint = 1 |
|
var depth_histo = [codeLengthCodes]uint32{0} |
|
/* In this loop we compute the entropy of the histogram and simultaneously |
|
build a simplified histogram of the code length codes where we use the |
|
zero repeat code 17, but we don't use the non-zero repeat code 16. */ |
|
|
|
var log2total float64 = fastLog2(histogram.total_count_) |
|
for i = 0; i < data_size; { |
|
if histogram.data_[i] > 0 { |
|
var log2p float64 = log2total - fastLog2(uint(histogram.data_[i])) |
|
/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) = |
|
= log2(total_count) - log2(count(symbol)) */ |
|
|
|
var depth uint = uint(log2p + 0.5) |
|
/* Approximate the bit depth by round(-log2(P(symbol))) */ |
|
bits += float64(histogram.data_[i]) * log2p |
|
|
|
if depth > 15 { |
|
depth = 15 |
|
} |
|
|
|
if depth > max_depth { |
|
max_depth = depth |
|
} |
|
|
|
depth_histo[depth]++ |
|
i++ |
|
} else { |
|
var reps uint32 = 1 |
|
/* Compute the run length of zeros and add the appropriate number of 0 |
|
and 17 code length codes to the code length code histogram. */ |
|
|
|
var k uint |
|
for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ { |
|
reps++ |
|
} |
|
|
|
i += uint(reps) |
|
if i == data_size { |
|
/* Don't add any cost for the last zero run, since these are encoded |
|
only implicitly. */ |
|
break |
|
} |
|
|
|
if reps < 3 { |
|
depth_histo[0] += reps |
|
} else { |
|
reps -= 2 |
|
for reps > 0 { |
|
depth_histo[repeatZeroCodeLength]++ |
|
|
|
/* Add the 3 extra bits for the 17 code length code. */ |
|
bits += 3 |
|
|
|
reps >>= 3 |
|
} |
|
} |
|
} |
|
} |
|
|
|
/* Add the estimated encoding cost of the code length code histogram. */ |
|
bits += float64(18 + 2*max_depth) |
|
|
|
/* Add the entropy of the code length code histogram. */ |
|
bits += bitsEntropy(depth_histo[:], codeLengthCodes) |
|
} |
|
|
|
return bits |
|
}
|
|
|