Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
651 lines
16 KiB
651 lines
16 KiB
package huff0 |
|
|
|
import ( |
|
"fmt" |
|
"runtime" |
|
"sync" |
|
) |
|
|
|
// Compress1X will compress the input. |
|
// The output can be decoded using Decompress1X. |
|
// Supply a Scratch object. The scratch object contains state about re-use, |
|
// So when sharing across independent encodes, be sure to set the re-use policy. |
|
func Compress1X(in []byte, s *Scratch) (out []byte, reUsed bool, err error) { |
|
s, err = s.prepare(in) |
|
if err != nil { |
|
return nil, false, err |
|
} |
|
return compress(in, s, s.compress1X) |
|
} |
|
|
|
// Compress4X will compress the input. The input is split into 4 independent blocks |
|
// and compressed similar to Compress1X. |
|
// The output can be decoded using Decompress4X. |
|
// Supply a Scratch object. The scratch object contains state about re-use, |
|
// So when sharing across independent encodes, be sure to set the re-use policy. |
|
func Compress4X(in []byte, s *Scratch) (out []byte, reUsed bool, err error) { |
|
s, err = s.prepare(in) |
|
if err != nil { |
|
return nil, false, err |
|
} |
|
if false { |
|
// TODO: compress4Xp only slightly faster. |
|
const parallelThreshold = 8 << 10 |
|
if len(in) < parallelThreshold || runtime.GOMAXPROCS(0) == 1 { |
|
return compress(in, s, s.compress4X) |
|
} |
|
return compress(in, s, s.compress4Xp) |
|
} |
|
return compress(in, s, s.compress4X) |
|
} |
|
|
|
func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)) (out []byte, reUsed bool, err error) { |
|
// Nuke previous table if we cannot reuse anyway. |
|
if s.Reuse == ReusePolicyNone { |
|
s.prevTable = s.prevTable[:0] |
|
} |
|
|
|
// Create histogram, if none was provided. |
|
maxCount := s.maxCount |
|
var canReuse = false |
|
if maxCount == 0 { |
|
maxCount, canReuse = s.countSimple(in) |
|
} else { |
|
canReuse = s.canUseTable(s.prevTable) |
|
} |
|
|
|
// We want the output size to be less than this: |
|
wantSize := len(in) |
|
if s.WantLogLess > 0 { |
|
wantSize -= wantSize >> s.WantLogLess |
|
} |
|
|
|
// Reset for next run. |
|
s.clearCount = true |
|
s.maxCount = 0 |
|
if maxCount >= len(in) { |
|
if maxCount > len(in) { |
|
return nil, false, fmt.Errorf("maxCount (%d) > length (%d)", maxCount, len(in)) |
|
} |
|
if len(in) == 1 { |
|
return nil, false, ErrIncompressible |
|
} |
|
// One symbol, use RLE |
|
return nil, false, ErrUseRLE |
|
} |
|
if maxCount == 1 || maxCount < (len(in)>>7) { |
|
// Each symbol present maximum once or too well distributed. |
|
return nil, false, ErrIncompressible |
|
} |
|
|
|
if s.Reuse == ReusePolicyPrefer && canReuse { |
|
keepTable := s.cTable |
|
keepTL := s.actualTableLog |
|
s.cTable = s.prevTable |
|
s.actualTableLog = s.prevTableLog |
|
s.Out, err = compressor(in) |
|
s.cTable = keepTable |
|
s.actualTableLog = keepTL |
|
if err == nil && len(s.Out) < wantSize { |
|
s.OutData = s.Out |
|
return s.Out, true, nil |
|
} |
|
// Do not attempt to re-use later. |
|
s.prevTable = s.prevTable[:0] |
|
} |
|
|
|
// Calculate new table. |
|
err = s.buildCTable() |
|
if err != nil { |
|
return nil, false, err |
|
} |
|
|
|
if false && !s.canUseTable(s.cTable) { |
|
panic("invalid table generated") |
|
} |
|
|
|
if s.Reuse == ReusePolicyAllow && canReuse { |
|
hSize := len(s.Out) |
|
oldSize := s.prevTable.estimateSize(s.count[:s.symbolLen]) |
|
newSize := s.cTable.estimateSize(s.count[:s.symbolLen]) |
|
if oldSize <= hSize+newSize || hSize+12 >= wantSize { |
|
// Retain cTable even if we re-use. |
|
keepTable := s.cTable |
|
keepTL := s.actualTableLog |
|
|
|
s.cTable = s.prevTable |
|
s.actualTableLog = s.prevTableLog |
|
s.Out, err = compressor(in) |
|
|
|
// Restore ctable. |
|
s.cTable = keepTable |
|
s.actualTableLog = keepTL |
|
if err != nil { |
|
return nil, false, err |
|
} |
|
if len(s.Out) >= wantSize { |
|
return nil, false, ErrIncompressible |
|
} |
|
s.OutData = s.Out |
|
return s.Out, true, nil |
|
} |
|
} |
|
|
|
// Use new table |
|
err = s.cTable.write(s) |
|
if err != nil { |
|
s.OutTable = nil |
|
return nil, false, err |
|
} |
|
s.OutTable = s.Out |
|
|
|
// Compress using new table |
|
s.Out, err = compressor(in) |
|
if err != nil { |
|
s.OutTable = nil |
|
return nil, false, err |
|
} |
|
if len(s.Out) >= wantSize { |
|
s.OutTable = nil |
|
return nil, false, ErrIncompressible |
|
} |
|
// Move current table into previous. |
|
s.prevTable, s.prevTableLog, s.cTable = s.cTable, s.actualTableLog, s.prevTable[:0] |
|
s.OutData = s.Out[len(s.OutTable):] |
|
return s.Out, false, nil |
|
} |
|
|
|
func (s *Scratch) compress1X(src []byte) ([]byte, error) { |
|
return s.compress1xDo(s.Out, src) |
|
} |
|
|
|
func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) { |
|
var bw = bitWriter{out: dst} |
|
|
|
// N is length divisible by 4. |
|
n := len(src) |
|
n -= n & 3 |
|
cTable := s.cTable[:256] |
|
|
|
// Encode last bytes. |
|
for i := len(src) & 3; i > 0; i-- { |
|
bw.encSymbol(cTable, src[n+i-1]) |
|
} |
|
n -= 4 |
|
if s.actualTableLog <= 8 { |
|
for ; n >= 0; n -= 4 { |
|
tmp := src[n : n+4] |
|
// tmp should be len 4 |
|
bw.flush32() |
|
bw.encTwoSymbols(cTable, tmp[3], tmp[2]) |
|
bw.encTwoSymbols(cTable, tmp[1], tmp[0]) |
|
} |
|
} else { |
|
for ; n >= 0; n -= 4 { |
|
tmp := src[n : n+4] |
|
// tmp should be len 4 |
|
bw.flush32() |
|
bw.encTwoSymbols(cTable, tmp[3], tmp[2]) |
|
bw.flush32() |
|
bw.encTwoSymbols(cTable, tmp[1], tmp[0]) |
|
} |
|
} |
|
err := bw.close() |
|
return bw.out, err |
|
} |
|
|
|
var sixZeros [6]byte |
|
|
|
func (s *Scratch) compress4X(src []byte) ([]byte, error) { |
|
if len(src) < 12 { |
|
return nil, ErrIncompressible |
|
} |
|
segmentSize := (len(src) + 3) / 4 |
|
|
|
// Add placeholder for output length |
|
offsetIdx := len(s.Out) |
|
s.Out = append(s.Out, sixZeros[:]...) |
|
|
|
for i := 0; i < 4; i++ { |
|
toDo := src |
|
if len(toDo) > segmentSize { |
|
toDo = toDo[:segmentSize] |
|
} |
|
src = src[len(toDo):] |
|
|
|
var err error |
|
idx := len(s.Out) |
|
s.Out, err = s.compress1xDo(s.Out, toDo) |
|
if err != nil { |
|
return nil, err |
|
} |
|
// Write compressed length as little endian before block. |
|
if i < 3 { |
|
// Last length is not written. |
|
length := len(s.Out) - idx |
|
s.Out[i*2+offsetIdx] = byte(length) |
|
s.Out[i*2+offsetIdx+1] = byte(length >> 8) |
|
} |
|
} |
|
|
|
return s.Out, nil |
|
} |
|
|
|
// compress4Xp will compress 4 streams using separate goroutines. |
|
func (s *Scratch) compress4Xp(src []byte) ([]byte, error) { |
|
if len(src) < 12 { |
|
return nil, ErrIncompressible |
|
} |
|
// Add placeholder for output length |
|
s.Out = s.Out[:6] |
|
|
|
segmentSize := (len(src) + 3) / 4 |
|
var wg sync.WaitGroup |
|
var errs [4]error |
|
wg.Add(4) |
|
for i := 0; i < 4; i++ { |
|
toDo := src |
|
if len(toDo) > segmentSize { |
|
toDo = toDo[:segmentSize] |
|
} |
|
src = src[len(toDo):] |
|
|
|
// Separate goroutine for each block. |
|
go func(i int) { |
|
s.tmpOut[i], errs[i] = s.compress1xDo(s.tmpOut[i][:0], toDo) |
|
wg.Done() |
|
}(i) |
|
} |
|
wg.Wait() |
|
for i := 0; i < 4; i++ { |
|
if errs[i] != nil { |
|
return nil, errs[i] |
|
} |
|
o := s.tmpOut[i] |
|
// Write compressed length as little endian before block. |
|
if i < 3 { |
|
// Last length is not written. |
|
s.Out[i*2] = byte(len(o)) |
|
s.Out[i*2+1] = byte(len(o) >> 8) |
|
} |
|
|
|
// Write output. |
|
s.Out = append(s.Out, o...) |
|
} |
|
return s.Out, nil |
|
} |
|
|
|
// countSimple will create a simple histogram in s.count. |
|
// Returns the biggest count. |
|
// Does not update s.clearCount. |
|
func (s *Scratch) countSimple(in []byte) (max int, reuse bool) { |
|
reuse = true |
|
for _, v := range in { |
|
s.count[v]++ |
|
} |
|
m := uint32(0) |
|
if len(s.prevTable) > 0 { |
|
for i, v := range s.count[:] { |
|
if v > m { |
|
m = v |
|
} |
|
if v > 0 { |
|
s.symbolLen = uint16(i) + 1 |
|
if i >= len(s.prevTable) { |
|
reuse = false |
|
} else { |
|
if s.prevTable[i].nBits == 0 { |
|
reuse = false |
|
} |
|
} |
|
} |
|
} |
|
return int(m), reuse |
|
} |
|
for i, v := range s.count[:] { |
|
if v > m { |
|
m = v |
|
} |
|
if v > 0 { |
|
s.symbolLen = uint16(i) + 1 |
|
} |
|
} |
|
return int(m), false |
|
} |
|
|
|
func (s *Scratch) canUseTable(c cTable) bool { |
|
if len(c) < int(s.symbolLen) { |
|
return false |
|
} |
|
for i, v := range s.count[:s.symbolLen] { |
|
if v != 0 && c[i].nBits == 0 { |
|
return false |
|
} |
|
} |
|
return true |
|
} |
|
|
|
func (s *Scratch) validateTable(c cTable) bool { |
|
if len(c) < int(s.symbolLen) { |
|
return false |
|
} |
|
for i, v := range s.count[:s.symbolLen] { |
|
if v != 0 { |
|
if c[i].nBits == 0 { |
|
return false |
|
} |
|
if c[i].nBits > s.actualTableLog { |
|
return false |
|
} |
|
} |
|
} |
|
return true |
|
} |
|
|
|
// minTableLog provides the minimum logSize to safely represent a distribution. |
|
func (s *Scratch) minTableLog() uint8 { |
|
minBitsSrc := highBit32(uint32(s.br.remain())) + 1 |
|
minBitsSymbols := highBit32(uint32(s.symbolLen-1)) + 2 |
|
if minBitsSrc < minBitsSymbols { |
|
return uint8(minBitsSrc) |
|
} |
|
return uint8(minBitsSymbols) |
|
} |
|
|
|
// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog |
|
func (s *Scratch) optimalTableLog() { |
|
tableLog := s.TableLog |
|
minBits := s.minTableLog() |
|
maxBitsSrc := uint8(highBit32(uint32(s.br.remain()-1))) - 1 |
|
if maxBitsSrc < tableLog { |
|
// Accuracy can be reduced |
|
tableLog = maxBitsSrc |
|
} |
|
if minBits > tableLog { |
|
tableLog = minBits |
|
} |
|
// Need a minimum to safely represent all symbol values |
|
if tableLog < minTablelog { |
|
tableLog = minTablelog |
|
} |
|
if tableLog > tableLogMax { |
|
tableLog = tableLogMax |
|
} |
|
s.actualTableLog = tableLog |
|
} |
|
|
|
type cTableEntry struct { |
|
val uint16 |
|
nBits uint8 |
|
// We have 8 bits extra |
|
} |
|
|
|
const huffNodesMask = huffNodesLen - 1 |
|
|
|
func (s *Scratch) buildCTable() error { |
|
s.optimalTableLog() |
|
s.huffSort() |
|
if cap(s.cTable) < maxSymbolValue+1 { |
|
s.cTable = make([]cTableEntry, s.symbolLen, maxSymbolValue+1) |
|
} else { |
|
s.cTable = s.cTable[:s.symbolLen] |
|
for i := range s.cTable { |
|
s.cTable[i] = cTableEntry{} |
|
} |
|
} |
|
|
|
var startNode = int16(s.symbolLen) |
|
nonNullRank := s.symbolLen - 1 |
|
|
|
nodeNb := int16(startNode) |
|
huffNode := s.nodes[1 : huffNodesLen+1] |
|
|
|
// This overlays the slice above, but allows "-1" index lookups. |
|
// Different from reference implementation. |
|
huffNode0 := s.nodes[0 : huffNodesLen+1] |
|
|
|
for huffNode[nonNullRank].count == 0 { |
|
nonNullRank-- |
|
} |
|
|
|
lowS := int16(nonNullRank) |
|
nodeRoot := nodeNb + lowS - 1 |
|
lowN := nodeNb |
|
huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count |
|
huffNode[lowS].parent, huffNode[lowS-1].parent = uint16(nodeNb), uint16(nodeNb) |
|
nodeNb++ |
|
lowS -= 2 |
|
for n := nodeNb; n <= nodeRoot; n++ { |
|
huffNode[n].count = 1 << 30 |
|
} |
|
// fake entry, strong barrier |
|
huffNode0[0].count = 1 << 31 |
|
|
|
// create parents |
|
for nodeNb <= nodeRoot { |
|
var n1, n2 int16 |
|
if huffNode0[lowS+1].count < huffNode0[lowN+1].count { |
|
n1 = lowS |
|
lowS-- |
|
} else { |
|
n1 = lowN |
|
lowN++ |
|
} |
|
if huffNode0[lowS+1].count < huffNode0[lowN+1].count { |
|
n2 = lowS |
|
lowS-- |
|
} else { |
|
n2 = lowN |
|
lowN++ |
|
} |
|
|
|
huffNode[nodeNb].count = huffNode0[n1+1].count + huffNode0[n2+1].count |
|
huffNode0[n1+1].parent, huffNode0[n2+1].parent = uint16(nodeNb), uint16(nodeNb) |
|
nodeNb++ |
|
} |
|
|
|
// distribute weights (unlimited tree height) |
|
huffNode[nodeRoot].nbBits = 0 |
|
for n := nodeRoot - 1; n >= startNode; n-- { |
|
huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1 |
|
} |
|
for n := uint16(0); n <= nonNullRank; n++ { |
|
huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1 |
|
} |
|
s.actualTableLog = s.setMaxHeight(int(nonNullRank)) |
|
maxNbBits := s.actualTableLog |
|
|
|
// fill result into tree (val, nbBits) |
|
if maxNbBits > tableLogMax { |
|
return fmt.Errorf("internal error: maxNbBits (%d) > tableLogMax (%d)", maxNbBits, tableLogMax) |
|
} |
|
var nbPerRank [tableLogMax + 1]uint16 |
|
var valPerRank [16]uint16 |
|
for _, v := range huffNode[:nonNullRank+1] { |
|
nbPerRank[v.nbBits]++ |
|
} |
|
// determine stating value per rank |
|
{ |
|
min := uint16(0) |
|
for n := maxNbBits; n > 0; n-- { |
|
// get starting value within each rank |
|
valPerRank[n] = min |
|
min += nbPerRank[n] |
|
min >>= 1 |
|
} |
|
} |
|
|
|
// push nbBits per symbol, symbol order |
|
for _, v := range huffNode[:nonNullRank+1] { |
|
s.cTable[v.symbol].nBits = v.nbBits |
|
} |
|
|
|
// assign value within rank, symbol order |
|
t := s.cTable[:s.symbolLen] |
|
for n, val := range t { |
|
nbits := val.nBits & 15 |
|
v := valPerRank[nbits] |
|
t[n].val = v |
|
valPerRank[nbits] = v + 1 |
|
} |
|
|
|
return nil |
|
} |
|
|
|
// huffSort will sort symbols, decreasing order. |
|
func (s *Scratch) huffSort() { |
|
type rankPos struct { |
|
base uint32 |
|
current uint32 |
|
} |
|
|
|
// Clear nodes |
|
nodes := s.nodes[:huffNodesLen+1] |
|
s.nodes = nodes |
|
nodes = nodes[1 : huffNodesLen+1] |
|
|
|
// Sort into buckets based on length of symbol count. |
|
var rank [32]rankPos |
|
for _, v := range s.count[:s.symbolLen] { |
|
r := highBit32(v+1) & 31 |
|
rank[r].base++ |
|
} |
|
// maxBitLength is log2(BlockSizeMax) + 1 |
|
const maxBitLength = 18 + 1 |
|
for n := maxBitLength; n > 0; n-- { |
|
rank[n-1].base += rank[n].base |
|
} |
|
for n := range rank[:maxBitLength] { |
|
rank[n].current = rank[n].base |
|
} |
|
for n, c := range s.count[:s.symbolLen] { |
|
r := (highBit32(c+1) + 1) & 31 |
|
pos := rank[r].current |
|
rank[r].current++ |
|
prev := nodes[(pos-1)&huffNodesMask] |
|
for pos > rank[r].base && c > prev.count { |
|
nodes[pos&huffNodesMask] = prev |
|
pos-- |
|
prev = nodes[(pos-1)&huffNodesMask] |
|
} |
|
nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)} |
|
} |
|
return |
|
} |
|
|
|
func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { |
|
maxNbBits := s.actualTableLog |
|
huffNode := s.nodes[1 : huffNodesLen+1] |
|
//huffNode = huffNode[: huffNodesLen] |
|
|
|
largestBits := huffNode[lastNonNull].nbBits |
|
|
|
// early exit : no elt > maxNbBits |
|
if largestBits <= maxNbBits { |
|
return largestBits |
|
} |
|
totalCost := int(0) |
|
baseCost := int(1) << (largestBits - maxNbBits) |
|
n := uint32(lastNonNull) |
|
|
|
for huffNode[n].nbBits > maxNbBits { |
|
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)) |
|
huffNode[n].nbBits = maxNbBits |
|
n-- |
|
} |
|
// n stops at huffNode[n].nbBits <= maxNbBits |
|
|
|
for huffNode[n].nbBits == maxNbBits { |
|
n-- |
|
} |
|
// n end at index of smallest symbol using < maxNbBits |
|
|
|
// renorm totalCost |
|
totalCost >>= largestBits - maxNbBits /* note : totalCost is necessarily a multiple of baseCost */ |
|
|
|
// repay normalized cost |
|
{ |
|
const noSymbol = 0xF0F0F0F0 |
|
var rankLast [tableLogMax + 2]uint32 |
|
|
|
for i := range rankLast[:] { |
|
rankLast[i] = noSymbol |
|
} |
|
|
|
// Get pos of last (smallest) symbol per rank |
|
{ |
|
currentNbBits := uint8(maxNbBits) |
|
for pos := int(n); pos >= 0; pos-- { |
|
if huffNode[pos].nbBits >= currentNbBits { |
|
continue |
|
} |
|
currentNbBits = huffNode[pos].nbBits // < maxNbBits |
|
rankLast[maxNbBits-currentNbBits] = uint32(pos) |
|
} |
|
} |
|
|
|
for totalCost > 0 { |
|
nBitsToDecrease := uint8(highBit32(uint32(totalCost))) + 1 |
|
|
|
for ; nBitsToDecrease > 1; nBitsToDecrease-- { |
|
highPos := rankLast[nBitsToDecrease] |
|
lowPos := rankLast[nBitsToDecrease-1] |
|
if highPos == noSymbol { |
|
continue |
|
} |
|
if lowPos == noSymbol { |
|
break |
|
} |
|
highTotal := huffNode[highPos].count |
|
lowTotal := 2 * huffNode[lowPos].count |
|
if highTotal <= lowTotal { |
|
break |
|
} |
|
} |
|
// only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) |
|
// HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary |
|
// FIXME: try to remove |
|
for (nBitsToDecrease <= tableLogMax) && (rankLast[nBitsToDecrease] == noSymbol) { |
|
nBitsToDecrease++ |
|
} |
|
totalCost -= 1 << (nBitsToDecrease - 1) |
|
if rankLast[nBitsToDecrease-1] == noSymbol { |
|
// this rank is no longer empty |
|
rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease] |
|
} |
|
huffNode[rankLast[nBitsToDecrease]].nbBits++ |
|
if rankLast[nBitsToDecrease] == 0 { |
|
/* special case, reached largest symbol */ |
|
rankLast[nBitsToDecrease] = noSymbol |
|
} else { |
|
rankLast[nBitsToDecrease]-- |
|
if huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease { |
|
rankLast[nBitsToDecrease] = noSymbol /* this rank is now empty */ |
|
} |
|
} |
|
} |
|
|
|
for totalCost < 0 { /* Sometimes, cost correction overshoot */ |
|
if rankLast[1] == noSymbol { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ |
|
for huffNode[n].nbBits == maxNbBits { |
|
n-- |
|
} |
|
huffNode[n+1].nbBits-- |
|
rankLast[1] = n + 1 |
|
totalCost++ |
|
continue |
|
} |
|
huffNode[rankLast[1]+1].nbBits-- |
|
rankLast[1]++ |
|
totalCost++ |
|
} |
|
} |
|
return maxNbBits |
|
} |
|
|
|
type nodeElt struct { |
|
count uint32 |
|
parent uint16 |
|
symbol byte |
|
nbBits uint8 |
|
}
|
|
|