Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
523 lines
9.5 KiB
523 lines
9.5 KiB
// Copyright 2014-2019 Ulrich Kunitz. All rights reserved. |
|
// Use of this source code is governed by a BSD-style |
|
// license that can be found in the LICENSE file. |
|
|
|
package lzma |
|
|
|
import ( |
|
"bufio" |
|
"errors" |
|
"fmt" |
|
"io" |
|
"unicode" |
|
) |
|
|
|
// node is a single element of the binary tree. All links are indices
// into the node ring buffer of the owning binTree; the null constant
// marks a missing link.
type node struct {
	x uint32 // search value
	p uint32 // parent node
	l uint32 // left child
	r uint32 // right child
}
|
|
|
// wordLen is the number of bytes packed into the x field of a node:
// a uint32 holds 32/8 bytes.
const wordLen = 32 / 8
|
|
|
// binTree supports the identification of the next operation based on a |
|
// binary tree. |
|
// |
|
// Nodes will be identified by their index into the ring buffer. |
|
type binTree struct { |
|
dict *encoderDict |
|
// ring buffer of nodes |
|
node []node |
|
// absolute offset of the entry for the next node. Position 4 |
|
// byte larger. |
|
hoff int64 |
|
// front position in the node ring buffer |
|
front uint32 |
|
// index of the root node |
|
root uint32 |
|
// current x value |
|
x uint32 |
|
// preallocated array |
|
data []byte |
|
} |
|
|
|
// null is the index value that stands for "no node". Zero cannot serve
// that purpose because it is a valid ring buffer index; using it would
// require shifting every stored reference by one.
const null uint32 = 0xffffffff
|
|
|
// newBinTree initializes the binTree structure. The capacity defines |
|
// the size of the buffer and defines the maximum distance for which |
|
// matches will be found. |
|
func newBinTree(capacity int) (t *binTree, err error) { |
|
if capacity < 1 { |
|
return nil, errors.New( |
|
"newBinTree: capacity must be larger than zero") |
|
} |
|
if int64(capacity) >= int64(null) { |
|
return nil, errors.New( |
|
"newBinTree: capacity must less 2^{32}-1") |
|
} |
|
t = &binTree{ |
|
node: make([]node, capacity), |
|
hoff: -int64(wordLen), |
|
root: null, |
|
data: make([]byte, maxMatchLen), |
|
} |
|
return t, nil |
|
} |
|
|
|
func (t *binTree) SetDict(d *encoderDict) { t.dict = d } |
|
|
|
// WriteByte writes a single byte into the binary tree. |
|
func (t *binTree) WriteByte(c byte) error { |
|
t.x = (t.x << 8) | uint32(c) |
|
t.hoff++ |
|
if t.hoff < 0 { |
|
return nil |
|
} |
|
v := t.front |
|
if int64(v) < t.hoff { |
|
// We are overwriting old nodes stored in the tree. |
|
t.remove(v) |
|
} |
|
t.node[v].x = t.x |
|
t.add(v) |
|
t.front++ |
|
if int64(t.front) >= int64(len(t.node)) { |
|
t.front = 0 |
|
} |
|
return nil |
|
} |
|
|
|
// Writes writes a sequence of bytes into the binTree structure. |
|
func (t *binTree) Write(p []byte) (n int, err error) { |
|
for _, c := range p { |
|
t.WriteByte(c) |
|
} |
|
return len(p), nil |
|
} |
|
|
|
// add puts the node v into the tree. The node must not be part of the |
|
// tree before. |
|
func (t *binTree) add(v uint32) { |
|
vn := &t.node[v] |
|
// Set left and right to null indices. |
|
vn.l, vn.r = null, null |
|
// If the binary tree is empty make v the root. |
|
if t.root == null { |
|
t.root = v |
|
vn.p = null |
|
return |
|
} |
|
x := vn.x |
|
p := t.root |
|
// Search for the right leave link and add the new node. |
|
for { |
|
pn := &t.node[p] |
|
if x <= pn.x { |
|
if pn.l == null { |
|
pn.l = v |
|
vn.p = p |
|
return |
|
} |
|
p = pn.l |
|
} else { |
|
if pn.r == null { |
|
pn.r = v |
|
vn.p = p |
|
return |
|
} |
|
p = pn.r |
|
} |
|
} |
|
} |
|
|
|
// parent returns the parent node index of v and the pointer to v value |
|
// in the parent. |
|
func (t *binTree) parent(v uint32) (p uint32, ptr *uint32) { |
|
if t.root == v { |
|
return null, &t.root |
|
} |
|
p = t.node[v].p |
|
if t.node[p].l == v { |
|
ptr = &t.node[p].l |
|
} else { |
|
ptr = &t.node[p].r |
|
} |
|
return |
|
} |
|
|
|
// Remove node v. |
|
func (t *binTree) remove(v uint32) { |
|
vn := &t.node[v] |
|
p, ptr := t.parent(v) |
|
l, r := vn.l, vn.r |
|
if l == null { |
|
// Move the right child up. |
|
*ptr = r |
|
if r != null { |
|
t.node[r].p = p |
|
} |
|
return |
|
} |
|
if r == null { |
|
// Move the left child up. |
|
*ptr = l |
|
t.node[l].p = p |
|
return |
|
} |
|
|
|
// Search the in-order predecessor u. |
|
un := &t.node[l] |
|
ur := un.r |
|
if ur == null { |
|
// In order predecessor is l. Move it up. |
|
un.r = r |
|
t.node[r].p = l |
|
un.p = p |
|
*ptr = l |
|
return |
|
} |
|
var u uint32 |
|
for { |
|
// Look for the max value in the tree where l is root. |
|
u = ur |
|
ur = t.node[u].r |
|
if ur == null { |
|
break |
|
} |
|
} |
|
// replace u with ul |
|
un = &t.node[u] |
|
ul := un.l |
|
up := un.p |
|
t.node[up].r = ul |
|
if ul != null { |
|
t.node[ul].p = up |
|
} |
|
|
|
// replace v by u |
|
un.l, un.r = l, r |
|
t.node[l].p = u |
|
t.node[r].p = u |
|
*ptr = u |
|
un.p = p |
|
} |
|
|
|
// search looks for the node that have the value x or for the nodes that |
|
// brace it. The node highest in the tree with the value x will be |
|
// returned. All other nodes with the same value live in left subtree of |
|
// the returned node. |
|
func (t *binTree) search(v uint32, x uint32) (a, b uint32) { |
|
a, b = null, null |
|
if v == null { |
|
return |
|
} |
|
for { |
|
vn := &t.node[v] |
|
if x <= vn.x { |
|
if x == vn.x { |
|
return v, v |
|
} |
|
b = v |
|
if vn.l == null { |
|
return |
|
} |
|
v = vn.l |
|
} else { |
|
a = v |
|
if vn.r == null { |
|
return |
|
} |
|
v = vn.r |
|
} |
|
} |
|
} |
|
|
|
// max returns the node with maximum value in the subtree with v as |
|
// root. |
|
func (t *binTree) max(v uint32) uint32 { |
|
if v == null { |
|
return null |
|
} |
|
for { |
|
r := t.node[v].r |
|
if r == null { |
|
return v |
|
} |
|
v = r |
|
} |
|
} |
|
|
|
// min returns the node with the minimum value in the subtree with v as |
|
// root. |
|
func (t *binTree) min(v uint32) uint32 { |
|
if v == null { |
|
return null |
|
} |
|
for { |
|
l := t.node[v].l |
|
if l == null { |
|
return v |
|
} |
|
v = l |
|
} |
|
} |
|
|
|
// pred returns the in-order predecessor of node v. |
|
func (t *binTree) pred(v uint32) uint32 { |
|
if v == null { |
|
return null |
|
} |
|
u := t.max(t.node[v].l) |
|
if u != null { |
|
return u |
|
} |
|
for { |
|
p := t.node[v].p |
|
if p == null { |
|
return null |
|
} |
|
if t.node[p].r == v { |
|
return p |
|
} |
|
v = p |
|
} |
|
} |
|
|
|
// succ returns the in-order successor of node v. |
|
func (t *binTree) succ(v uint32) uint32 { |
|
if v == null { |
|
return null |
|
} |
|
u := t.min(t.node[v].r) |
|
if u != null { |
|
return u |
|
} |
|
for { |
|
p := t.node[v].p |
|
if p == null { |
|
return null |
|
} |
|
if t.node[p].l == v { |
|
return p |
|
} |
|
v = p |
|
} |
|
} |
|
|
|
// xval packs the first four bytes of a into a 32-bit unsigned integer
// in big-endian order. If a holds fewer than four bytes, the missing
// low-order bytes are zero; bytes beyond the fourth are ignored.
func xval(a []byte) uint32 {
	n := len(a)
	if n > 4 {
		n = 4
	}
	var x uint32
	for i := 0; i < n; i++ {
		// a[0] ends up in the most significant byte.
		x |= uint32(a[i]) << uint(24-8*i)
	}
	return x
}
|
|
|
// dumpX converts value x into a four-letter string for debug output.
// The bytes of x are taken from the most significant to the least
// significant one. Each byte that is a graphic ASCII character is
// copied as is; every other byte is shown as '.'.
//
// Bytes above the ASCII range are never copied: written raw into the
// result they would form invalid UTF-8 or combine with a neighbor into
// a single multi-byte character, so the string would no longer consist
// of four letters.
func dumpX(x uint32) string {
	var a [4]byte
	for i := range a {
		c := byte(x >> uint((3-i)*8))
		if c > unicode.MaxASCII || !unicode.IsGraphic(rune(c)) {
			c = '.'
		}
		a[i] = c
	}
	return string(a[:])
}
|
|
|
// dumpNode writes a representation of the node v into the io.Writer. |
|
func (t *binTree) dumpNode(w io.Writer, v uint32, indent int) { |
|
if v == null { |
|
return |
|
} |
|
|
|
vn := &t.node[v] |
|
|
|
t.dumpNode(w, vn.r, indent+2) |
|
|
|
for i := 0; i < indent; i++ { |
|
fmt.Fprint(w, " ") |
|
} |
|
if vn.p == null { |
|
fmt.Fprintf(w, "node %d %q parent null\n", v, dumpX(vn.x)) |
|
} else { |
|
fmt.Fprintf(w, "node %d %q parent %d\n", v, dumpX(vn.x), vn.p) |
|
} |
|
|
|
t.dumpNode(w, vn.l, indent+2) |
|
} |
|
|
|
// dump prints a representation of the binary tree into the writer. |
|
func (t *binTree) dump(w io.Writer) error { |
|
bw := bufio.NewWriter(w) |
|
t.dumpNode(bw, t.root, 0) |
|
return bw.Flush() |
|
} |
|
|
|
func (t *binTree) distance(v uint32) int { |
|
dist := int(t.front) - int(v) |
|
if dist <= 0 { |
|
dist += len(t.node) |
|
} |
|
return dist |
|
} |
|
|
|
// matchParams collects the settings for one run of binTree.match.
type matchParams struct {
	// rep holds the repetition distances passed to NextOp; match
	// compares rep[0] against candidates of length one.
	rep [4]uint32
	// nAccept is the length at which a match is accepted right away.
	nAccept int
	// check is the maximum number of candidates to examine.
	check int
	// stopShorter ends the search at the first candidate that does not
	// reach the length of the best match found so far.
	stopShorter bool
}
|
|
|
func (t *binTree) match(m match, distIter func() (int, bool), p matchParams, |
|
) (r match, checked int, accepted bool) { |
|
buf := &t.dict.buf |
|
for { |
|
if checked >= p.check { |
|
return m, checked, true |
|
} |
|
dist, ok := distIter() |
|
if !ok { |
|
return m, checked, false |
|
} |
|
checked++ |
|
if m.n > 0 { |
|
i := buf.rear - dist + m.n - 1 |
|
if i < 0 { |
|
i += len(buf.data) |
|
} else if i >= len(buf.data) { |
|
i -= len(buf.data) |
|
} |
|
if buf.data[i] != t.data[m.n-1] { |
|
if p.stopShorter { |
|
return m, checked, false |
|
} |
|
continue |
|
} |
|
} |
|
n := buf.matchLen(dist, t.data) |
|
switch n { |
|
case 0: |
|
if p.stopShorter { |
|
return m, checked, false |
|
} |
|
continue |
|
case 1: |
|
if uint32(dist-minDistance) != p.rep[0] { |
|
continue |
|
} |
|
} |
|
if n < m.n || (n == m.n && int64(dist) >= m.distance) { |
|
continue |
|
} |
|
m = match{int64(dist), n} |
|
if n >= p.nAccept { |
|
return m, checked, true |
|
} |
|
} |
|
} |
|
|
|
func (t *binTree) NextOp(rep [4]uint32) operation { |
|
// retrieve maxMatchLen data |
|
n, _ := t.dict.buf.Peek(t.data[:maxMatchLen]) |
|
if n == 0 { |
|
panic("no data in buffer") |
|
} |
|
t.data = t.data[:n] |
|
|
|
var ( |
|
m match |
|
x, u, v uint32 |
|
iterPred, iterSucc func() (int, bool) |
|
) |
|
p := matchParams{ |
|
rep: rep, |
|
nAccept: maxMatchLen, |
|
check: 32, |
|
} |
|
i := 4 |
|
iterSmall := func() (dist int, ok bool) { |
|
i-- |
|
if i <= 0 { |
|
return 0, false |
|
} |
|
return i, true |
|
} |
|
m, checked, accepted := t.match(m, iterSmall, p) |
|
if accepted { |
|
goto end |
|
} |
|
p.check -= checked |
|
x = xval(t.data) |
|
u, v = t.search(t.root, x) |
|
if u == v && len(t.data) == 4 { |
|
iter := func() (dist int, ok bool) { |
|
if u == null { |
|
return 0, false |
|
} |
|
dist = t.distance(u) |
|
u, v = t.search(t.node[u].l, x) |
|
if u != v { |
|
u = null |
|
} |
|
return dist, true |
|
} |
|
m, _, _ = t.match(m, iter, p) |
|
goto end |
|
} |
|
p.stopShorter = true |
|
iterSucc = func() (dist int, ok bool) { |
|
if v == null { |
|
return 0, false |
|
} |
|
dist = t.distance(v) |
|
v = t.succ(v) |
|
return dist, true |
|
} |
|
m, checked, accepted = t.match(m, iterSucc, p) |
|
if accepted { |
|
goto end |
|
} |
|
p.check -= checked |
|
iterPred = func() (dist int, ok bool) { |
|
if u == null { |
|
return 0, false |
|
} |
|
dist = t.distance(u) |
|
u = t.pred(u) |
|
return dist, true |
|
} |
|
m, _, _ = t.match(m, iterPred, p) |
|
end: |
|
if m.n == 0 { |
|
return lit{t.data[0]} |
|
} |
|
return m |
|
}
|
|
|