Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
110 lines
3.0 KiB
110 lines
3.0 KiB
// Copyright 2015, Joe Tsai. All rights reserved. |
|
// Use of this source code is governed by a BSD-style |
|
// license that can be found in the LICENSE.md file. |
|
|
|
// Package bzip2 implements the BZip2 compressed data format. |
|
// |
|
// Canonical C implementation: |
|
// http://bzip.org |
|
// |
|
// Unofficial format specification: |
|
// https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf |
|
package bzip2 |
|
|
|
import ( |
|
"fmt" |
|
"hash/crc32" |
|
|
|
"github.com/dsnet/compress/internal" |
|
"github.com/dsnet/compress/internal/errors" |
|
) |
|
|
|
// There does not exist a formal specification of the BZip2 format. As such,
// much of this work is derived by either reverse engineering the original C
// source code or using secondary sources.
//
// A significant amount of fuzz testing is done to ensure that outputs from
// this package are properly decoded by the C library. Furthermore, we test that
// both this package and the C library agree about what inputs are invalid.
//
// Compression stack:
//	Run-length encoding 1 (RLE1)
//	Burrows-Wheeler transform (BWT)
//	Move-to-front transform (MTF)
//	Run-length encoding 2 (RLE2)
//	Prefix encoding (PE)
//
// References:
//	http://bzip.org/
//	https://en.wikipedia.org/wiki/Bzip2
//	https://code.google.com/p/jbzip2/
|
|
|
// Named compression levels exported by this package, listed in ascending
// numeric order.
const (
	// BestSpeed is the lowest named level.
	BestSpeed = 1

	// DefaultCompression is the level between the two extremes.
	DefaultCompression = 6

	// BestCompression is the highest named level.
	BestCompression = 9
)
|
|
|
// Magic numbers of the BZip2 format.
const (
	hdrMagic = 0x425a         // Hex of "BZ"
	blkMagic = 0x314159265359 // BCD of PI
	endMagic = 0x177245385090 // BCD of sqrt(PI)
)

// blockSize is the block size unit in bytes.
// NOTE(review): presumably multiplied by the compression level to obtain the
// actual block size; confirm against the reader/writer code.
const blockSize = 100000
|
|
|
func errorf(c int, f string, a ...interface{}) error { |
|
return errors.Error{Code: c, Pkg: "bzip2", Msg: fmt.Sprintf(f, a...)} |
|
} |
|
|
|
func panicf(c int, f string, a ...interface{}) { |
|
errors.Panic(errorf(c, f, a...)) |
|
} |
|
|
|
// errWrap converts a lower-level errors.Error to be one from this package. |
|
// The replaceCode passed in will be used to replace the code for any errors |
|
// with the errors.Invalid code. |
|
// |
|
// For the Reader, set this to errors.Corrupted. |
|
// For the Writer, set this to errors.Internal. |
|
func errWrap(err error, replaceCode int) error { |
|
if cerr, ok := err.(errors.Error); ok { |
|
if errors.IsInvalid(cerr) { |
|
cerr.Code = replaceCode |
|
} |
|
err = errorf(cerr.Code, "%s", cerr.Msg) |
|
} |
|
return err |
|
} |
|
|
|
var errClosed = errorf(errors.Closed, "") |
|
|
|
// crc computes the CRC-32 used by BZip2.
//
// BZip2's CRC-32 consumes the bits of each byte in big-endian order: the MSB
// is processed before the LSB. The standard library's CRC-32 IEEE
// implementation can therefore be reused, with minor adjustments.
//
// The embedded byte array is an intermediate buffer that holds the
// bit-swapped form of each input byte.
type crc struct {
	// val is the checksum state kept between calls to update.
	val uint32

	// buf is scratch space for the bit-reversed input bytes.
	buf [256]byte
}
|
|
|
// update computes the CRC-32 of appending buf to c. |
|
func (c *crc) update(buf []byte) { |
|
cval := internal.ReverseUint32(c.val) |
|
for len(buf) > 0 { |
|
n := len(buf) |
|
if n > len(c.buf) { |
|
n = len(c.buf) |
|
} |
|
for i, b := range buf[:n] { |
|
c.buf[i] = internal.ReverseLUT[b] |
|
} |
|
cval = crc32.Update(cval, crc32.IEEETable, c.buf[:n]) |
|
buf = buf[n:] |
|
} |
|
c.val = internal.ReverseUint32(cval) |
|
}
|
|
|