Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
691 lines
18 KiB
691 lines
18 KiB
package archiver |
|
|
|
import ( |
|
"archive/zip" |
|
"bytes" |
|
"compress/flate" |
|
"fmt" |
|
"io" |
|
"io/ioutil" |
|
"log" |
|
"os" |
|
"path" |
|
"path/filepath" |
|
"strings" |
|
|
|
"github.com/dsnet/compress/bzip2" |
|
"github.com/klauspost/compress/zstd" |
|
"github.com/ulikunitz/xz" |
|
) |
|
|
|
// ZipCompressionMethod Compression type |
|
type ZipCompressionMethod uint16 |
|
|
|
// Compression methods. |
|
// see https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT. |
|
// Note LZMA: Disabled - because 7z isn't able to unpack ZIP+LZMA ZIP+LZMA2 archives made this way - and vice versa. |
|
const ( |
|
Store ZipCompressionMethod = 0 |
|
Deflate ZipCompressionMethod = 8 |
|
BZIP2 ZipCompressionMethod = 12 |
|
LZMA ZipCompressionMethod = 14 |
|
ZSTD ZipCompressionMethod = 93 |
|
XZ ZipCompressionMethod = 95 |
|
) |
|
|
|
// Zip provides facilities for operating ZIP archives. |
|
// See https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT. |
|
type Zip struct { |
|
// The compression level to use, as described |
|
// in the compress/flate package. |
|
CompressionLevel int |
|
|
|
// Whether to overwrite existing files; if false, |
|
// an error is returned if the file exists. |
|
OverwriteExisting bool |
|
|
|
// Whether to make all the directories necessary |
|
// to create a zip archive in the desired path. |
|
MkdirAll bool |
|
|
|
// If enabled, selective compression will only |
|
// compress files which are not already in a |
|
// compressed format; this is decided based |
|
// simply on file extension. |
|
SelectiveCompression bool |
|
|
|
// A single top-level folder can be implicitly |
|
// created by the Archive or Unarchive methods |
|
// if the files to be added to the archive |
|
// or the files to be extracted from the archive |
|
// do not all have a common root. This roughly |
|
// mimics the behavior of archival tools integrated |
|
// into OS file browsers which create a subfolder |
|
// to avoid unexpectedly littering the destination |
|
// folder with potentially many files, causing a |
|
// problematic cleanup/organization situation. |
|
// This feature is available for both creation |
|
// and extraction of archives, but may be slightly |
|
// inefficient with lots and lots of files, |
|
// especially on extraction. |
|
ImplicitTopLevelFolder bool |
|
|
|
// If true, errors encountered during reading |
|
// or writing a single file will be logged and |
|
// the operation will continue on remaining files. |
|
ContinueOnError bool |
|
|
|
// Compression algorithm |
|
FileMethod ZipCompressionMethod |
|
zw *zip.Writer |
|
zr *zip.Reader |
|
ridx int |
|
//decinitialized bool |
|
} |
|
|
|
// CheckExt ensures the file extension matches the format. |
|
func (*Zip) CheckExt(filename string) error { |
|
if !strings.HasSuffix(filename, ".zip") { |
|
return fmt.Errorf("filename must have a .zip extension") |
|
} |
|
return nil |
|
} |
|
|
|
// Registering a global decompressor is not reentrant and may panic |
|
func registerDecompressor(zr *zip.Reader) { |
|
// register zstd decompressor |
|
zr.RegisterDecompressor(uint16(ZSTD), func(r io.Reader) io.ReadCloser { |
|
zr, err := zstd.NewReader(r) |
|
if err != nil { |
|
return nil |
|
} |
|
return zr.IOReadCloser() |
|
}) |
|
zr.RegisterDecompressor(uint16(BZIP2), func(r io.Reader) io.ReadCloser { |
|
bz2r, err := bzip2.NewReader(r, nil) |
|
if err != nil { |
|
return nil |
|
} |
|
return bz2r |
|
}) |
|
zr.RegisterDecompressor(uint16(XZ), func(r io.Reader) io.ReadCloser { |
|
xr, err := xz.NewReader(r) |
|
if err != nil { |
|
return nil |
|
} |
|
return ioutil.NopCloser(xr) |
|
}) |
|
} |
|
|
|
// CheckPath ensures the file extension matches the format. |
|
func (*Zip) CheckPath(to, filename string) error { |
|
to, _ = filepath.Abs(to) //explicit the destination folder to prevent that 'string.HasPrefix' check can be 'bypassed' when no destination folder is supplied in input |
|
dest := filepath.Join(to, filename) |
|
//prevent path traversal attacks |
|
if !strings.HasPrefix(dest, to) { |
|
return fmt.Errorf("illegal file path: %s", filename) |
|
} |
|
return nil |
|
} |
|
|
|
// Archive creates a .zip file at destination containing |
|
// the files listed in sources. The destination must end |
|
// with ".zip". File paths can be those of regular files |
|
// or directories. Regular files are stored at the 'root' |
|
// of the archive, and directories are recursively added. |
|
func (z *Zip) Archive(sources []string, destination string) error { |
|
err := z.CheckExt(destination) |
|
if err != nil { |
|
return fmt.Errorf("checking extension: %v", err) |
|
} |
|
if !z.OverwriteExisting && fileExists(destination) { |
|
return fmt.Errorf("file already exists: %s", destination) |
|
} |
|
|
|
// make the folder to contain the resulting archive |
|
// if it does not already exist |
|
destDir := filepath.Dir(destination) |
|
if z.MkdirAll && !fileExists(destDir) { |
|
err := mkdir(destDir, 0755) |
|
if err != nil { |
|
return fmt.Errorf("making folder for destination: %v", err) |
|
} |
|
} |
|
|
|
out, err := os.Create(destination) |
|
if err != nil { |
|
return fmt.Errorf("creating %s: %v", destination, err) |
|
} |
|
defer out.Close() |
|
|
|
err = z.Create(out) |
|
if err != nil { |
|
return fmt.Errorf("creating zip: %v", err) |
|
} |
|
defer z.Close() |
|
|
|
var topLevelFolder string |
|
if z.ImplicitTopLevelFolder && multipleTopLevels(sources) { |
|
topLevelFolder = folderNameFromFileName(destination) |
|
} |
|
|
|
for _, source := range sources { |
|
err := z.writeWalk(source, topLevelFolder, destination) |
|
if err != nil { |
|
return fmt.Errorf("walking %s: %v", source, err) |
|
} |
|
} |
|
|
|
return nil |
|
} |
|
|
|
// Unarchive unpacks the .zip file at source to destination. |
|
// Destination will be treated as a folder name. |
|
func (z *Zip) Unarchive(source, destination string) error { |
|
if !fileExists(destination) && z.MkdirAll { |
|
err := mkdir(destination, 0755) |
|
if err != nil { |
|
return fmt.Errorf("preparing destination: %v", err) |
|
} |
|
} |
|
|
|
file, err := os.Open(source) |
|
if err != nil { |
|
return fmt.Errorf("opening source file: %v", err) |
|
} |
|
defer file.Close() |
|
|
|
fileInfo, err := file.Stat() |
|
if err != nil { |
|
return fmt.Errorf("statting source file: %v", err) |
|
} |
|
|
|
err = z.Open(file, fileInfo.Size()) |
|
if err != nil { |
|
return fmt.Errorf("opening zip archive for reading: %v", err) |
|
} |
|
defer z.Close() |
|
|
|
// if the files in the archive do not all share a common |
|
// root, then make sure we extract to a single subfolder |
|
// rather than potentially littering the destination... |
|
if z.ImplicitTopLevelFolder { |
|
files := make([]string, len(z.zr.File)) |
|
for i := range z.zr.File { |
|
files[i] = z.zr.File[i].Name |
|
} |
|
if multipleTopLevels(files) { |
|
destination = filepath.Join(destination, folderNameFromFileName(source)) |
|
} |
|
} |
|
|
|
for { |
|
err := z.extractNext(destination) |
|
if err == io.EOF { |
|
break |
|
} |
|
if err != nil { |
|
if z.ContinueOnError || strings.Contains(err.Error(), "illegal file path") { |
|
log.Printf("[ERROR] Reading file in zip archive: %v", err) |
|
continue |
|
} |
|
return fmt.Errorf("reading file in zip archive: %v", err) |
|
} |
|
} |
|
|
|
return nil |
|
} |
|
|
|
func (z *Zip) extractNext(to string) error { |
|
f, err := z.Read() |
|
if err != nil { |
|
return err // don't wrap error; calling loop must break on io.EOF |
|
} |
|
defer f.Close() |
|
|
|
errPath := z.CheckPath(to, f.Header.(zip.FileHeader).Name) |
|
if errPath != nil { |
|
return fmt.Errorf("checking path traversal attempt: %v", errPath) |
|
} |
|
return z.extractFile(f, to) |
|
} |
|
|
|
func (z *Zip) extractFile(f File, to string) error { |
|
header, ok := f.Header.(zip.FileHeader) |
|
if !ok { |
|
return fmt.Errorf("expected header to be zip.FileHeader but was %T", f.Header) |
|
} |
|
|
|
to = filepath.Join(to, header.Name) |
|
|
|
// if a directory, no content; simply make the directory and return |
|
if f.IsDir() { |
|
return mkdir(to, f.Mode()) |
|
} |
|
|
|
// do not overwrite existing files, if configured |
|
if !z.OverwriteExisting && fileExists(to) { |
|
return fmt.Errorf("file already exists: %s", to) |
|
} |
|
|
|
// extract symbolic links as symbolic links |
|
if isSymlink(header.FileInfo()) { |
|
// symlink target is the contents of the file |
|
buf := new(bytes.Buffer) |
|
_, err := io.Copy(buf, f) |
|
if err != nil { |
|
return fmt.Errorf("%s: reading symlink target: %v", header.Name, err) |
|
} |
|
return writeNewSymbolicLink(to, strings.TrimSpace(buf.String())) |
|
} |
|
|
|
return writeNewFile(to, f, f.Mode()) |
|
} |
|
|
|
func (z *Zip) writeWalk(source, topLevelFolder, destination string) error { |
|
sourceInfo, err := os.Stat(source) |
|
if err != nil { |
|
return fmt.Errorf("%s: stat: %v", source, err) |
|
} |
|
destAbs, err := filepath.Abs(destination) |
|
if err != nil { |
|
return fmt.Errorf("%s: getting absolute path of destination %s: %v", source, destination, err) |
|
} |
|
|
|
return filepath.Walk(source, func(fpath string, info os.FileInfo, err error) error { |
|
handleErr := func(err error) error { |
|
if z.ContinueOnError { |
|
log.Printf("[ERROR] Walking %s: %v", fpath, err) |
|
return nil |
|
} |
|
return err |
|
} |
|
if err != nil { |
|
return handleErr(fmt.Errorf("traversing %s: %v", fpath, err)) |
|
} |
|
if info == nil { |
|
return handleErr(fmt.Errorf("%s: no file info", fpath)) |
|
} |
|
|
|
// make sure we do not copy the output file into the output |
|
// file; that results in an infinite loop and disk exhaustion! |
|
fpathAbs, err := filepath.Abs(fpath) |
|
if err != nil { |
|
return handleErr(fmt.Errorf("%s: getting absolute path: %v", fpath, err)) |
|
} |
|
if within(fpathAbs, destAbs) { |
|
return nil |
|
} |
|
|
|
// build the name to be used within the archive |
|
nameInArchive, err := makeNameInArchive(sourceInfo, source, topLevelFolder, fpath) |
|
if err != nil { |
|
return handleErr(err) |
|
} |
|
|
|
var file io.ReadCloser |
|
if info.Mode().IsRegular() { |
|
file, err = os.Open(fpath) |
|
if err != nil { |
|
return handleErr(fmt.Errorf("%s: opening: %v", fpath, err)) |
|
} |
|
defer file.Close() |
|
} |
|
err = z.Write(File{ |
|
FileInfo: FileInfo{ |
|
FileInfo: info, |
|
CustomName: nameInArchive, |
|
}, |
|
ReadCloser: file, |
|
}) |
|
if err != nil { |
|
return handleErr(fmt.Errorf("%s: writing: %s", fpath, err)) |
|
} |
|
|
|
return nil |
|
}) |
|
} |
|
|
|
// Create opens z for writing a ZIP archive to out. |
|
func (z *Zip) Create(out io.Writer) error { |
|
if z.zw != nil { |
|
return fmt.Errorf("zip archive is already created for writing") |
|
} |
|
z.zw = zip.NewWriter(out) |
|
if z.CompressionLevel != flate.DefaultCompression { |
|
z.zw.RegisterCompressor(zip.Deflate, func(out io.Writer) (io.WriteCloser, error) { |
|
return flate.NewWriter(out, z.CompressionLevel) |
|
}) |
|
} |
|
switch z.FileMethod { |
|
case BZIP2: |
|
z.zw.RegisterCompressor(uint16(BZIP2), func(out io.Writer) (io.WriteCloser, error) { |
|
return bzip2.NewWriter(out, &bzip2.WriterConfig{Level: z.CompressionLevel}) |
|
}) |
|
case ZSTD: |
|
z.zw.RegisterCompressor(uint16(ZSTD), func(out io.Writer) (io.WriteCloser, error) { |
|
return zstd.NewWriter(out) |
|
}) |
|
case XZ: |
|
z.zw.RegisterCompressor(uint16(XZ), func(out io.Writer) (io.WriteCloser, error) { |
|
return xz.NewWriter(out) |
|
}) |
|
} |
|
return nil |
|
} |
|
|
|
// Write writes f to z, which must have been opened for writing first. |
|
func (z *Zip) Write(f File) error { |
|
if z.zw == nil { |
|
return fmt.Errorf("zip archive was not created for writing first") |
|
} |
|
if f.FileInfo == nil { |
|
return fmt.Errorf("no file info") |
|
} |
|
if f.FileInfo.Name() == "" { |
|
return fmt.Errorf("missing file name") |
|
} |
|
|
|
header, err := zip.FileInfoHeader(f) |
|
if err != nil { |
|
return fmt.Errorf("%s: getting header: %v", f.Name(), err) |
|
} |
|
|
|
if f.IsDir() { |
|
header.Name += "/" // required - strangely no mention of this in zip spec? but is in godoc... |
|
header.Method = zip.Store |
|
} else { |
|
ext := strings.ToLower(path.Ext(header.Name)) |
|
if _, ok := compressedFormats[ext]; ok && z.SelectiveCompression { |
|
header.Method = zip.Store |
|
} else { |
|
header.Method = uint16(z.FileMethod) |
|
} |
|
} |
|
|
|
writer, err := z.zw.CreateHeader(header) |
|
if err != nil { |
|
return fmt.Errorf("%s: making header: %v", f.Name(), err) |
|
} |
|
|
|
return z.writeFile(f, writer) |
|
} |
|
|
|
func (z *Zip) writeFile(f File, writer io.Writer) error { |
|
if f.IsDir() { |
|
return nil // directories have no contents |
|
} |
|
if isSymlink(f) { |
|
// file body for symlinks is the symlink target |
|
linkTarget, err := os.Readlink(f.Name()) |
|
if err != nil { |
|
return fmt.Errorf("%s: readlink: %v", f.Name(), err) |
|
} |
|
_, err = writer.Write([]byte(filepath.ToSlash(linkTarget))) |
|
if err != nil { |
|
return fmt.Errorf("%s: writing symlink target: %v", f.Name(), err) |
|
} |
|
return nil |
|
} |
|
|
|
if f.ReadCloser == nil { |
|
return fmt.Errorf("%s: no way to read file contents", f.Name()) |
|
} |
|
_, err := io.Copy(writer, f) |
|
if err != nil { |
|
return fmt.Errorf("%s: copying contents: %v", f.Name(), err) |
|
} |
|
|
|
return nil |
|
} |
|
|
|
// Open opens z for reading an archive from in, |
|
// which is expected to have the given size and |
|
// which must be an io.ReaderAt. |
|
func (z *Zip) Open(in io.Reader, size int64) error { |
|
inRdrAt, ok := in.(io.ReaderAt) |
|
if !ok { |
|
return fmt.Errorf("reader must be io.ReaderAt") |
|
} |
|
if z.zr != nil { |
|
return fmt.Errorf("zip archive is already open for reading") |
|
} |
|
var err error |
|
z.zr, err = zip.NewReader(inRdrAt, size) |
|
if err != nil { |
|
return fmt.Errorf("creating reader: %v", err) |
|
} |
|
registerDecompressor(z.zr) |
|
z.ridx = 0 |
|
return nil |
|
} |
|
|
|
// Read reads the next file from z, which must have |
|
// already been opened for reading. If there are no |
|
// more files, the error is io.EOF. The File must |
|
// be closed when finished reading from it. |
|
func (z *Zip) Read() (File, error) { |
|
if z.zr == nil { |
|
return File{}, fmt.Errorf("zip archive is not open") |
|
} |
|
if z.ridx >= len(z.zr.File) { |
|
return File{}, io.EOF |
|
} |
|
|
|
// access the file and increment counter so that |
|
// if there is an error processing this file, the |
|
// caller can still iterate to the next file |
|
zf := z.zr.File[z.ridx] |
|
z.ridx++ |
|
|
|
file := File{ |
|
FileInfo: zf.FileInfo(), |
|
Header: zf.FileHeader, |
|
} |
|
|
|
rc, err := zf.Open() |
|
if err != nil { |
|
return file, fmt.Errorf("%s: open compressed file: %v", zf.Name, err) |
|
} |
|
file.ReadCloser = rc |
|
|
|
return file, nil |
|
} |
|
|
|
// Close closes the zip archive(s) opened by Create and Open. |
|
func (z *Zip) Close() error { |
|
if z.zr != nil { |
|
z.zr = nil |
|
} |
|
if z.zw != nil { |
|
zw := z.zw |
|
z.zw = nil |
|
return zw.Close() |
|
} |
|
return nil |
|
} |
|
|
|
// Walk calls walkFn for each visited item in archive. |
|
func (z *Zip) Walk(archive string, walkFn WalkFunc) error { |
|
zr, err := zip.OpenReader(archive) |
|
if err != nil { |
|
return fmt.Errorf("opening zip reader: %v", err) |
|
} |
|
defer zr.Close() |
|
registerDecompressor(&zr.Reader) |
|
for _, zf := range zr.File { |
|
zfrc, err := zf.Open() |
|
if err != nil { |
|
if zfrc != nil { |
|
zfrc.Close() |
|
} |
|
if z.ContinueOnError { |
|
log.Printf("[ERROR] Opening %s: %v", zf.Name, err) |
|
continue |
|
} |
|
return fmt.Errorf("opening %s: %v", zf.Name, err) |
|
} |
|
|
|
err = walkFn(File{ |
|
FileInfo: zf.FileInfo(), |
|
Header: zf.FileHeader, |
|
ReadCloser: zfrc, |
|
}) |
|
zfrc.Close() |
|
if err != nil { |
|
if err == ErrStopWalk { |
|
break |
|
} |
|
if z.ContinueOnError { |
|
log.Printf("[ERROR] Walking %s: %v", zf.Name, err) |
|
continue |
|
} |
|
return fmt.Errorf("walking %s: %v", zf.Name, err) |
|
} |
|
} |
|
|
|
return nil |
|
} |
|
|
|
// Extract extracts a single file from the zip archive. |
|
// If the target is a directory, the entire folder will |
|
// be extracted into destination. |
|
func (z *Zip) Extract(source, target, destination string) error { |
|
// target refers to a path inside the archive, which should be clean also |
|
target = path.Clean(target) |
|
|
|
// if the target ends up being a directory, then |
|
// we will continue walking and extracting files |
|
// until we are no longer within that directory |
|
var targetDirPath string |
|
|
|
return z.Walk(source, func(f File) error { |
|
zfh, ok := f.Header.(zip.FileHeader) |
|
if !ok { |
|
return fmt.Errorf("expected header to be zip.FileHeader but was %T", f.Header) |
|
} |
|
|
|
// importantly, cleaning the path strips tailing slash, |
|
// which must be appended to folders within the archive |
|
name := path.Clean(zfh.Name) |
|
if f.IsDir() && target == name { |
|
targetDirPath = path.Dir(name) |
|
} |
|
|
|
if within(target, zfh.Name) { |
|
// either this is the exact file we want, or is |
|
// in the directory we want to extract |
|
|
|
// build the filename we will extract to |
|
end, err := filepath.Rel(targetDirPath, zfh.Name) |
|
if err != nil { |
|
return fmt.Errorf("relativizing paths: %v", err) |
|
} |
|
joined := filepath.Join(destination, end) |
|
|
|
err = z.extractFile(f, joined) |
|
if err != nil { |
|
return fmt.Errorf("extracting file %s: %v", zfh.Name, err) |
|
} |
|
|
|
// if our target was not a directory, stop walk |
|
if targetDirPath == "" { |
|
return ErrStopWalk |
|
} |
|
} else if targetDirPath != "" { |
|
// finished walking the entire directory |
|
return ErrStopWalk |
|
} |
|
|
|
return nil |
|
}) |
|
} |
|
|
|
// Match returns true if the format of file matches this |
|
// type's format. It should not affect reader position. |
|
func (*Zip) Match(file io.ReadSeeker) (bool, error) { |
|
currentPos, err := file.Seek(0, io.SeekCurrent) |
|
if err != nil { |
|
return false, err |
|
} |
|
_, err = file.Seek(0, 0) |
|
if err != nil { |
|
return false, err |
|
} |
|
defer func() { |
|
_, _ = file.Seek(currentPos, io.SeekStart) |
|
}() |
|
|
|
buf := make([]byte, 4) |
|
if n, err := file.Read(buf); err != nil || n < 4 { |
|
return false, nil |
|
} |
|
return bytes.Equal(buf, []byte("PK\x03\x04")), nil |
|
} |
|
|
|
func (z *Zip) String() string { return "zip" } |
|
|
|
// NewZip returns a new, default instance ready to be customized and used. |
|
func NewZip() *Zip { |
|
return &Zip{ |
|
CompressionLevel: flate.DefaultCompression, |
|
MkdirAll: true, |
|
SelectiveCompression: true, |
|
FileMethod: Deflate, |
|
} |
|
} |
|
|
|
// Compile-time checks to ensure type implements desired interfaces. |
|
var ( |
|
_ = Reader(new(Zip)) |
|
_ = Writer(new(Zip)) |
|
_ = Archiver(new(Zip)) |
|
_ = Unarchiver(new(Zip)) |
|
_ = Walker(new(Zip)) |
|
_ = Extractor(new(Zip)) |
|
_ = Matcher(new(Zip)) |
|
_ = ExtensionChecker(new(Zip)) |
|
_ = FilenameChecker(new(Zip)) |
|
) |
|
|
|
// compressedFormats is a (non-exhaustive) set of lowercased |
|
// file extensions for formats that are typically already |
|
// compressed. Compressing files that are already compressed |
|
// is inefficient, so use this set of extension to avoid that. |
|
var compressedFormats = map[string]struct{}{ |
|
".7z": {}, |
|
".avi": {}, |
|
".br": {}, |
|
".bz2": {}, |
|
".cab": {}, |
|
".docx": {}, |
|
".gif": {}, |
|
".gz": {}, |
|
".jar": {}, |
|
".jpeg": {}, |
|
".jpg": {}, |
|
".lz": {}, |
|
".lz4": {}, |
|
".lzma": {}, |
|
".m4v": {}, |
|
".mov": {}, |
|
".mp3": {}, |
|
".mp4": {}, |
|
".mpeg": {}, |
|
".mpg": {}, |
|
".png": {}, |
|
".pptx": {}, |
|
".rar": {}, |
|
".sz": {}, |
|
".tbz2": {}, |
|
".tgz": {}, |
|
".tsz": {}, |
|
".txz": {}, |
|
".xlsx": {}, |
|
".xz": {}, |
|
".zip": {}, |
|
".zipx": {}, |
|
} |
|
|
|
// DefaultZip is a default instance that is conveniently ready to use. |
|
var DefaultZip = NewZip()
|
|
|