Compare commits

...
Sign in to create a new pull request.

7 commits

Author SHA1 Message Date
buengese
07e2cea184 press: add xz compression algorithm 2020-03-03 19:38:50 +01:00
buengese
c353b95a28 press: rework compression backends
... use interface for different compression algorithms
... use pierrec/lz4 for lz4
... use klauspost/compress/gzip for gzip
... remove snappy
2020-03-03 19:38:40 +01:00
buengese
45375d5b8f vendor: add new dependencies
... switch from id01/go-lz4 to pierrec/lz4
... use klauspost/compress/gzip for gzip
... use ulikunitz/xz for xz
... remove snappy
2020-03-03 18:40:54 +01:00
buengese
509b9b96a6 press: rebase and fix the broken tests
... add additional error handling for metadata upload
... fix metadata wrapping
... make sure not to leave any orphaned data when overwriting existing files with Put, Copy or Move
... restructure some minor things to make them clearer
... remove xz compression
2020-03-03 17:49:35 +01:00
buengese
ae06b050a4 vendor: update dependencies for press 2020-03-03 17:49:23 +01:00
id01
0cf94ae6ac press: Added experimental compression remote - implements #2098, #1356, #675 2020-03-03 17:40:32 +01:00
id01
4b31a33ab4 vendor: add dependencies for press backend
-  github.com/OneOfOne/xxhash
-  github.com/gabriel-vasile/mimetype
-  github.com/golang/snappy
-  github.com/id01/go-lz4
2020-03-03 17:40:32 +01:00
133 changed files with 22251 additions and 7 deletions

View file

@ -28,6 +28,7 @@ import (
_ "github.com/rclone/rclone/backend/opendrive"
_ "github.com/rclone/rclone/backend/pcloud"
_ "github.com/rclone/rclone/backend/premiumizeme"
_ "github.com/rclone/rclone/backend/press"
_ "github.com/rclone/rclone/backend/putio"
_ "github.com/rclone/rclone/backend/qingstor"
_ "github.com/rclone/rclone/backend/s3"

1
backend/press/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
test

75
backend/press/alg_gzip.go Normal file
View file

@ -0,0 +1,75 @@
package press
import (
"bufio"
"io"
"github.com/klauspost/compress/gzip"
)
// AlgGzip represents gzip compression algorithm
type AlgGzip struct {
level int
blockSize uint32
}
// InitializeGzip initializes the gzip compression Algorithm
func InitializeGzip(bs uint32, level int) Algorithm {
a := new(AlgGzip)
a.blockSize = bs
a.level = level
return a
}
// GetFileExtension returns file extension
func (a *AlgGzip) GetFileExtension() string {
return ".gz"
}
// GetHeader returns the Lz4 compression header
func (a *AlgGzip) GetHeader() []byte {
return []byte{}
}
// GetFooter returns
func (a *AlgGzip) GetFooter() []byte {
return []byte{}
}
// CompressBlock that compresses a block using gzip
func (a *AlgGzip) CompressBlock(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize uint64, err error) {
// Initialize buffer
bufw := bufio.NewWriterSize(out, int(a.blockSize+(a.blockSize)>>4))
// Initialize block writer
outw, err := gzip.NewWriterLevel(bufw, a.level)
if err != nil {
return 0, 0, err
}
// Compress block
_, err = outw.Write(in)
if err != nil {
return 0, 0, err
}
// Finalize gzip file, flush buffer and return
err = outw.Close()
if err != nil {
return 0, 0, err
}
blockSize := uint32(bufw.Buffered())
err = bufw.Flush()
return blockSize, uint64(len(in)), err
}
// DecompressBlock decompresses Lz4 compressed block
func (a *AlgGzip) DecompressBlock(in io.Reader, out io.Writer, BlockSize uint32) (n int, err error) {
gzipReader, err := gzip.NewReader(in)
if err != nil {
return 0, err
}
written, err := io.Copy(out, gzipReader)
return int(written), err
}

223
backend/press/alg_lz4.go Normal file
View file

@ -0,0 +1,223 @@
package press
// This file implements the LZ4 algorithm.
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math/bits"
"github.com/buengese/xxh32"
lz4 "github.com/pierrec/lz4"
)
/*
Structure of LZ4 header:
Flags:
Version = 01
Independent = 1
Block Checksum = 1
Content Size = 0
Content Checksum = 0
Reserved = 0
Dictionary ID = 0
BD byte:
Reserved = 0
Block Max Size = 101 (or 5; 256kb)
Reserved = 0000
Header checksum byte (xxhash(flags and bd byte) >> 1) & 0xff
*/
// LZ4Header - Header of our LZ4 file
//var LZ4Header = []byte{0x04, 0x22, 0x4d, 0x18, 0x70, 0x50, 0x84}
// LZ4Footer - Footer of our LZ4 file
var LZ4Footer = []byte{0x00, 0x00, 0x00, 0x00} // This is just an empty block
const (
frameMagic uint32 = 0x184D2204
compressedBlockFlag = 1 << 31
compressedBlockMask = compressedBlockFlag - 1
)
// AlgLz4 is the Lz4 Compression algorithm
type AlgLz4 struct {
Header lz4.Header
buf [19]byte // magic number(4) + header(flags(2)+[Size(8)+DictID(4)]+checksum(1)) does not exceed 19 bytes
}
// InitializeLz4 creates an Lz4 compression algorithm
func InitializeLz4(bs uint32, blockChecksum bool) Algorithm {
a := new(AlgLz4)
a.Header.Reset()
a.Header = lz4.Header{
BlockChecksum: blockChecksum,
BlockMaxSize: int(bs),
}
return a
}
// GetFileExtension returns file extension
func (a *AlgLz4) GetFileExtension() string {
return ".lz4"
}
// GetHeader returns the Lz4 compression header
func (a *AlgLz4) GetHeader() []byte {
// Size is optional.
buf := a.buf[:]
// Set the fixed size data: magic number, block max size and flags.
binary.LittleEndian.PutUint32(buf[0:], frameMagic)
flg := byte(lz4.Version << 6)
flg |= 1 << 5 // No block dependency.
if a.Header.BlockChecksum {
flg |= 1 << 4
}
if a.Header.Size > 0 {
flg |= 1 << 3
}
buf[4] = flg
buf[5] = blockSizeValueToIndex(a.Header.BlockMaxSize) << 4
// Current buffer size: magic(4) + flags(1) + block max size (1).
n := 6
if a.Header.Size > 0 {
binary.LittleEndian.PutUint64(buf[n:], a.Header.Size)
n += 8
}
// The header checksum includes the flags, block max size and optional Size.
buf[n] = byte(xxh32.ChecksumZero(buf[4:n]) >> 8 & 0xFF)
// Header ready, write it out.
return buf[0 : n+1]
}
// GetFooter returns
func (a *AlgLz4) GetFooter() []byte {
return LZ4Footer
}
// CompressBlock that compresses a block using lz4
func (a *AlgLz4) CompressBlock(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize uint64, err error) {
if len(in) > 0 {
n, err := a.compressBlock(in, out)
if err != nil {
return 0, 0, err
}
return n, uint64(len(in)), nil
}
return 0, 0, nil
}
// compressBlock compresses a block.
func (a *AlgLz4) compressBlock(data []byte, dst io.Writer) (uint32, error) {
zdata := make([]byte, a.Header.BlockMaxSize) // The compressed block size cannot exceed the input's.
var zn int
if level := a.Header.CompressionLevel; level != 0 {
zn, _ = lz4.CompressBlockHC(data, zdata, level)
} else {
var hashTable [1 << 16]int
zn, _ = lz4.CompressBlock(data, zdata, hashTable[:])
}
var bLen uint32
if zn > 0 && zn < len(data) {
// Compressible and compressed size smaller than uncompressed: ok!
bLen = uint32(zn)
zdata = zdata[:zn]
} else {
// Uncompressed block.
bLen = uint32(len(data)) | compressedBlockFlag
zdata = data
}
// Write the block.
if err := a.writeUint32(bLen, dst); err != nil {
return 0, err
}
_, err := dst.Write(zdata)
if err != nil {
return 0, err
}
if !a.Header.BlockChecksum {
return bLen, nil
}
checksum := xxh32.ChecksumZero(zdata)
if err := a.writeUint32(checksum, dst); err != nil {
return 0, err
}
return bLen, nil
}
// writeUint32 writes a uint32 to the underlying writer.
func (a *AlgLz4) writeUint32(x uint32, dst io.Writer) error {
buf := make([]byte, 4)
binary.LittleEndian.PutUint32(buf, x)
_, err := dst.Write(buf)
return err
}
func blockSizeValueToIndex(size int) byte {
return 4 + byte(bits.TrailingZeros(uint(size)>>16)/2)
}
// DecompressBlock decompresses Lz4 compressed block
func (a *AlgLz4) DecompressBlock(in io.Reader, out io.Writer, BlockSize uint32) (n int, err error) {
// Get our compressed data
var b bytes.Buffer
_, err = io.Copy(&b, in)
if err != nil {
return 0, err
}
zdata := b.Bytes()
bLen := binary.LittleEndian.Uint32(zdata[:4])
if bLen&compressedBlockFlag > 0 {
// Uncompressed block.
bLen &= compressedBlockMask
if bLen > BlockSize {
return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
}
data := zdata[4 : bLen+4]
if a.Header.BlockChecksum {
checksum := binary.LittleEndian.Uint32(zdata[4+bLen:])
if h := xxh32.ChecksumZero(data); h != checksum {
return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
}
}
_, err := out.Write(data)
return len(data), err
}
// compressed block
if bLen > BlockSize {
return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
}
if a.Header.BlockChecksum {
checksum := binary.LittleEndian.Uint32(zdata[4+bLen:])
if h := xxh32.ChecksumZero(zdata[4 : bLen+4]); h != checksum {
return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
}
}
data := make([]byte, BlockSize)
n, err = lz4.UncompressBlock(zdata[4:bLen+4], data)
if err != nil {
return 0, err
}
_, err = out.Write(data[:n])
return n, err
}

75
backend/press/alg_xz.go Normal file
View file

@ -0,0 +1,75 @@
package press
import (
"bufio"
"io"
"github.com/ulikunitz/xz"
)
// AlgXZ represents the XZ compression algorithm
type AlgXZ struct {
blockSize uint32
config xz.WriterConfig
}
// InitializeXZ creates an Lz4 compression algorithm
func InitializeXZ(bs uint32) Algorithm {
a := new(AlgXZ)
a.blockSize = bs
a.config = xz.WriterConfig{}
return a
}
// GetFileExtension returns file extension
func (a *AlgXZ) GetFileExtension() string {
return ".xz"
}
// GetHeader returns the Lz4 compression header
func (a *AlgXZ) GetHeader() []byte {
return []byte{}
}
// GetFooter returns
func (a *AlgXZ) GetFooter() []byte {
return []byte{}
}
// CompressBlock that compresses a block using lz4
func (a *AlgXZ) CompressBlock(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize uint64, err error) {
// Initialize buffer
bufw := bufio.NewWriterSize(out, int(a.blockSize+(a.blockSize)>>4))
// Initialize block writer
outw, err := a.config.NewWriter(bufw)
if err != nil {
return 0, 0, err
}
// Compress block
_, err = outw.Write(in)
if err != nil {
return 0, 0, err
}
// Finalize gzip file, flush buffer and return
err = outw.Close()
if err != nil {
return 0, 0, err
}
blockSize := uint32(bufw.Buffered())
err = bufw.Flush()
return blockSize, uint64(len(in)), err
}
// DecompressBlock decompresses Lz4 compressed block
func (a *AlgXZ) DecompressBlock(in io.Reader, out io.Writer, BlockSize uint32) (n int, err error) {
xzReader, err := xz.NewReader(in)
if err != nil {
return 0, err
}
written, err := io.Copy(out, xzReader)
return int(written), err
}

View file

@ -0,0 +1,526 @@
// Package press provides wrappers for Fs and Object which implement compression.
// This file is the backend implementation for seekable compression.
package press
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
"log"
)
// Compression modes
const (
Uncompressed = -1
LZ4 = 2
Gzip = 4
XZ = 8
)
// Errors
var (
ErrMetadataCorrupted = errors.New("metadata may have been corrupted")
)
// DEBUG - flag for debug mode
const DEBUG = false
// Compression is a struct containing configurable variables (what used to be constants)
type Compression struct {
CompressionMode int // Compression mode
Algorithm Algorithm
BlockSize uint32 // Size of blocks. Higher block size means better compression but more download bandwidth needed for small downloads
// ~1MB is recommended for xz, while ~128KB is recommended for gzip and lz4
HeuristicBytes int64 // Bytes to perform gzip heuristic on to determine whether a file should be compressed
NumThreads int // Number of threads to use for compression
MaxCompressionRatio float64 // Maximum compression ratio for a file to be considered compressible
BinPath string // Path to compression binary. This is used for all non-gzip compression.
}
// Algorithm is the main compression Algorithm Interface
type Algorithm interface {
GetHeader() []byte
GetFileExtension() string
CompressBlock(in []byte, out io.Writer) (compressedSize uint32, uncompressedSize uint64, err error)
DecompressBlock(in io.Reader, out io.Writer, BlockSize uint32) (n int, err error)
GetFooter() []byte
}
// NewCompressionPreset creates a Compression object with a preset mode/bs
func NewCompressionPreset(preset string) (*Compression, error) {
switch preset {
case "lz4":
alg := InitializeLz4(262144, true)
return NewCompression(LZ4, alg, 262144) // LZ4 compression (very fast)
case "gzip":
alg := InitializeGzip(131072, 6)
return NewCompression(Gzip, alg, 131070) // GZIP-default compression (medium)*/
case "xz":
alg := InitializeXZ(1048576)
return NewCompression(XZ, alg, 1048576) // XZ compression (strong compression)*/
}
return nil, errors.New("Compression mode doesn't exist")
}
// NewCompressionPresetNumber creates a Compression object with a preset mode/bs
func NewCompressionPresetNumber(preset int) (*Compression, error) {
switch preset {
case LZ4:
alg := InitializeLz4(262144, true)
return NewCompression(LZ4, alg, 262144) // LZ4 compression (very fast)
case Gzip:
alg := InitializeGzip(131072, 6)
return NewCompression(Gzip, alg, 131070) // GZIP-default compression (medium)*/
case XZ:
alg := InitializeXZ(1048576)
return NewCompression(XZ, alg, 1048576) // XZ compression (strong compression)*/
}
return nil, errors.New("Compression mode doesn't exist")
}
// NewCompression creates a Compression object with some default configuration values
func NewCompression(mode int, alg Algorithm, bs uint32) (*Compression, error) {
return NewCompressionAdvanced(mode, alg, bs, 1048576, 12, 0.9)
}
// NewCompressionAdvanced creates a Compression object
func NewCompressionAdvanced(mode int, alg Algorithm, bs uint32, hb int64, threads int, mcr float64) (c *Compression, err error) {
// Set vars
c = new(Compression)
c.Algorithm = alg
c.CompressionMode = mode
c.BlockSize = bs
c.HeuristicBytes = hb
c.NumThreads = threads
c.MaxCompressionRatio = mcr
return c, err
}
/*** UTILITY FUNCTIONS ***/
// GetFileExtension gets a file extension for current compression mode
func (c *Compression) GetFileExtension() string {
return c.Algorithm.GetFileExtension()
}
// GetFileCompressionInfo gets a file extension along with compressibility of file
func (c *Compression) GetFileCompressionInfo(reader io.Reader) (compressable bool, extension string, err error) {
// Use our compression algorithm to do a heuristic on the first few bytes
var emulatedBlock, emulatedBlockCompressed bytes.Buffer
_, err = io.CopyN(&emulatedBlock, reader, c.HeuristicBytes)
if err != nil && err != io.EOF {
return false, "", err
}
compressedSize, uncompressedSize, err := c.Algorithm.CompressBlock(emulatedBlock.Bytes(), &emulatedBlockCompressed)
if err != nil {
return false, "", err
}
compressionRatio := float64(compressedSize) / float64(uncompressedSize)
// If the data is not compressible, return so
if compressionRatio > c.MaxCompressionRatio {
return false, ".bin", nil
}
// If the file is compressible, select file extension based on compression mode
return true, c.Algorithm.GetFileExtension(), nil
}
/*** MAIN COMPRESSION INTERFACE ***/
// compressionResult represents the result of compression for a single block (gotten by a single thread)
type compressionResult struct {
buffer *bytes.Buffer
n uint64
err error
}
// CompressFileReturningBlockData compresses a file returning the block data for that file.
func (c *Compression) CompressFileReturningBlockData(in io.Reader, out io.Writer) (blockData []uint32, err error) {
// Initialize buffered writer
bufw := bufio.NewWriterSize(out, int((c.BlockSize+(c.BlockSize)>>4)*uint32(c.NumThreads)))
// Get blockData, copy over header, add length of header to blockData
blockData = make([]uint32, 0)
header := c.Algorithm.GetHeader()
_, err = bufw.Write(header)
if err != nil {
return nil, err
}
blockData = append(blockData, uint32(len(header)))
// Compress blocks
for {
// Loop through threads, spawning a go procedure for each thread. If we get eof on one thread, set eofAt to that thread and break
compressionResults := make([]chan compressionResult, c.NumThreads)
eofAt := -1
for i := 0; i < c.NumThreads; i++ {
// Create thread channel and allocate buffer to pass to thread
compressionResults[i] = make(chan compressionResult)
var inputBuffer bytes.Buffer
_, err = io.CopyN(&inputBuffer, in, int64(c.BlockSize))
if err == io.EOF {
eofAt = i
} else if err != nil {
return nil, err
}
// Run thread
go func(i int, in []byte) {
// Initialize thread writer and result struct
var res compressionResult
var buffer bytes.Buffer
// Compress block
_, n, err := c.Algorithm.CompressBlock(in, &buffer)
if err != nil && err != io.EOF { // This errored out.
res.buffer = nil
res.n = 0
res.err = err
compressionResults[i] <- res
return
}
// Pass our data back to the main thread as a compression result
res.buffer = &buffer
res.n = n
res.err = err
compressionResults[i] <- res
}(i, inputBuffer.Bytes())
// If we have reached eof, we don't need more threads
if eofAt != -1 {
break
}
}
// Process writers in order
for i := 0; i < c.NumThreads; i++ {
if compressionResults[i] != nil {
// Get current compression result, get buffer, and copy buffer over to output
res := <-compressionResults[i]
close(compressionResults[i])
if res.buffer == nil {
return nil, res.err
}
blockSize := uint32(res.buffer.Len())
_, err = io.Copy(bufw, res.buffer)
if err != nil {
return nil, err
}
if DEBUG {
fmt.Printf("%d %d\n", res.n, blockSize)
}
// Append block size to block data
blockData = append(blockData, blockSize)
// If this is the last block, add the raw size of the last block to the end of blockData and break
if eofAt == i {
if DEBUG {
log.Printf("%d %d %d\n", res.n, byte(res.n%256), byte(res.n/256))
}
blockData = append(blockData, uint32(res.n))
break
}
}
}
// Get number of bytes written in this block (they should all be in the bufio buffer), then close gzip and flush buffer
err = bufw.Flush()
if err != nil {
return nil, err
}
// If eof happened, break
if eofAt != -1 {
if DEBUG {
log.Printf("%d", eofAt)
log.Printf("%v", blockData)
}
break
}
}
// Write footer and flush
footer := c.Algorithm.GetFooter()
_, err = bufw.Write(footer)
if err != nil {
return nil, err
}
err = bufw.Flush()
// Return
return blockData, err
}
/*** BLOCK DECOMPRESSION FUNCTIONS ***/
// Wrapper function for decompressBlock that implements multithreading
// decompressionResult represents the result of decompressing a block
type decompressionResult struct {
err error
buffer *bytes.Buffer
}
func (d *Decompressor) decompressBlockRangeMultithreaded(in io.Reader, out io.Writer, startingBlock uint32) (n int, err error) {
// First, use bufio.Reader to reduce the number of reads and bufio.Writer to reduce the number of writes
bufin := bufio.NewReader(in)
bufout := bufio.NewWriter(out)
// Decompress each block individually.
currBatch := startingBlock // Block # of start of current batch of blocks
totalBytesCopied := 0
for {
// Loop through threads
eofAt := -1
decompressionResults := make([]chan decompressionResult, d.c.NumThreads)
for i := 0; i < d.c.NumThreads; i++ {
// Get currBlock
currBlock := currBatch + uint32(i)
// Create channel
decompressionResults[i] = make(chan decompressionResult)
// Check if we've reached EOF
if currBlock >= d.numBlocks {
eofAt = i
break
}
// Get block to decompress
var compressedBlock bytes.Buffer
var err error
n, err := io.CopyN(&compressedBlock, bufin, d.blockStarts[currBlock+1]-d.blockStarts[currBlock])
if err != nil || n == 0 { // End of stream
eofAt = i
break
}
// Spawn thread to decompress block
if DEBUG {
log.Printf("Spawning %d", i)
}
go func(i int, currBlock uint32, in io.Reader) {
var block bytes.Buffer
var res decompressionResult
// Decompress block
_, res.err = d.c.Algorithm.DecompressBlock(in, &block, d.c.BlockSize)
res.buffer = &block
decompressionResults[i] <- res
}(i, currBlock, &compressedBlock)
}
if DEBUG {
log.Printf("Eof at %d", eofAt)
}
// Process results
for i := 0; i < d.c.NumThreads; i++ {
// If we got EOF, return
if eofAt == i {
return totalBytesCopied, bufout.Flush() // Flushing bufout is needed to prevent us from getting all nulls
}
// Get result and close
res := <-decompressionResults[i]
close(decompressionResults[i])
if res.err != nil {
return totalBytesCopied, res.err
}
// Copy to output and add to total bytes copied
n, err := io.Copy(bufout, res.buffer)
totalBytesCopied += int(n)
if err != nil {
return totalBytesCopied, err
}
}
// Add NumThreads to currBatch
currBatch += uint32(d.c.NumThreads)
}
}
/*** MAIN DECOMPRESSION INTERFACE ***/
// Decompressor is the ReadSeeker implementation for decompression
type Decompressor struct {
cursorPos *int64 // The current location we have seeked to
blockStarts []int64 // The start of each block. These will be recovered from the block sizes
numBlocks uint32 // Number of blocks
decompressedSize int64 // Decompressed size of the file.
in io.ReadSeeker // Input
c *Compression // Compression options
}
// Parses block data. Returns the number of blocks, the block start locations for each block, and the decompressed size of the entire file.
func parseBlockData(blockData []uint32, BlockSize uint32) (numBlocks uint32, blockStarts []int64, decompressedSize int64) {
// Parse the block data
blockDataLen := len(blockData)
numBlocks = uint32(blockDataLen - 1)
if DEBUG {
log.Printf("%v\n", blockData)
log.Printf("metadata len, numblocks = %d, %d", blockDataLen, numBlocks)
}
blockStarts = make([]int64, numBlocks+1) // Starts with start of first block (and end of header), ends with end of last block
currentBlockPosition := int64(0)
for i := uint32(0); i < numBlocks; i++ { // Loop through block data, getting starts of blocks.
currentBlockSize := blockData[i]
currentBlockPosition += int64(currentBlockSize)
blockStarts[i] = currentBlockPosition
}
blockStarts[numBlocks] = currentBlockPosition // End of last block
//log.Printf("Block Starts: %v\n", d.blockStarts)
numBlocks-- // Subtract 1 from number of blocks because our header technically isn't a block
// Get uncompressed size of last block and derive uncompressed size of file
lastBlockRawSize := blockData[blockDataLen-1]
decompressedSize = int64(numBlocks-1)*int64(BlockSize) + int64(lastBlockRawSize)
if DEBUG {
log.Printf("Decompressed size = %d", decompressedSize)
}
return numBlocks, blockStarts, decompressedSize
}
// Initializes decompressor with the block data specified.
func (d *Decompressor) initWithBlockData(c *Compression, in io.ReadSeeker, size int64, blockData []uint32) (err error) {
// Copy over compression object
d.c = c
// Initialize cursor position
d.cursorPos = new(int64)
// Parse the block data
d.numBlocks, d.blockStarts, d.decompressedSize = parseBlockData(blockData, d.c.BlockSize)
// Initialize cursor position value and copy over reader
*d.cursorPos = 0
_, err = in.Seek(0, io.SeekStart)
d.in = in
return err
}
// Read reads data using a decompressor
func (d Decompressor) Read(p []byte) (int, error) {
if DEBUG {
log.Printf("Cursor pos before: %d\n", *d.cursorPos)
}
// Check if we're at the end of the file or before the beginning of the file
if *d.cursorPos >= d.decompressedSize || *d.cursorPos < 0 {
if DEBUG {
log.Println("Out of bounds EOF")
}
return 0, io.EOF
}
// Get block range to read
blockNumber := *d.cursorPos / int64(d.c.BlockSize)
blockStart := d.blockStarts[blockNumber] // Start position of blocks to read
dataOffset := *d.cursorPos % int64(d.c.BlockSize) // Offset of data to read in blocks to read
bytesToRead := len(p) // Number of bytes to read
blocksToRead := (int64(bytesToRead)+dataOffset)/int64(d.c.BlockSize) + 1 // Number of blocks to read
returnEOF := false
if blockNumber+blocksToRead > int64(d.numBlocks) { // Overflowed the last block
blocksToRead = int64(d.numBlocks) - blockNumber
returnEOF = true
}
blockEnd := d.blockStarts[blockNumber+blocksToRead] // Start of the block after the last block we want to get is the end of the last block we want to get
blockLen := blockEnd - blockStart
// Read compressed block range into buffer
var compressedBlocks bytes.Buffer
_, err := d.in.Seek(blockStart, io.SeekStart)
if err != nil {
return 0, err
}
n1, err := io.CopyN(&compressedBlocks, d.in, blockLen)
if DEBUG {
log.Printf("block # = %d @ %d <- %d, len %d, copied %d bytes", blockNumber, blockStart, *d.cursorPos, blockLen, n1)
}
if err != nil {
if DEBUG {
log.Println("Copy Error")
}
return 0, err
}
// Decompress block range
var b bytes.Buffer
n, err := d.decompressBlockRangeMultithreaded(&compressedBlocks, &b, uint32(blockNumber))
if err != nil {
log.Println("Decompression error")
return n, err
}
// Calculate bytes read
readOverflow := *d.cursorPos + int64(bytesToRead) - d.decompressedSize
if readOverflow < 0 {
readOverflow = 0
}
bytesRead := int64(bytesToRead) - readOverflow
if DEBUG {
log.Printf("Read offset = %d, overflow = %d", dataOffset, readOverflow)
log.Printf("Decompressed %d bytes; read %d out of %d bytes\n", n, bytesRead, bytesToRead)
// log.Printf("%v", b.Bytes())
}
// If we read 0 bytes, we reached the end of the file
if bytesRead == 0 {
log.Println("EOF")
return 0, io.EOF
}
// Copy from buffer+offset to p
_, err = io.CopyN(ioutil.Discard, &b, dataOffset)
if err != nil {
return 0, err
}
n, err = b.Read(p) // Note: everything after bytesToRead bytes will be discarded; we are returning bytesToRead instead of n
if err != nil {
return n, err
}
// Increment cursor position and return
*d.cursorPos += bytesRead
if returnEOF {
if DEBUG {
log.Println("EOF")
}
return int(bytesRead), io.EOF
}
return int(bytesRead), nil
}
// Seek seeks to a location in compressed stream
func (d Decompressor) Seek(offset int64, whence int) (int64, error) {
// Seek to offset in cursorPos
if whence == io.SeekStart {
*d.cursorPos = offset
} else if whence == io.SeekCurrent {
*d.cursorPos += offset
} else if whence == io.SeekEnd {
*d.cursorPos = d.decompressedSize + offset
}
// Return
return offset, nil
}
// DecompressFileExtData decompresses a file using external block data. Argument "size" is very useful here.
func (c *Compression) DecompressFileExtData(in io.ReadSeeker, size int64, blockData []uint32) (FileHandle io.ReadSeeker, decompressedSize int64, err error) {
var decompressor Decompressor
err = decompressor.initWithBlockData(c, in, size, blockData)
return decompressor, decompressor.decompressedSize, err
}

View file

@ -0,0 +1,131 @@
package press
import (
"bufio"
"bytes"
"crypto/md5"
"encoding/base64"
"io"
"io/ioutil"
"math/rand"
"os"
"strings"
"testing"
)
const TestStringSmall = "The quick brown fox jumps over the lazy dog."
const TestSizeLarge = 2097152 // 2 megabytes
// Tests compression and decompression for a preset
func testCompressDecompress(t *testing.T, preset string, testString string) {
// Create compression instance
comp, err := NewCompressionPreset(preset)
if err != nil {
t.Fatal(err)
}
// Open files and hashers
testFile := strings.NewReader(testString)
testFileHasher := md5.New()
if err != nil {
t.Fatal(err)
}
compressedFile, err := ioutil.TempFile(os.TempDir(), "rclone_compression_test")
if err != nil {
t.Fatal(err)
}
outHasher := md5.New()
// Compress file and hash it (size doesn't matter here)
testFileReader, testFileWriter := io.Pipe()
go func() {
_, err := io.Copy(io.MultiWriter(testFileHasher, testFileWriter), testFile)
if err != nil {
t.Fatal("Failed to write compressed file")
}
err = testFileWriter.Close()
if err != nil {
t.Log("Failed to close compressed file")
}
}()
var blockData []uint32
blockData, err = comp.CompressFileReturningBlockData(testFileReader, compressedFile)
if err != nil {
t.Fatalf("Compression failed with error: %v", err)
}
testFileHash := testFileHasher.Sum(nil)
// Get the size, seek to the beginning of the compressed file
size, err := compressedFile.Seek(0, io.SeekEnd)
if err != nil {
t.Fatal(err)
}
_, err = compressedFile.Seek(0, io.SeekStart)
if err != nil {
t.Fatal(err)
}
t.Logf("Compressed size: %d\n", size)
// Decompress file into a hasher
var FileHandle io.ReadSeeker
var decompressedSize int64
FileHandle, decompressedSize, err = comp.DecompressFileExtData(compressedFile, size, blockData)
if err != nil {
t.Fatal(err)
}
t.Logf("Decompressed size: %d\n", decompressedSize)
bufr := bufio.NewReaderSize(FileHandle, 12345678)
_, err = io.Copy(outHasher, bufr)
if err != nil && err != io.EOF {
t.Fatal(err)
}
decompressedFileHash := outHasher.Sum(nil)
// Clean up
err = compressedFile.Close()
if err != nil {
t.Log("Warning: cannot close compressed test file")
}
err = os.Remove(compressedFile.Name())
if err != nil {
t.Log("Warning: cannot remove compressed test file")
}
// Compare hashes
if !bytes.Equal(testFileHash, decompressedFileHash) {
t.Logf("Hash of original file: %x\n", testFileHash)
t.Logf("Hash of recovered file: %x\n", decompressedFileHash)
t.Fatal("Hashes do not match!")
}
}
// Tests both small and large strings for a preset
func testSmallLarge(t *testing.T, preset string) {
testStringLarge := getCompressibleString(TestSizeLarge)
t.Run("TestSmall", func(t *testing.T) {
testCompressDecompress(t, preset, TestStringSmall)
})
t.Run("TestLarge", func(t *testing.T) {
testCompressDecompress(t, preset, testStringLarge)
})
}
// Gets a compressible string
func getCompressibleString(size int) string {
// Get pseudorandom bytes
prbytes := make([]byte, size*3/4+16)
prsource := rand.New(rand.NewSource(0))
prsource.Read(prbytes)
// Encode in base64
encoding := base64.NewEncoding("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/")
return encoding.EncodeToString(prbytes)[:size]
}
func TestCompression(t *testing.T) {
testCases := []string{"lz4", "gzip", "xz"}
for _, tc := range testCases {
t.Run(tc, func(t *testing.T) {
testSmallLarge(t, tc)
})
}
}

1398
backend/press/press.go Normal file

File diff suppressed because it is too large Load diff

129
backend/press/press_test.go Normal file
View file

@ -0,0 +1,129 @@
// Test Crypt filesystem interface
package press
import (
"os"
"path/filepath"
"testing"
_ "github.com/rclone/rclone/backend/local"
"github.com/rclone/rclone/fstest"
"github.com/rclone/rclone/fstest/fstests"
)
// TestIntegration runs integration tests against the remote
func TestIntegration(t *testing.T) {
if *fstest.RemoteName == "" {
t.Skip("Skipping as -remote not set")
}
fstests.Run(t, &fstests.Opt{
RemoteName: *fstest.RemoteName,
NilObject: (*Object)(nil),
UnimplementableFsMethods: []string{
"OpenWriterAt",
"MergeDirs",
"DirCacheFlush",
"PutUnchecked",
"PutStream",
"UserInfo",
"Disconnect",
},
UnimplementableObjectMethods: []string{
"GetTier",
"SetTier",
},
})
}
// TestRemoteLz4 tests LZ4 compression
func TestRemoteLz4(t *testing.T) {
if *fstest.RemoteName != "" {
t.Skip("Skipping as -remote set")
}
tempdir := filepath.Join(os.TempDir(), "rclone-press-test-lz4")
name := "TestPressLz4"
fstests.Run(t, &fstests.Opt{
RemoteName: name + ":",
NilObject: (*Object)(nil),
UnimplementableFsMethods: []string{
"OpenWriterAt",
"MergeDirs",
"DirCacheFlush",
"PutUnchecked",
"PutStream",
"UserInfo",
"Disconnect",
},
UnimplementableObjectMethods: []string{
"GetTier",
"SetTier",
},
ExtraConfig: []fstests.ExtraConfigItem{
{Name: name, Key: "type", Value: "press"},
{Name: name, Key: "remote", Value: tempdir},
{Name: name, Key: "compression_mode", Value: "lz4"},
},
})
}
// TestRemoteGzip tests GZIP compression
func TestRemoteGzip(t *testing.T) {
if *fstest.RemoteName != "" {
t.Skip("Skipping as -remote set")
}
tempdir := filepath.Join(os.TempDir(), "rclone-press-test-gzip")
name := "TestPressGzip"
fstests.Run(t, &fstests.Opt{
RemoteName: name + ":",
NilObject: (*Object)(nil),
UnimplementableFsMethods: []string{
"OpenWriterAt",
"MergeDirs",
"DirCacheFlush",
"PutUnchecked",
"PutStream",
"UserInfo",
"Disconnect",
},
UnimplementableObjectMethods: []string{
"GetTier",
"SetTier",
},
ExtraConfig: []fstests.ExtraConfigItem{
{Name: name, Key: "type", Value: "press"},
{Name: name, Key: "remote", Value: tempdir},
{Name: name, Key: "compression_mode", Value: "gzip"},
},
})
}
// TestRemoteXz tests XZ compression
func TestRemoteXz(t *testing.T) {
if *fstest.RemoteName != "" {
t.Skip("Skipping as -remote set")
}
tempdir := filepath.Join(os.TempDir(), "rclone-press-test-xz")
name := "TestPressXz"
fstests.Run(t, &fstests.Opt{
RemoteName: name + ":",
NilObject: (*Object)(nil),
UnimplementableFsMethods: []string{
"OpenWriterAt",
"MergeDirs",
"DirCacheFlush",
"PutUnchecked",
"PutStream",
"UserInfo",
"Disconnect",
},
UnimplementableObjectMethods: []string{
"GetTier",
"SetTier",
},
ExtraConfig: []fstests.ExtraConfigItem{
{Name: name, Key: "type", Value: "press"},
{Name: name, Key: "remote", Value: tempdir},
{Name: name, Key: "compression_mode", Value: "xz"},
},
})
}

View file

@ -72,6 +72,7 @@ Features
* [Cache](/cache/) backend
* [Chunking](/chunker/) backend
* [Union](/union/) backend
* Experimental [Compression](/press/) backend
* Optional FUSE mount ([rclone mount](/commands/rclone_mount/))
* Multi-threaded downloads to local disk
* Can [serve](/commands/rclone_serve/) local or remote files over [HTTP](/commands/rclone_serve_http/)/[WebDav](/commands/rclone_serve_webdav/)/[FTP](/commands/rclone_serve_ftp/)/[SFTP](/commands/rclone_serve_sftp/)/[dlna](/commands/rclone_serve_dlna/)

81
docs/content/press.md Normal file
View file

@ -0,0 +1,81 @@
---
title: "Press"
description: "Compression Remote"
date: "2019-05-12"
---
Press (Experimental)
-----------------------------------------
The `press` remote adds compression to another remote. It is best used with remotes containing
many large compressible files or on top of other remotes like crypt.
Please read the [warnings](#warnings) before using this remote.
To use this remote, all you need to do is specify another remote and a compression mode to use:
```
Current remotes:
Name Type
==== ====
remote_to_press sometype
e) Edit existing remote
$ rclone config
n) New remote
d) Delete remote
r) Rename remote
c) Copy remote
s) Set configuration password
q) Quit config
e/n/d/r/c/s/q> n
name> press
...
8 / Compress a remote
\ "press"
...
Storage> press
** See help for press backend at: https://rclone.org/press/ **
Remote to compress.
Enter a string value. Press Enter for the default ("")
remote> remote_to_press
Compression mode. XZ compression mode requires the xz binary to be in PATH.
Enter a string value. Press Enter for the default ("gzip-min").
Choose a number from below, or type in your own value
1 / Fast, real-time compression with reasonable compression ratios.
\ "lz4"
2 / Google's compression algorithm. Slightly faster and larger than LZ4.
\ "snappy"
3 / Standard gzip compression with fastest parameters.
\ "gzip-min"
4 / Standard gzip compression with default parameters.
\ "gzip-default"
5 / Slow but powerful compression with reasonable speed.
\ "xz-min"
6 / Slowest but best compression.
\ "xz-default"
compression_mode> gzip-min
```
### Compression Modes
Currently there are four compression algorithms supported: lz4, snappy, gzip, and xz.
Gzip and xz are further divided into two modes: "min" with less compression and "default" with more.
Currently, xz modes are only supported if there is an xz binary in your system's $PATH.
Depending on your operating system, the methods of installing this binary vary. This may be changed in
future updates.
### Warnings
#### Filetype
If you open a remote wrapped by press, you will see that there are many files with an extension corresponding to
the compression algorithm you chose. These files, with the exception of snappy files, are standard files that
can be opened by various archive programs, but they have some hidden metadata that allows them to be used by rclone.
While you may download and decompress these files at will, do **not** upload any compressed files to a wrapped remote
through any other means than rclone. This will upload files that do not contain metadata and **will** cause unexpected behavior.
#### Experimental
This remote is currently **experimental**. Things may break and data may be lost. Anything you do with this remote is
at your own risk. Please understand the risks associated with using experimental code and don't use this remote in
critical applications.

View file

@ -262,4 +262,4 @@ backends:
- backend: "mailru"
remote: "TestMailru:"
subdir: false
fastlist: false
fastlist: false

9
go.mod
View file

@ -13,14 +13,17 @@ require (
github.com/atotto/clipboard v0.1.2
github.com/aws/aws-sdk-go v1.29.9
github.com/billziss-gh/cgofuse v1.2.0
github.com/buengese/xxh32 v1.0.1
github.com/djherbis/times v1.2.0
github.com/dropbox/dropbox-sdk-go-unofficial v5.6.0+incompatible
github.com/gabriel-vasile/mimetype v1.0.2
github.com/google/go-querystring v1.0.0 // indirect
github.com/gopherjs/gopherjs v0.0.0-20190812055157-5d271430af9f // indirect
github.com/hanwen/go-fuse/v2 v2.0.3-0.20191108143333-152e6ac32d54
github.com/jlaffaye/ftp v0.0.0-20191218041957-e1b8fdd0dcc3
github.com/jzelinskie/whirlpool v0.0.0-20170603002051-c19460b8caa6
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 // indirect
github.com/klauspost/compress v1.10.1
github.com/konsorten/go-windows-terminal-sequences v1.0.2 // indirect
github.com/koofr/go-httpclient v0.0.0-20190818202018-e0dc8fd921dc
github.com/koofr/go-koofrclient v0.0.0-20190724113126-8e5366da203a
@ -33,9 +36,10 @@ require (
github.com/ncw/swift v1.0.50
github.com/nsf/termbox-go v0.0.0-20200204031403-4d2b513ad8be
github.com/okzk/sdnotify v0.0.0-20180710141335-d9becc38acbd
github.com/onsi/ginkgo v1.9.0 // indirect
github.com/onsi/gomega v1.6.0 // indirect
github.com/onsi/ginkgo v1.12.0 // indirect
github.com/onsi/gomega v1.9.0 // indirect
github.com/patrickmn/go-cache v2.1.0+incompatible
github.com/pierrec/lz4 v2.4.1+incompatible
github.com/pkg/errors v0.9.1
github.com/pkg/sftp v1.11.0
github.com/prometheus/client_golang v1.4.1
@ -50,6 +54,7 @@ require (
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.5.1
github.com/t3rm1n4l/go-mega v0.0.0-20200117211730-79a813bb328d
github.com/ulikunitz/xz v0.5.7
github.com/xanzy/ssh-agent v0.2.1
github.com/youmark/pkcs8 v0.0.0-20191102193632-94c173a94d60
github.com/yunify/qingstor-sdk-go/v3 v3.2.0

27
go.sum
View file

@ -76,6 +76,8 @@ github.com/billziss-gh/cgofuse v1.2.0 h1:FMdQSygSBpD4yEPENJcmvfCdmNWMVkPLlD7wWdl
github.com/billziss-gh/cgofuse v1.2.0/go.mod h1:LJjoaUojlVjgo5GQoEJTcJNqZJeRU0nCR84CyxKt2YM=
github.com/bradfitz/iter v0.0.0-20140124041915-454541ec3da2/go.mod h1:PyRFw1Lt2wKX4ZVSQ2mk+PeDa1rxyObEDlApuIsUKuo=
github.com/bradfitz/iter v0.0.0-20190303215204-33e6a9893b0c/go.mod h1:PyRFw1Lt2wKX4ZVSQ2mk+PeDa1rxyObEDlApuIsUKuo=
github.com/buengese/xxh32 v1.0.1 h1:aNZNg2XxotiTr6JD+R4bzmL1uzMZ2KEKvxyj4P1Z1Xw=
github.com/buengese/xxh32 v1.0.1/go.mod h1:Q5GTtu7m/GuqzCc8YZ0n+oetaGFwW7oy291HvqLTZFk=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
@ -106,8 +108,12 @@ github.com/dropbox/dropbox-sdk-go-unofficial v5.6.0+incompatible/go.mod h1:lr+Lh
github.com/dustin/go-humanize v0.0.0-20180421182945-02af3965c54e/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/frankban/quicktest v1.7.3 h1:kV0lw0TH1j1hozahVmcpFCsbV5hcS4ZalH+U7UoeTow=
github.com/frankban/quicktest v1.7.3/go.mod h1:V1d2J5pfxYH6EjBAgSK7YNXcXlTWxUHdE1sVDXkjnig=
github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/gabriel-vasile/mimetype v1.0.2 h1:GKCo1TUCg0pV0R4atTcaLv/9SI2W9xPgMySZxUxcJOE=
github.com/gabriel-vasile/mimetype v1.0.2/go.mod h1:6CDPel/o/3/s4+bp6kIbsWATq8pmgOisOPG40CJa6To=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
github.com/glycerine/goconvey v0.0.0-20180728074245-46e3a41ad493/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
@ -140,6 +146,7 @@ github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3 h1:gyjaxf+svBWX08ZjK86iN9geUJF0H6gp2IRKX6Nf6/I=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db h1:woRePGFeVFfLKN/pOkfl+p/TAqKOfFu+7KPlMVpok/w=
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180124185431-e89373fe6b4a/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
@ -204,6 +211,8 @@ github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 h1:iQTw/8FWTuc7uia
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8=
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.10.1 h1:a/QY0o9S6wCi0XhxaMX/QmusicNUqCqFugR6WKPOSoQ=
github.com/klauspost/compress v1.10.1/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2 h1:DB17ag19krx9CFsz4o3enTrPXyIXCl+2iCXH/aMAp9s=
@ -258,16 +267,19 @@ github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn
github.com/okzk/sdnotify v0.0.0-20180710141335-d9becc38acbd h1:+iAPaTbi1gZpcpDwe/BW1fx7Xoesv69hLNGPheoyhBs=
github.com/okzk/sdnotify v0.0.0-20180710141335-d9becc38acbd/go.mod h1:4soZNh0zW0LtYGdQ416i0jO0EIqMGcbtaspRS4BDvRQ=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.9.0 h1:SZjF721BByVj8QH636/8S2DnX4n0Re3SteMmw3N+tzc=
github.com/onsi/ginkgo v1.9.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/gomega v1.6.0 h1:8XTW0fcJZEq9q+Upcyws4JSGua2MFysCL5xkaSgHc+M=
github.com/onsi/gomega v1.6.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/onsi/ginkgo v1.12.0 h1:Iw5WCbBcaAAd0fpRb1c9r5YCylv4XDoCSigm1zLevwU=
github.com/onsi/ginkgo v1.12.0/go.mod h1:oUhWkIvk5aDxtKvDDuw8gItl8pKl42LzjC9KZE0HfGg=
github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
github.com/onsi/gomega v1.9.0 h1:R1uwffexN6Pr340GtYRIdZmAiN4J+iw6WG4wog1DUXg=
github.com/onsi/gomega v1.9.0/go.mod h1:Ho0h+IUsWyvy1OpqCwxlQ/21gkhVunqlU8fDGcoTdcA=
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14 h1:XeOYlK9W1uCmhjJSsY78Mcuh7MVkNjTzmHx1yBzizSU=
github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14/go.mod h1:jVblp62SafmidSkvWrXyxAme3gaTfEtWwRPGz5cpvHg=
github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
github.com/pierrec/lz4 v2.4.1+incompatible h1:mFe7ttWaflA46Mhqh+jUfjp2qTbPYxLB2/OyBppH9dg=
github.com/pierrec/lz4 v2.4.1+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
@ -358,6 +370,8 @@ github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1
github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c h1:u6SKchux2yDvFQnDHS3lPnIRmfVJ5Sxy3ao2SIdysLQ=
github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c/go.mod h1:hzIxponao9Kjc7aWznkXaL4U4TWaDSs8zcsY4Ka08nM=
github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
github.com/ulikunitz/xz v0.5.7 h1:YvTNdFzX6+W5m9msiYg/zpkSURPPtOlzbqYjrFn7Yt4=
github.com/ulikunitz/xz v0.5.7/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/willf/bitset v1.1.9/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
github.com/xanzy/ssh-agent v0.2.1 h1:TCbipTQL2JiiCprBWx9frJ2eJlCYT00NmctrHxVAr70=
github.com/xanzy/ssh-agent v0.2.1/go.mod h1:mLlQY/MoOhWBj+gOGMQkOeiEvkx+8pJSI+0Bx9h2kr4=
@ -475,6 +489,7 @@ golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191112214154-59a1497f0cea/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191210023423-ac6580df4449 h1:gSbV7h1NRL2G1xTg/owz62CST1oJBmxy4QpMMregXVQ=
golang.org/x/sys v0.0.0-20191210023423-ac6580df4449/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@ -501,6 +516,7 @@ golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190328211700-ab21143f2384 h1:TFlARGu6Czu1z7q93HTxcP1P+/ZFC/IKythI5RzrnRg=
golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
@ -576,6 +592,7 @@ gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLks
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
@ -588,6 +605,8 @@ gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.5 h1:ymVxjfMaHvXD8RqPRmzHHsB3VvucivSkIAvJFDI5O3c=
gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

5
vendor/github.com/buengese/xxh32/go.mod generated vendored Normal file
View file

@ -0,0 +1,5 @@
module github.com/buengese/xxh32
go 1.13
require github.com/frankban/quicktest v1.7.3

11
vendor/github.com/buengese/xxh32/go.sum generated vendored Normal file
View file

@ -0,0 +1,11 @@
github.com/frankban/quicktest v1.7.3 h1:kV0lw0TH1j1hozahVmcpFCsbV5hcS4ZalH+U7UoeTow=
github.com/frankban/quicktest v1.7.3/go.mod h1:V1d2J5pfxYH6EjBAgSK7YNXcXlTWxUHdE1sVDXkjnig=
github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

223
vendor/github.com/buengese/xxh32/xxh32zero.go generated vendored Normal file
View file

@ -0,0 +1,223 @@
// Package xxh32 implements the very fast XXH hashing algorithm (32 bits version).
// (https://github.com/Cyan4973/XXH/)
package xxh32
import (
"encoding/binary"
)
const (
prime1 uint32 = 2654435761
prime2 uint32 = 2246822519
prime3 uint32 = 3266489917
prime4 uint32 = 668265263
prime5 uint32 = 374761393
primeMask = 0xFFFFFFFF
prime1plus2 = uint32((uint64(prime1) + uint64(prime2)) & primeMask) // 606290984
prime1minus = uint32((-int64(prime1)) & primeMask) // 1640531535
)
// XXHZero represents an xxhash32 object with seed 0.
type XXHZero struct {
v1 uint32
v2 uint32
v3 uint32
v4 uint32
totalLen uint64
buf [16]byte
bufused int
}
// Sum appends the current hash to b and returns the resulting slice.
// It does not change the underlying hash state.
func (xxh XXHZero) Sum(b []byte) []byte {
h32 := xxh.Sum32()
return append(b, byte(h32), byte(h32>>8), byte(h32>>16), byte(h32>>24))
}
// Reset resets the Hash to its initial state.
func (xxh *XXHZero) Reset() {
xxh.v1 = prime1plus2
xxh.v2 = prime2
xxh.v3 = 0
xxh.v4 = prime1minus
xxh.totalLen = 0
xxh.bufused = 0
}
// Size returns the number of bytes returned by Sum().
func (xxh *XXHZero) Size() int {
return 4
}
// BlockSize gives the minimum number of bytes accepted by Write().
func (xxh *XXHZero) BlockSize() int {
return 1
}
// Write adds input bytes to the Hash.
// It never returns an error.
func (xxh *XXHZero) Write(input []byte) (int, error) {
if xxh.totalLen == 0 {
xxh.Reset()
}
n := len(input)
m := xxh.bufused
xxh.totalLen += uint64(n)
r := len(xxh.buf) - m
if n < r {
copy(xxh.buf[m:], input)
xxh.bufused += len(input)
return n, nil
}
p := 0
// Causes compiler to work directly from registers instead of stack:
v1, v2, v3, v4 := xxh.v1, xxh.v2, xxh.v3, xxh.v4
if m > 0 {
// some data left from previous update
copy(xxh.buf[xxh.bufused:], input[:r])
xxh.bufused += len(input) - r
// fast rotl(13)
buf := xxh.buf[:16] // BCE hint.
v1 = rol13(v1+binary.LittleEndian.Uint32(buf[:])*prime2) * prime1
v2 = rol13(v2+binary.LittleEndian.Uint32(buf[4:])*prime2) * prime1
v3 = rol13(v3+binary.LittleEndian.Uint32(buf[8:])*prime2) * prime1
v4 = rol13(v4+binary.LittleEndian.Uint32(buf[12:])*prime2) * prime1
p = r
xxh.bufused = 0
}
for n := n - 16; p <= n; p += 16 {
sub := input[p:][:16] //BCE hint for compiler
v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1
v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1
v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1
v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1
}
xxh.v1, xxh.v2, xxh.v3, xxh.v4 = v1, v2, v3, v4
copy(xxh.buf[xxh.bufused:], input[p:])
xxh.bufused += len(input) - p
return n, nil
}
// Sum32 returns the 32 bits Hash value.
func (xxh *XXHZero) Sum32() uint32 {
h32 := uint32(xxh.totalLen)
if h32 >= 16 {
h32 += rol1(xxh.v1) + rol7(xxh.v2) + rol12(xxh.v3) + rol18(xxh.v4)
} else {
h32 += prime5
}
p := 0
n := xxh.bufused
buf := xxh.buf
for n := n - 4; p <= n; p += 4 {
h32 += binary.LittleEndian.Uint32(buf[p:p+4]) * prime3
h32 = rol17(h32) * prime4
}
for ; p < n; p++ {
h32 += uint32(buf[p]) * prime5
h32 = rol11(h32) * prime1
}
h32 ^= h32 >> 15
h32 *= prime2
h32 ^= h32 >> 13
h32 *= prime3
h32 ^= h32 >> 16
return h32
}
// ChecksumZero returns the 32bits Hash value.
func ChecksumZero(input []byte) uint32 {
n := len(input)
h32 := uint32(n)
if n < 16 {
h32 += prime5
} else {
v1 := prime1plus2
v2 := prime2
v3 := uint32(0)
v4 := prime1minus
p := 0
for n := n - 16; p <= n; p += 16 {
sub := input[p:][:16] //BCE hint for compiler
v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1
v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1
v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1
v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1
}
input = input[p:]
n -= p
h32 += rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
}
p := 0
for n := n - 4; p <= n; p += 4 {
h32 += binary.LittleEndian.Uint32(input[p:p+4]) * prime3
h32 = rol17(h32) * prime4
}
for p < n {
h32 += uint32(input[p]) * prime5
h32 = rol11(h32) * prime1
p++
}
h32 ^= h32 >> 15
h32 *= prime2
h32 ^= h32 >> 13
h32 *= prime3
h32 ^= h32 >> 16
return h32
}
// Uint32Zero hashes x with seed 0.
func Uint32Zero(x uint32) uint32 {
h := prime5 + 4 + x*prime3
h = rol17(h) * prime4
h ^= h >> 15
h *= prime2
h ^= h >> 13
h *= prime3
h ^= h >> 16
return h
}
func rol1(u uint32) uint32 {
return u<<1 | u>>31
}
func rol7(u uint32) uint32 {
return u<<7 | u>>25
}
func rol11(u uint32) uint32 {
return u<<11 | u>>21
}
func rol12(u uint32) uint32 {
return u<<12 | u>>20
}
func rol13(u uint32) uint32 {
return u<<13 | u>>19
}
func rol17(u uint32) uint32 {
return u<<17 | u>>15
}
func rol18(u uint32) uint32 {
return u<<18 | u>>14
}

View file

@ -0,0 +1 @@
testdata/* linguist-vendored

14
vendor/github.com/gabriel-vasile/mimetype/.travis.yml generated vendored Normal file
View file

@ -0,0 +1,14 @@
language: go
go:
- "1.12"
- "master"
before_install:
- go get github.com/mattn/goveralls
- go get github.com/client9/misspell/cmd/misspell
before_script:
- go vet .
script:
- diff -u <(echo -n) <(gofmt -d ./)
- go test -v
- $GOPATH/bin/goveralls -service=travis-ci
- misspell -locale US -error *.md *.go

View file

@ -0,0 +1,76 @@
# Contributor Covenant Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at vasile.gabriel@email.com. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq

View file

@ -0,0 +1,12 @@
## Contribute
Contributions to **mimetype** are welcome. If you find an issue and you consider
contributing, you can use the [Github issues tracker](https://github.com/gabriel-vasile/mimetype/issues)
in order to report it, or better yet, open a pull request.
Code contributions must respect these rules:
- code must be test covered
- code must be formatted using gofmt tool
- exported names must be documented
**Important**: By submitting a pull request, you agree to allow the project
owner to license your work under the same license as that used by the project.

79
vendor/github.com/gabriel-vasile/mimetype/EXAMPLES.md generated vendored Normal file
View file

@ -0,0 +1,79 @@
## Examples
- [Detect MIME type](#detect)
- [Check against MIME type](#check)
- [Check base MIME type](#check-parent)
- [Binary file vs text file](#binary-file-vs-text-file)
### Detect
Get the MIME type from a slice of bytes, from a reader and from a file.
```go
// Detect the MIME type of a file stored as a byte slice.
file := "testdata/pdf.pdf"
// Detect the MIME type of a file.
mime, ferr := mimetype.DetectFile(file)
fmt.Println(mime, ferr)
// Output: application/pdf nil
// Detect the MIME type of a reader.
reader, _ := os.Open(file) // ignoring error for brevity's sake
mime, rerr := mimetype.DetectReader(reader)
fmt.Println(mime, rerr)
// Output: application/pdf nil
mime := mimetype.Detect(data)
fmt.Println(mime)
// Output: application/pdf
```
### Check
Test if a file has a specific MIME type. Also accepts MIME type aliases.
```go
mime, err := mimetype.DetectFile("testdata/zip.zip")
// application/x-zip is an alias of application/zip,
// therefore Is returns true both times.
fmt.Println(mime.Is("application/zip"), mime.Is("application/x-zip"), err)
// Output: true true <nil>
```
### Check parent
Test if a file has a specific base MIME type. First perform a detect on the
input and then navigate the parents until the base MIME type is found.
Considering JAR files are just ZIPs containing some metadata files,
if, for example, you need to tell if the input can be unzipped, go up the
MIME hierarchy until zip is found or the root is reached.
```go
detectedMIME, err := mimetype.DetectFile("testdata/jar.jar")
zip := false
for mime := detectedMIME; mime != nil; mime = mime.Parent() {
if mime.Is("application/zip") {
zip = true
}
}
// zip is true, even if the detected MIME was application/jar.
fmt.Println(zip, detectedMIME, err)
// Output: true application/jar <nil>
```
### Binary file vs text file
Considering the definition of a binary file as "a computer file that is not
a text file", they can be differentiated by searching for the text/plain MIME
in it's MIME hierarchy.
```go
detectedMIME, err := mimetype.DetectFile("testdata/xml.xml")
isBinary := true
for mime := detectedMIME; mime != nil; mime = mime.Parent() {
if mime.Is("text/plain") {
isBinary = false
}
}
fmt.Println(isBinary, detectedMIME, err)
// Output: false text/xml; charset=utf-8 <nil>
```

21
vendor/github.com/gabriel-vasile/mimetype/LICENSE generated vendored Normal file
View file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018, 2019 Gabriel Vasile
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

74
vendor/github.com/gabriel-vasile/mimetype/README.md generated vendored Normal file
View file

@ -0,0 +1,74 @@
<h1 align="center">
mimetype
</h1>
<h4 align="center">
A package for detecting MIME types and extensions based on magic numbers
</h4>
<h6 align="center">
No C bindings, zero dependencies and thread safe
</h6>
<p align="center">
<a href="https://travis-ci.org/gabriel-vasile/mimetype">
<img alt="Build Status" src="https://travis-ci.org/gabriel-vasile/mimetype.svg?branch=master">
</a>
<a href="https://godoc.org/github.com/gabriel-vasile/mimetype">
<img alt="Documentation" src="https://godoc.org/github.com/gabriel-vasile/mimetype?status.svg">
</a>
<a href="https://goreportcard.com/report/github.com/gabriel-vasile/mimetype">
<img alt="Go report card" src="https://goreportcard.com/badge/github.com/gabriel-vasile/mimetype">
</a>
<a href="https://coveralls.io/github/gabriel-vasile/mimetype?branch=master">
<img alt="Go report card" src="https://coveralls.io/repos/github/gabriel-vasile/mimetype/badge.svg?branch=master">
</a>
<a href="LICENSE">
<img alt="License" src="https://img.shields.io/badge/License-MIT-green.svg">
</a>
</p>
## Install
```bash
go get github.com/gabriel-vasile/mimetype
```
## Usage
There are quick [examples](EXAMPLES.md) and
[GoDoc](https://godoc.org/github.com/gabriel-vasile/mimetype) for full reference.
## Upgrade from v0.3.x to v1.x
In v1.x the detect functions no longer return the MIME type and extension as
strings. Instead they return a [MIME](https://godoc.org/github.com/gabriel-vasile/mimetype#MIME)
struct. To get the string value of the MIME and the extension, call the
`String()` and the `Extension()` methods.
In order to play better with the stdlib `mime` package, v1.x file extensions
include the leading dot, as in ".html".
In v1.x the `text/plain` MIME type is `text/plain; charset=utf-8`.
## Supported MIME types
See [supported mimes](supported_mimes.md) for the list of detected MIME types.
If support is needed for a specific file format, please open an [issue](https://github.com/gabriel-vasile/mimetype/issues/new/choose).
## Structure
**mimetype** uses an hierarchical structure to keep the MIME type detection logic.
This reduces the number of calls needed for detecting the file type. The reason
behind this choice is that there are file formats used as containers for other
file formats. For example, Microsoft Office files are just zip archives,
containing specific metadata files. Once a file a file has been identified as a
zip, there is no need to check if it is a text file, but it is worth checking if
it is an Microsoft Office file.
To prevent loading entire files into memory, when detecting from a
[reader](https://godoc.org/github.com/gabriel-vasile/mimetype#DetectReader)
or from a [file](https://godoc.org/github.com/gabriel-vasile/mimetype#DetectFile)
**mimetype** limits itself to reading only the first
[3072](https://github.com/gabriel-vasile/mimetype/blob/master/internal/matchers/matchers.go#L6)
bytes from the input.
<div align="center">
<img alt="structure" src="mimetype.gif" width="88%">
</div>
## Contributing
See [CONTRIBUTING.md](CONTRIBUTING.md).

3
vendor/github.com/gabriel-vasile/mimetype/go.mod generated vendored Normal file
View file

@ -0,0 +1,3 @@
module github.com/gabriel-vasile/mimetype
go 1.12

View file

@ -0,0 +1,536 @@
// Copyright (c) 2009 The Go Authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Package json provides a JSON value parser state machine.
// This package is almost entirely copied from the Go stdlib.
// Changes made to it permit users of the package to tell
// if some slice of bytes is a valid beginning of a json string.
package json
import "fmt"
type (
context int
scanStatus int
)
const (
contextKey context = iota
contextObj
contextArr
scanContinue scanStatus = iota // uninteresting byte
scanBeginLiteral // end implied by next result != scanContinue
scanBeginObject // begin object
scanObjectKey // just finished object key (string)
scanObjectValue // just finished non-last object value
scanEndObject // end object (implies scanObjectValue if possible)
scanBeginArray // begin array
scanArrayValue // just finished array value
scanEndArray // end array (implies scanArrayValue if possible)
scanSkipSpace // space byte; can skip; known to be last "continue" result
scanEnd // top-level value ended *before* this byte; known to be first "stop" result
scanError // hit an error, scanner.err.
)
type (
scanner struct {
step func(*scanner, byte) scanStatus
contexts []context
endTop bool
err error
index int
}
)
// Scan returns the number of bytes scanned and if there was any error
// in trying to reach the end of data
func Scan(data []byte) (int, error) {
s := &scanner{}
_ = checkValid(data, s)
return s.index, s.err
}
// checkValid verifies that data is valid JSON-encoded data.
// scan is passed in for use by checkValid to avoid an allocation.
func checkValid(data []byte, scan *scanner) error {
scan.reset()
for _, c := range data {
scan.index++
if scan.step(scan, c) == scanError {
return scan.err
}
}
if scan.eof() == scanError {
return scan.err
}
return nil
}
func isSpace(c byte) bool {
return c == ' ' || c == '\t' || c == '\r' || c == '\n'
}
func (s *scanner) reset() {
s.step = stateBeginValue
s.contexts = s.contexts[0:0]
s.err = nil
}
// eof tells the scanner that the end of input has been reached.
// It returns a scan status just as s.step does.
func (s *scanner) eof() scanStatus {
if s.err != nil {
return scanError
}
if s.endTop {
return scanEnd
}
s.step(s, ' ')
if s.endTop {
return scanEnd
}
if s.err == nil {
s.err = fmt.Errorf("unexpected end of JSON input")
}
return scanError
}
// pushContext pushes a new parse state p onto the parse stack.
func (s *scanner) pushParseState(p context) {
s.contexts = append(s.contexts, p)
}
// popParseState pops a parse state (already obtained) off the stack
// and updates s.step accordingly.
func (s *scanner) popParseState() {
n := len(s.contexts) - 1
s.contexts = s.contexts[0:n]
if n == 0 {
s.step = stateEndTop
s.endTop = true
} else {
s.step = stateEndValue
}
}
// stateBeginValueOrEmpty is the state after reading `[`.
func stateBeginValueOrEmpty(s *scanner, c byte) scanStatus {
if c <= ' ' && isSpace(c) {
return scanSkipSpace
}
if c == ']' {
return stateEndValue(s, c)
}
return stateBeginValue(s, c)
}
// stateBeginValue is the state at the beginning of the input.
func stateBeginValue(s *scanner, c byte) scanStatus {
if c <= ' ' && isSpace(c) {
return scanSkipSpace
}
switch c {
case '{':
s.step = stateBeginStringOrEmpty
s.pushParseState(contextKey)
return scanBeginObject
case '[':
s.step = stateBeginValueOrEmpty
s.pushParseState(contextArr)
return scanBeginArray
case '"':
s.step = stateInString
return scanBeginLiteral
case '-':
s.step = stateNeg
return scanBeginLiteral
case '0': // beginning of 0.123
s.step = state0
return scanBeginLiteral
case 't': // beginning of true
s.step = stateT
return scanBeginLiteral
case 'f': // beginning of false
s.step = stateF
return scanBeginLiteral
case 'n': // beginning of null
s.step = stateN
return scanBeginLiteral
}
if '1' <= c && c <= '9' { // beginning of 1234.5
s.step = state1
return scanBeginLiteral
}
return s.error(c, "looking for beginning of value")
}
// stateBeginStringOrEmpty is the state after reading `{`.
func stateBeginStringOrEmpty(s *scanner, c byte) scanStatus {
if c <= ' ' && isSpace(c) {
return scanSkipSpace
}
if c == '}' {
n := len(s.contexts)
s.contexts[n-1] = contextObj
return stateEndValue(s, c)
}
return stateBeginString(s, c)
}
// stateBeginString is the state after reading `{"key": value,`.
func stateBeginString(s *scanner, c byte) scanStatus {
if c <= ' ' && isSpace(c) {
return scanSkipSpace
}
if c == '"' {
s.step = stateInString
return scanBeginLiteral
}
return s.error(c, "looking for beginning of object key string")
}
// stateEndValue is the state after completing a value,
// such as after reading `{}` or `true` or `["x"`.
func stateEndValue(s *scanner, c byte) scanStatus {
n := len(s.contexts)
if n == 0 {
// Completed top-level before the current byte.
s.step = stateEndTop
s.endTop = true
return stateEndTop(s, c)
}
if c <= ' ' && isSpace(c) {
s.step = stateEndValue
return scanSkipSpace
}
ps := s.contexts[n-1]
switch ps {
case contextKey:
if c == ':' {
s.contexts[n-1] = contextObj
s.step = stateBeginValue
return scanObjectKey
}
return s.error(c, "after object key")
case contextObj:
if c == ',' {
s.contexts[n-1] = contextKey
s.step = stateBeginString
return scanObjectValue
}
if c == '}' {
s.popParseState()
return scanEndObject
}
return s.error(c, "after object key:value pair")
case contextArr:
if c == ',' {
s.step = stateBeginValue
return scanArrayValue
}
if c == ']' {
s.popParseState()
return scanEndArray
}
return s.error(c, "after array element")
}
return s.error(c, "")
}
// stateEndTop is the state after finishing the top-level value,
// such as after reading `{}` or `[1,2,3]`.
// Only space characters should be seen now.
func stateEndTop(s *scanner, c byte) scanStatus {
if c != ' ' && c != '\t' && c != '\r' && c != '\n' {
// Complain about non-space byte on next call.
s.error(c, "after top-level value")
}
return scanEnd
}
// stateInString is the state after reading `"`.
func stateInString(s *scanner, c byte) scanStatus {
if c == '"' {
s.step = stateEndValue
return scanContinue
}
if c == '\\' {
s.step = stateInStringEsc
return scanContinue
}
if c < 0x20 {
return s.error(c, "in string literal")
}
return scanContinue
}
// stateInStringEsc is the state after reading `"\` during a quoted string.
func stateInStringEsc(s *scanner, c byte) scanStatus {
switch c {
case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
s.step = stateInString
return scanContinue
case 'u':
s.step = stateInStringEscU
return scanContinue
}
return s.error(c, "in string escape code")
}
// stateInStringEscU is the state after reading `"\u` during a quoted string.
func stateInStringEscU(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
s.step = stateInStringEscU1
return scanContinue
}
// numbers
return s.error(c, "in \\u hexadecimal character escape")
}
// stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
func stateInStringEscU1(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
s.step = stateInStringEscU12
return scanContinue
}
// numbers
return s.error(c, "in \\u hexadecimal character escape")
}
// stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
func stateInStringEscU12(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
s.step = stateInStringEscU123
return scanContinue
}
// numbers
return s.error(c, "in \\u hexadecimal character escape")
}
// stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
func stateInStringEscU123(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
s.step = stateInString
return scanContinue
}
// numbers
return s.error(c, "in \\u hexadecimal character escape")
}
// stateNeg is the state after reading `-` during a number.
func stateNeg(s *scanner, c byte) scanStatus {
if c == '0' {
s.step = state0
return scanContinue
}
if '1' <= c && c <= '9' {
s.step = state1
return scanContinue
}
return s.error(c, "in numeric literal")
}
// state1 is the state after reading a non-zero integer during a number,
// such as after reading `1` or `100` but not `0`.
func state1(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' {
s.step = state1
return scanContinue
}
return state0(s, c)
}
// state0 is the state after reading `0` during a number.
func state0(s *scanner, c byte) scanStatus {
if c == '.' {
s.step = stateDot
return scanContinue
}
if c == 'e' || c == 'E' {
s.step = stateE
return scanContinue
}
return stateEndValue(s, c)
}
// stateDot is the state after reading the integer and decimal point in a number,
// such as after reading `1.`.
func stateDot(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' {
s.step = stateDot0
return scanContinue
}
return s.error(c, "after decimal point in numeric literal")
}
// stateDot0 is the state after reading the integer, decimal point, and subsequent
// digits of a number, such as after reading `3.14`.
func stateDot0(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' {
return scanContinue
}
if c == 'e' || c == 'E' {
s.step = stateE
return scanContinue
}
return stateEndValue(s, c)
}
// stateE is the state after reading the mantissa and e in a number,
// such as after reading `314e` or `0.314e`.
func stateE(s *scanner, c byte) scanStatus {
if c == '+' || c == '-' {
s.step = stateESign
return scanContinue
}
return stateESign(s, c)
}
// stateESign is the state after reading the mantissa, e, and sign in a number,
// such as after reading `314e-` or `0.314e+`.
func stateESign(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' {
s.step = stateE0
return scanContinue
}
return s.error(c, "in exponent of numeric literal")
}
// stateE0 is the state after reading the mantissa, e, optional sign,
// and at least one digit of the exponent in a number,
// such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
func stateE0(s *scanner, c byte) scanStatus {
if '0' <= c && c <= '9' {
return scanContinue
}
return stateEndValue(s, c)
}
// stateT is the state after reading `t`.
func stateT(s *scanner, c byte) scanStatus {
if c == 'r' {
s.step = stateTr
return scanContinue
}
return s.error(c, "in literal true (expecting 'r')")
}
// stateTr is the state after reading `tr`.
func stateTr(s *scanner, c byte) scanStatus {
if c == 'u' {
s.step = stateTru
return scanContinue
}
return s.error(c, "in literal true (expecting 'u')")
}
// stateTru is the state after reading `tru`.
func stateTru(s *scanner, c byte) scanStatus {
if c == 'e' {
s.step = stateEndValue
return scanContinue
}
return s.error(c, "in literal true (expecting 'e')")
}
// stateF is the state after reading `f`.
func stateF(s *scanner, c byte) scanStatus {
if c == 'a' {
s.step = stateFa
return scanContinue
}
return s.error(c, "in literal false (expecting 'a')")
}
// stateFa is the state after reading `fa`.
func stateFa(s *scanner, c byte) scanStatus {
if c == 'l' {
s.step = stateFal
return scanContinue
}
return s.error(c, "in literal false (expecting 'l')")
}
// stateFal is the state after reading `fal`.
func stateFal(s *scanner, c byte) scanStatus {
if c == 's' {
s.step = stateFals
return scanContinue
}
return s.error(c, "in literal false (expecting 's')")
}
// stateFals is the state after reading `fals`.
func stateFals(s *scanner, c byte) scanStatus {
if c == 'e' {
s.step = stateEndValue
return scanContinue
}
return s.error(c, "in literal false (expecting 'e')")
}
// stateN is the state after reading `n`.
func stateN(s *scanner, c byte) scanStatus {
if c == 'u' {
s.step = stateNu
return scanContinue
}
return s.error(c, "in literal null (expecting 'u')")
}
// stateNu is the state after reading `nu`.
func stateNu(s *scanner, c byte) scanStatus {
if c == 'l' {
s.step = stateNul
return scanContinue
}
return s.error(c, "in literal null (expecting 'l')")
}
// stateNul is the state after reading `nul`.
func stateNul(s *scanner, c byte) scanStatus {
if c == 'l' {
s.step = stateEndValue
return scanContinue
}
return s.error(c, "in literal null (expecting 'l')")
}
// stateError is the state after reaching a syntax error,
// such as after reading `[1}` or `5.1.2`.
func stateError(s *scanner, c byte) scanStatus {
return scanError
}
// error records an error and switches to the error state.
func (s *scanner) error(c byte, context string) scanStatus {
s.step = stateError
s.err = fmt.Errorf("invalid character <<%c>> %s", c, context)
return scanError
}

View file

@ -0,0 +1,98 @@
package matchers
import "bytes"
// Zip matches a zip archive.
func Zip(in []byte) bool {
return len(in) > 3 &&
in[0] == 0x50 && in[1] == 0x4B &&
(in[2] == 0x3 || in[2] == 0x5 || in[2] == 0x7) &&
(in[3] == 0x4 || in[3] == 0x6 || in[3] == 0x8)
}
// SevenZ matches a 7z archive.
func SevenZ(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C})
}
// Epub matches an EPUB file.
func Epub(in []byte) bool {
return len(in) > 58 && bytes.Equal(in[30:58], []byte("mimetypeapplication/epub+zip"))
}
// Jar matches a Java archive file.
func Jar(in []byte) bool {
return bytes.Contains(in, []byte("META-INF/MANIFEST.MF"))
}
// Gzip matched gzip files based on http://www.zlib.org/rfc-gzip.html#header-trailer.
func Gzip(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x1f, 0x8b})
}
// Crx matches a Chrome extension file: a zip archive prepended by "Cr24".
func Crx(in []byte) bool {
return bytes.HasPrefix(in, []byte("Cr24"))
}
// Tar matches a (t)ape (ar)chive file.
func Tar(in []byte) bool {
return len(in) > 262 && bytes.Equal(in[257:262], []byte("ustar"))
}
// Fits matches an Flexible Image Transport System file.
func Fits(in []byte) bool {
return bytes.HasPrefix(in, []byte{
0x53, 0x49, 0x4D, 0x50, 0x4C, 0x45, 0x20, 0x20, 0x3D, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x54,
})
}
// Xar matches an eXtensible ARchive format file.
func Xar(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x78, 0x61, 0x72, 0x21})
}
// Bz2 matches a bzip2 file.
func Bz2(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x42, 0x5A, 0x68})
}
// Ar matches an ar (Unix) archive file.
func Ar(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E})
}
// Deb matches a Debian package file.
func Deb(in []byte) bool {
return len(in) > 8 && bytes.HasPrefix(in[8:], []byte{
0x64, 0x65, 0x62, 0x69, 0x61, 0x6E, 0x2D,
0x62, 0x69, 0x6E, 0x61, 0x72, 0x79,
})
}
// Rar matches a RAR archive file.
func Rar(in []byte) bool {
if !bytes.HasPrefix(in, []byte{0x52, 0x61, 0x72, 0x21, 0x1A, 0x07}) {
return false
}
return len(in) > 8 && (bytes.Equal(in[6:8], []byte{0x01, 0x00}) || in[6] == 0x00)
}
// Warc matches a Web ARChive file.
func Warc(in []byte) bool {
return bytes.HasPrefix(in, []byte("WARC/"))
}
// Zstd matches a Zstandard archive file.
func Zstd(in []byte) bool {
return len(in) >= 4 &&
(0x22 <= in[0] && in[0] <= 0x28 || in[0] == 0x1E) && // Different Zstandard versions.
bytes.HasPrefix(in[1:], []byte{0xB5, 0x2F, 0xFD})
}
// Cab matches a Cabinet archive file.
func Cab(in []byte) bool {
return bytes.HasPrefix(in, []byte("MSCF"))
}

View file

@ -0,0 +1,95 @@
package matchers
import (
"bytes"
"encoding/binary"
)
// Mp3 matches an mp3 file.
func Mp3(in []byte) bool {
if len(in) < 3 {
return false
}
if bytes.HasPrefix(in, []byte("ID3")) {
// MP3s with an ID3v2 tag will start with "ID3"
// ID3v1 tags, however appear at the end of the file.
return true
}
// Match MP3 files without tags
switch binary.BigEndian.Uint16(in[:2]) & 0xFFFE {
case 0xFFFA:
// MPEG ADTS, layer III, v1
return true
case 0xFFF2:
// MPEG ADTS, layer III, v2
return true
case 0xFFE2:
// MPEG ADTS, layer III, v2.5
return true
}
return false
}
// Flac matches a Free Lossless Audio Codec file.
func Flac(in []byte) bool {
return bytes.HasPrefix(in, []byte("\x66\x4C\x61\x43\x00\x00\x00\x22"))
}
// Midi matches a Musical Instrument Digital Interface file.
func Midi(in []byte) bool {
return bytes.HasPrefix(in, []byte("\x4D\x54\x68\x64"))
}
// Ape matches a Monkey's Audio file.
func Ape(in []byte) bool {
return bytes.HasPrefix(in, []byte("\x4D\x41\x43\x20\x96\x0F\x00\x00\x34\x00\x00\x00\x18\x00\x00\x00\x90\xE3"))
}
// MusePack matches a Musepack file.
func MusePack(in []byte) bool {
return bytes.HasPrefix(in, []byte("MPCK"))
}
// Wav matches a Waveform Audio File Format file.
func Wav(in []byte) bool {
return len(in) > 12 &&
bytes.Equal(in[:4], []byte("RIFF")) &&
bytes.Equal(in[8:12], []byte("\x57\x41\x56\x45"))
}
// Aiff matches Audio Interchange File Format file.
func Aiff(in []byte) bool {
return len(in) > 12 &&
bytes.Equal(in[:4], []byte("\x46\x4F\x52\x4D")) &&
bytes.Equal(in[8:12], []byte("\x41\x49\x46\x46"))
}
// Au matches a Sun Microsystems au file.
func Au(in []byte) bool {
return bytes.HasPrefix(in, []byte("\x2E\x73\x6E\x64"))
}
// Amr matches an Adaptive Multi-Rate file.
func Amr(in []byte) bool {
return bytes.HasPrefix(in, []byte("\x23\x21\x41\x4D\x52"))
}
// Aac matches an Advanced Audio Coding file.
func Aac(in []byte) bool {
return bytes.HasPrefix(in, []byte{0xFF, 0xF1}) || bytes.HasPrefix(in, []byte{0xFF, 0xF9})
}
// Voc matches a Creative Voice file.
func Voc(in []byte) bool {
return bytes.HasPrefix(in, []byte("Creative Voice File"))
}
// Qcp matches a Qualcomm Pure Voice file.
func Qcp(in []byte) bool {
return len(in) > 12 &&
bytes.Equal(in[:4], []byte("RIFF")) &&
bytes.Equal(in[8:12], []byte("QLCM"))
}

View file

@ -0,0 +1,146 @@
package matchers
import (
"bytes"
"debug/macho"
"encoding/binary"
)
// Java bytecode and Mach-O binaries share the same magic number.
// More info here https://github.com/threatstack/libmagic/blob/master/magic/Magdir/cafebabe
func classOrMachOFat(in []byte) bool {
// There should be at least 8 bytes for both of them because the only way to
// quickly distinguish them is by comparing byte at position 7
if len(in) < 8 {
return false
}
return bytes.HasPrefix(in, []byte{0xCA, 0xFE, 0xBA, 0xBE})
}
// Class matches a java class file.
func Class(in []byte) bool {
return classOrMachOFat(in) && in[7] > 30
}
// MachO matches Mach-O binaries format.
func MachO(in []byte) bool {
if classOrMachOFat(in) && in[7] < 20 {
return true
}
if len(in) < 4 {
return false
}
be := binary.BigEndian.Uint32(in)
le := binary.LittleEndian.Uint32(in)
return be == macho.Magic32 || le == macho.Magic32 || be == macho.Magic64 || le == macho.Magic64
}
// Swf matches an Adobe Flash swf file.
func Swf(in []byte) bool {
return bytes.HasPrefix(in, []byte("CWS")) ||
bytes.HasPrefix(in, []byte("FWS")) ||
bytes.HasPrefix(in, []byte("ZWS"))
}
// Wasm matches a web assembly File Format file.
func Wasm(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x00, 0x61, 0x73, 0x6D})
}
// Dbf matches a dBase file.
// https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
func Dbf(in []byte) bool {
if len(in) < 4 {
return false
}
// 3rd and 4th bytes contain the last update month and day of month
if !(0 < in[2] && in[2] < 13 && 0 < in[3] && in[3] < 32) {
return false
}
// dbf type is dictated by the first byte
dbfTypes := []byte{
0x02, 0x03, 0x04, 0x05, 0x30, 0x31, 0x32, 0x42, 0x62, 0x7B, 0x82,
0x83, 0x87, 0x8A, 0x8B, 0x8E, 0xB3, 0xCB, 0xE5, 0xF5, 0xF4, 0xFB,
}
for _, b := range dbfTypes {
if in[0] == b {
return true
}
}
return false
}
// Exe matches a Windows/DOS executable file.
func Exe(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x4D, 0x5A})
}
// Elf matches an Executable and Linkable Format file.
func Elf(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x7F, 0x45, 0x4C, 0x46})
}
// ElfObj matches an object file.
func ElfObj(in []byte) bool {
return len(in) > 17 && ((in[16] == 0x01 && in[17] == 0x00) ||
(in[16] == 0x00 && in[17] == 0x01))
}
// ElfExe matches an executable file.
func ElfExe(in []byte) bool {
return len(in) > 17 && ((in[16] == 0x02 && in[17] == 0x00) ||
(in[16] == 0x00 && in[17] == 0x02))
}
// ElfLib matches a shared library file.
func ElfLib(in []byte) bool {
return len(in) > 17 && ((in[16] == 0x03 && in[17] == 0x00) ||
(in[16] == 0x00 && in[17] == 0x03))
}
// ElfDump matches a core dump file.
func ElfDump(in []byte) bool {
return len(in) > 17 && ((in[16] == 0x04 && in[17] == 0x00) ||
(in[16] == 0x00 && in[17] == 0x04))
}
// Dcm matches a DICOM medical format file.
func Dcm(in []byte) bool {
return len(in) > 131 &&
bytes.Equal(in[128:132], []byte{0x44, 0x49, 0x43, 0x4D})
}
// Nes matches a Nintendo Entertainment system ROM file.
func Nes(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x4E, 0x45, 0x53, 0x1A})
}
// Marc matches a MARC21 (MAchine-Readable Cataloging) file.
func Marc(in []byte) bool {
// File is at least 24 bytes ("leader" field size)
if len(in) < 24 {
return false
}
// Fixed bytes at offset 20
if !bytes.Equal(in[20:24], []byte("4500")) {
return false
}
// First 5 bytes are ASCII digits
for i := 0; i < 5; i++ {
if in[i] < '0' || in[i] > '9' {
return false
}
}
// Field terminator is present
return bytes.Contains(in, []byte{0x1E})
}

View file

@ -0,0 +1,25 @@
package matchers
import "bytes"
// Sqlite matches an SQLite database file.
func Sqlite(in []byte) bool {
return bytes.HasPrefix(in, []byte{
0x53, 0x51, 0x4c, 0x69, 0x74, 0x65, 0x20, 0x66,
0x6f, 0x72, 0x6d, 0x61, 0x74, 0x20, 0x33, 0x00,
})
}
// MsAccessAce matches Microsoft Access dababase file.
func MsAccessAce(in []byte) bool {
return msAccess(in, []byte("Standard ACE DB"))
}
// MsAccessMdb matches legacy Microsoft Access database file (JET, 2003 and earlier).
func MsAccessMdb(in []byte) bool {
return msAccess(in, []byte("Standard Jet DB"))
}
func msAccess(in []byte, magic []byte) bool {
return len(in) > 19 && bytes.Equal(in[4:19], magic)
}

View file

@ -0,0 +1,32 @@
package matchers
import "bytes"
// Pdf matches a Portable Document Format file.
func Pdf(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x25, 0x50, 0x44, 0x46})
}
// DjVu matches a DjVu file.
func DjVu(in []byte) bool {
if len(in) < 12 {
return false
}
if !bytes.HasPrefix(in, []byte{0x41, 0x54, 0x26, 0x54, 0x46, 0x4F, 0x52, 0x4D}) {
return false
}
return bytes.HasPrefix(in[12:], []byte("DJVM")) ||
bytes.HasPrefix(in[12:], []byte("DJVU")) ||
bytes.HasPrefix(in[12:], []byte("DJVI")) ||
bytes.HasPrefix(in[12:], []byte("THUM"))
}
// Mobi matches a Mobi file.
func Mobi(in []byte) bool {
return len(in) > 67 && bytes.Equal(in[60:68], []byte("BOOKMOBI"))
}
// Lit matches a Microsoft Lit file.
func Lit(in []byte) bool {
return bytes.HasPrefix(in, []byte("ITOLITLS"))
}

View file

@ -0,0 +1,27 @@
package matchers
import "bytes"
// Woff matches a Web Open Font Format file.
func Woff(in []byte) bool {
return bytes.HasPrefix(in, []byte("wOFF"))
}
// Woff2 matches a Web Open Font Format version 2 file.
func Woff2(in []byte) bool {
return bytes.HasPrefix(in, []byte("wOF2"))
}
// Otf matches an OpenType font file.
func Otf(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x4F, 0x54, 0x54, 0x4F, 0x00})
}
// Eot matches an Embedded OpenType font file.
func Eot(in []byte) bool {
return len(in) > 35 &&
bytes.Equal(in[34:36], []byte{0x4C, 0x50}) &&
(bytes.Equal(in[8:11], []byte{0x02, 0x00, 0x01}) ||
bytes.Equal(in[8:11], []byte{0x01, 0x00, 0x00}) ||
bytes.Equal(in[8:11], []byte{0x02, 0x00, 0x02}))
}

View file

@ -0,0 +1,44 @@
package matchers
import (
"bytes"
"encoding/binary"
)
// Shp matches a shape format file.
// https://www.esri.com/library/whitepapers/pdfs/shapefile.pdf
func Shp(in []byte) bool {
if len(in) < 112 {
return false
}
shapeTypes := []int{
0, // Null shape
1, // Point
3, // Polyline
5, // Polygon
8, // MultiPoint
11, // PointZ
13, // PolylineZ
15, // PolygonZ
18, // MultiPointZ
21, // PointM
23, // PolylineM
25, // PolygonM
28, // MultiPointM
31, // MultiPatch
}
for _, st := range shapeTypes {
if st == int(binary.LittleEndian.Uint32(in[108:112])) {
return true
}
}
return false
}
// Shx matches a shape index format file.
// https://www.esri.com/library/whitepapers/pdfs/shapefile.pdf
func Shx(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x00, 0x00, 0x27, 0x0A})
}

View file

@ -0,0 +1,164 @@
package matchers
import "bytes"
// Png matches a Portable Network Graphics file.
func Png(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A})
}
// Jpg matches a Joint Photographic Experts Group file.
func Jpg(in []byte) bool {
return bytes.HasPrefix(in, []byte{0xFF, 0xD8, 0xFF})
}
// isJpeg2k matches a generic JPEG2000 file.
func isJpeg2k(in []byte) bool {
if len(in) < 24 {
return false
}
signature := in[4:8]
return bytes.Equal(signature, []byte{0x6A, 0x50, 0x20, 0x20}) ||
bytes.Equal(signature, []byte{0x6A, 0x50, 0x32, 0x20})
}
// Jp2 matches a JPEG 2000 Image file (ISO 15444-1).
func Jp2(in []byte) bool {
return isJpeg2k(in) && bytes.Equal(in[20:24], []byte{0x6a, 0x70, 0x32, 0x20})
}
// Jpx matches a JPEG 2000 Image file (ISO 15444-2).
func Jpx(in []byte) bool {
return isJpeg2k(in) && bytes.Equal(in[20:24], []byte{0x6a, 0x70, 0x78, 0x20})
}
// Jpm matches a JPEG 2000 Image file (ISO 15444-6).
func Jpm(in []byte) bool {
return isJpeg2k(in) && bytes.Equal(in[20:24], []byte{0x6a, 0x70, 0x6D, 0x20})
}
// Gif matches a Graphics Interchange Format file.
func Gif(in []byte) bool {
return bytes.HasPrefix(in, []byte("GIF87a")) ||
bytes.HasPrefix(in, []byte("GIF89a"))
}
// Webp matches a WebP file.
func Webp(in []byte) bool {
return len(in) > 12 &&
bytes.Equal(in[0:4], []byte("RIFF")) &&
bytes.Equal(in[8:12], []byte{0x57, 0x45, 0x42, 0x50})
}
// Bmp matches a bitmap image file.
func Bmp(in []byte) bool {
return len(in) > 1 && in[0] == 0x42 && in[1] == 0x4D
}
// Ps matches a PostScript file.
func Ps(in []byte) bool {
return bytes.HasPrefix(in, []byte("%!PS-Adobe-"))
}
// Psd matches a Photoshop Document file.
func Psd(in []byte) bool {
return bytes.HasPrefix(in, []byte("8BPS"))
}
// Ico matches an ICO file.
func Ico(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x00, 0x00, 0x01, 0x00})
}
// Icns matches an ICNS (Apple Icon Image format) file.
func Icns(in []byte) bool {
return bytes.HasPrefix(in, []byte("icns"))
}
// Tiff matches a Tagged Image File Format file.
func Tiff(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x49, 0x49, 0x2A, 0x00}) ||
bytes.HasPrefix(in, []byte{0x4D, 0x4D, 0x00, 0x2A})
}
// Bpg matches a Better Portable Graphics file.
func Bpg(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x42, 0x50, 0x47, 0xFB})
}
// Dwg matches a CAD drawing file.
func Dwg(in []byte) bool {
if len(in) < 6 || in[0] != 0x41 || in[1] != 0x43 {
return false
}
dwgVersions := [][]byte{
{0x31, 0x2E, 0x34, 0x30},
{0x31, 0x2E, 0x35, 0x30},
{0x32, 0x2E, 0x31, 0x30},
{0x31, 0x30, 0x30, 0x32},
{0x31, 0x30, 0x30, 0x33},
{0x31, 0x30, 0x30, 0x34},
{0x31, 0x30, 0x30, 0x36},
{0x31, 0x30, 0x30, 0x39},
{0x31, 0x30, 0x31, 0x32},
{0x31, 0x30, 0x31, 0x34},
{0x31, 0x30, 0x31, 0x35},
{0x31, 0x30, 0x31, 0x38},
{0x31, 0x30, 0x32, 0x31},
{0x31, 0x30, 0x32, 0x34},
{0x31, 0x30, 0x33, 0x32},
}
for _, d := range dwgVersions {
if bytes.Equal(in[2:6], d) {
return true
}
}
return false
}
// Heic matches a High Efficiency Image Coding (HEIC) file.
func Heic(in []byte) bool {
if len(in) <= 12 {
return false
}
return bytes.Equal(in[4:12], []byte("ftypheic")) ||
bytes.Equal(in[4:12], []byte("ftypheix"))
}
// HeicSequence matches a High Efficiency Image Coding (HEIC) file sequence.
func HeicSequence(in []byte) bool {
if len(in) <= 12 {
return false
}
return bytes.Equal(in[4:12], []byte("ftyphevc")) ||
bytes.Equal(in[4:12], []byte("ftyphevx"))
}
// Heif matches a High Efficiency Image File Format (HEIF) file.
func Heif(in []byte) bool {
if len(in) <= 12 {
return false
}
return bytes.Equal(in[4:12], []byte("ftypmif1")) ||
bytes.Equal(in[4:12], []byte("ftypheim")) ||
bytes.Equal(in[4:12], []byte("ftypheis")) ||
bytes.Equal(in[4:12], []byte("ftypavic"))
}
// HeifSequence matches a High Efficiency Image File Format (HEIF) file sequence.
func HeifSequence(in []byte) bool {
if len(in) <= 12 {
return false
}
return bytes.Equal(in[4:12], []byte("ftypmsf1")) ||
bytes.Equal(in[4:12], []byte("ftyphevm")) ||
bytes.Equal(in[4:12], []byte("ftyphevs")) ||
bytes.Equal(in[4:12], []byte("ftypavcs"))
}

View file

@ -0,0 +1,48 @@
// Package matchers holds the matching functions used to find MIME types.
package matchers
// ReadLimit is the maximum number of bytes read
// from the input when detecting a reader.
const ReadLimit = 3072
// True is a dummy matching function used to match any input.
func True([]byte) bool {
return true
}
// trimLWS trims whitespace from beginning of the input.
func trimLWS(in []byte) []byte {
firstNonWS := 0
for ; firstNonWS < len(in) && isWS(in[firstNonWS]); firstNonWS++ {
}
return in[firstNonWS:]
}
// trimRWS trims whitespace from the end of the input.
func trimRWS(in []byte) []byte {
lastNonWS := len(in) - 1
for ; lastNonWS > 0 && isWS(in[lastNonWS]); lastNonWS-- {
}
return in[:lastNonWS+1]
}
func firstLine(in []byte) []byte {
lineEnd := 0
for ; lineEnd < len(in) && in[lineEnd] != '\n'; lineEnd++ {
}
return in[:lineEnd]
}
func isWS(b byte) bool {
return b == '\t' || b == '\n' || b == '\x0c' || b == '\r' || b == ' '
}
func min(a, b int) int {
if a < b {
return a
}
return b
}

View file

@ -0,0 +1,154 @@
package matchers
import (
"bytes"
"encoding/binary"
"regexp"
)
var msoXMLreg = regexp.MustCompile("\\[Content_Types\\]\\.xml|_rels/\\.rels|docProps")
// msoXML walks through the first 4 zip local file headers and returns whether
// any of the headers contain a file whose name starts with sig.
func msoXML(in, sig []byte) bool {
pkSig := []byte("PK\003\004")
if !msoXMLreg.Match(in[:min(len(in), 8000)]) {
return false
}
// 30 is the offset where the file name is located in each zip header.
lastCheckedIndex := 0
check := func(in, sig []byte, offset int) bool {
return len(in) > offset && bytes.HasPrefix(in[offset:], sig)
}
// github.com/file/file looks for the msoXML signature in the first 4 local
// headers, but some xlsx files have their signature in later headers.
// testdata/xlsx.1.xlsx is such an example, with the signature in the 5th header.
for i := 0; i < 6 && lastCheckedIndex < len(in); i++ {
in = in[lastCheckedIndex:]
pkIndex := bytes.Index(in, pkSig)
if pkIndex == -1 {
return false
}
if check(in, sig, pkIndex+30) {
return true
}
lastCheckedIndex = pkIndex + 30
}
return false
}
// Xlsx matches a Microsoft Excel 2007 file.
func Xlsx(in []byte) bool {
return msoXML(in, []byte("xl/"))
}
// Docx matches a Microsoft Office 2007 file.
func Docx(in []byte) bool {
return msoXML(in, []byte("word/"))
}
// Pptx matches a Microsoft PowerPoint 2007 file.
func Pptx(in []byte) bool {
return msoXML(in, []byte("ppt/"))
}
// Ole matches an Open Linking and Embedding file.
//
// https://en.wikipedia.org/wiki/Object_Linking_and_Embedding
func Ole(in []byte) bool {
return bytes.HasPrefix(in, []byte{0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1})
}
// Doc matches a Microsoft Office 97-2003 file.
//
// BUG(gabriel-vasile): Doc should look for subheaders like Ppt and Xls does.
//
// Ole is a container for Doc, Ppt, Pub and Xls.
// Right now, when an Ole file is detected, it is considered to be a Doc file
// if the checks for Ppt, Pub and Xls failed.
func Doc(in []byte) bool {
return true
}
// Ppt matches a Microsoft PowerPoint 97-2003 file.
func Ppt(in []byte) bool {
if len(in) < 520 {
return false
}
pptSubHeaders := [][]byte{
{0xA0, 0x46, 0x1D, 0xF0},
{0x00, 0x6E, 0x1E, 0xF0},
{0x0F, 0x00, 0xE8, 0x03},
}
for _, h := range pptSubHeaders {
if bytes.HasPrefix(in[512:], h) {
return true
}
}
if bytes.HasPrefix(in[512:], []byte{0xFD, 0xFF, 0xFF, 0xFF}) &&
in[518] == 0x00 && in[519] == 0x00 {
return true
}
return bytes.Contains(in, []byte("MS PowerPoint 97")) ||
bytes.Contains(in, []byte("P\x00o\x00w\x00e\x00r\x00P\x00o\x00i\x00n\x00t\x00 D\x00o\x00c\x00u\x00m\x00e\x00n\x00t"))
}
// Xls matches a Microsoft Excel 97-2003 file.
func Xls(in []byte) bool {
if len(in) <= 512 {
return false
}
xlsSubHeaders := [][]byte{
{0x09, 0x08, 0x10, 0x00, 0x00, 0x06, 0x05, 0x00},
{0xFD, 0xFF, 0xFF, 0xFF, 0x10},
{0xFD, 0xFF, 0xFF, 0xFF, 0x1F},
{0xFD, 0xFF, 0xFF, 0xFF, 0x22},
{0xFD, 0xFF, 0xFF, 0xFF, 0x23},
{0xFD, 0xFF, 0xFF, 0xFF, 0x28},
{0xFD, 0xFF, 0xFF, 0xFF, 0x29},
}
for _, h := range xlsSubHeaders {
if bytes.HasPrefix(in[512:], h) {
return true
}
}
return bytes.Contains(in, []byte("Microsoft Excel")) ||
bytes.Contains(in, []byte("W\x00o\x00r\x00k\x00b\x00o\x00o\x00k"))
}
// Pub matches a Microsoft Publisher file.
func Pub(in []byte) bool {
return matchOleClsid(in, []byte{
0x01, 0x12, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46,
})
}
// Helper to match by a specific CLSID of a compound file
//
// http://fileformats.archiveteam.org/wiki/Microsoft_Compound_File
func matchOleClsid(in []byte, clsid []byte) bool {
if len(in) <= 512 {
return false
}
// SecID of first sector of the directory stream
firstSecID := int(binary.LittleEndian.Uint32(in[48:52]))
// Expected offset of CLSID for root storage object
clsidOffset := 512*(1+firstSecID) + 80
if len(in) <= clsidOffset+16 {
return false
}
return bytes.HasPrefix(in[clsidOffset:], clsid)
}

View file

@ -0,0 +1,48 @@
package matchers
import "bytes"
// Odt matches an OpenDocument Text file.
func Odt(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.text"))
}
// Ott matches an OpenDocument Text Template file.
func Ott(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.text-template"))
}
// Ods matches an OpenDocument Spreadsheet file.
func Ods(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.spreadsheet"))
}
// Ots matches an OpenDocument Spreadsheet Template file.
func Ots(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.spreadsheet-template"))
}
// Odp matches an OpenDocument Presentation file.
func Odp(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.presentation"))
}
// Otp matches an OpenDocument Presentation Template file.
func Otp(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.presentation-template"))
}
// Odg matches an OpenDocument Drawing file.
func Odg(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.graphics"))
}
// Otg matches an OpenDocument Drawing Template file.
func Otg(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.graphics-template"))
}
// Odf matches an OpenDocument Formula file.
func Odf(in []byte) bool {
return bytes.Contains(in, []byte("mimetypeapplication/vnd.oasis.opendocument.formula"))
}

View file

@ -0,0 +1,42 @@
package matchers
import (
"bytes"
)
/*
NOTE:
In May 2003, two Internet RFCs were published relating to the format.
The Ogg bitstream was defined in RFC 3533 (which is classified as
'informative') and its Internet content type (application/ogg) in RFC
3534 (which is, as of 2006, a proposed standard protocol). In
September 2008, RFC 3534 was obsoleted by RFC 5334, which added
content types video/ogg, audio/ogg and filename extensions .ogx, .ogv,
.oga, .spx.
See:
https://tools.ietf.org/html/rfc3533
https://developer.mozilla.org/en-US/docs/Web/HTTP/Configuring_servers_for_Ogg_media#Serve_media_with_the_correct_MIME_type
https://github.com/file/file/blob/master/magic/Magdir/vorbis
*/
// Ogg matches an Ogg file.
func Ogg(in []byte) bool {
return bytes.HasPrefix(in, []byte("\x4F\x67\x67\x53\x00"))
}
// OggAudio matches an audio ogg file.
func OggAudio(in []byte) bool {
return len(in) >= 37 && (bytes.HasPrefix(in[28:], []byte("\x7fFLAC")) ||
bytes.HasPrefix(in[28:], []byte("\x01vorbis")) ||
bytes.HasPrefix(in[28:], []byte("OpusHead")) ||
bytes.HasPrefix(in[28:], []byte("Speex\x20\x20\x20")))
}
// OggVideo matches a video ogg file.
func OggVideo(in []byte) bool {
return len(in) >= 37 && (bytes.HasPrefix(in[28:], []byte("\x80theora")) ||
bytes.HasPrefix(in[28:], []byte("fishead\x00")) ||
bytes.HasPrefix(in[28:], []byte("\x01video\x00\x00\x00"))) // OGM video
}

View file

@ -0,0 +1,130 @@
package matchers
import (
"bytes"
"fmt"
)
type (
markupSig []byte
ciSig []byte // case insensitive signature
shebangSig []byte // matches !# followed by the signature
ftypSig []byte // matches audio/video files. www.ftyps.com
xmlSig struct {
// the local name of the root tag
localName []byte
// the namespace of the XML document
xmlns []byte
}
sig interface {
detect([]byte) bool
}
)
func newXmlSig(localName, xmlns string) xmlSig {
ret := xmlSig{xmlns: []byte(xmlns)}
if localName != "" {
ret.localName = []byte(fmt.Sprintf("<%s", localName))
}
return ret
}
// Implement sig interface.
func (hSig markupSig) detect(in []byte) bool {
if len(in) < len(hSig)+1 {
return false
}
// perform case insensitive check
for i, b := range hSig {
db := in[i]
if 'A' <= b && b <= 'Z' {
db &= 0xDF
}
if b != db {
return false
}
}
// Next byte must be space or right angle bracket.
if db := in[len(hSig)]; db != ' ' && db != '>' {
return false
}
return true
}
// Implement sig interface.
func (tSig ciSig) detect(in []byte) bool {
if len(in) < len(tSig)+1 {
return false
}
// perform case insensitive check
for i, b := range tSig {
db := in[i]
if 'A' <= b && b <= 'Z' {
db &= 0xDF
}
if b != db {
return false
}
}
return true
}
// a valid shebang starts with the "#!" characters
// followed by any number of spaces
// followed by the path to the interpreter and optionally, the args for the interpreter
func (sSig shebangSig) detect(in []byte) bool {
in = firstLine(in)
if len(in) < len(sSig)+2 {
return false
}
if in[0] != '#' || in[1] != '!' {
return false
}
in = trimLWS(trimRWS(in[2:]))
return bytes.Equal(in, sSig)
}
// Implement sig interface.
func (fSig ftypSig) detect(in []byte) bool {
return len(in) > 12 &&
bytes.Equal(in[4:8], []byte("ftyp")) &&
bytes.Equal(in[8:12], fSig)
}
// Implement sig interface.
func (xSig xmlSig) detect(in []byte) bool {
l := 512
if len(in) < l {
l = len(in)
}
in = in[:l]
if len(xSig.localName) == 0 {
return bytes.Index(in, xSig.xmlns) > 0
}
if len(xSig.xmlns) == 0 {
return bytes.Index(in, xSig.localName) > 0
}
localNameIndex := bytes.Index(in, xSig.localName)
return localNameIndex != -1 && localNameIndex < bytes.Index(in, xSig.xmlns)
}
// detect returns true if any of the provided signatures pass for in input.
func detect(in []byte, sigs []sig) bool {
for _, sig := range sigs {
if sig.detect(in) {
return true
}
}
return false
}

View file

@ -0,0 +1,389 @@
package matchers
import (
"bytes"
"github.com/gabriel-vasile/mimetype/internal/json"
)
var (
htmlSigs = []sig{
markupSig("<!DOCTYPE HTML"),
markupSig("<HTML"),
markupSig("<HEAD"),
markupSig("<SCRIPT"),
markupSig("<IFRAME"),
markupSig("<H1"),
markupSig("<DIV"),
markupSig("<FONT"),
markupSig("<TABLE"),
markupSig("<A"),
markupSig("<STYLE"),
markupSig("<TITLE"),
markupSig("<B"),
markupSig("<BODY"),
markupSig("<BR"),
markupSig("<P"),
markupSig("<!--"),
}
xmlSigs = []sig{
markupSig("<?XML"),
}
rssSigs = []sig{
newXmlSig("rss", ""),
}
atomSigs = []sig{
newXmlSig("feed", `xmlns="http://www.w3.org/2005/Atom"`),
}
kmlSigs = []sig{
newXmlSig("kml", `xmlns="http://www.opengis.net/kml/2.2"`),
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.0"`),
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.1"`),
newXmlSig("kml", `xmlns="http://earth.google.com/kml/2.2"`),
}
xliffSigs = []sig{
newXmlSig("xliff", `xmlns="urn:oasis:names:tc:xliff:document:1.2"`),
}
colladaSigs = []sig{
newXmlSig("COLLADA", `xmlns="http://www.collada.org/2005/11/COLLADASchema"`),
}
gmlSigs = []sig{
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml"`),
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml/3.2"`),
newXmlSig("", `xmlns:gml="http://www.opengis.net/gml/3.3/exr"`),
}
gpxSigs = []sig{
newXmlSig("gpx", `xmlns="http://www.topografix.com/GPX/1/1"`),
}
tcxSigs = []sig{
newXmlSig("TrainingCenterDatabase", `xmlns="http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2"`),
}
x3dSigs = []sig{
newXmlSig("X3D", `xmlns:xsd="http://www.w3.org/2001/XMLSchema-instance"`),
}
amfSigs = []sig{
newXmlSig("amf", ""),
}
threemfSigs = []sig{
newXmlSig("model", `xmlns="http://schemas.microsoft.com/3dmanufacturing/core/2015/02"`),
}
vCardSigs = []sig{
ciSig("BEGIN:VCARD\n"),
ciSig("BEGIN:VCARD\r\n"),
}
iCalSigs = []sig{
ciSig("BEGIN:VCALENDAR\n"),
ciSig("BEGIN:VCALENDAR\r\n"),
}
phpSigs = []sig{
ciSig("<?PHP"),
ciSig("<?\n"),
ciSig("<?\r"),
ciSig("<? "),
shebangSig("/usr/local/bin/php"),
shebangSig("/usr/bin/php"),
shebangSig("/usr/bin/env php"),
}
jsSigs = []sig{
shebangSig("/bin/node"),
shebangSig("/usr/bin/node"),
shebangSig("/bin/nodejs"),
shebangSig("/usr/bin/nodejs"),
shebangSig("/usr/bin/env node"),
shebangSig("/usr/bin/env nodejs"),
}
luaSigs = []sig{
shebangSig("/usr/bin/lua"),
shebangSig("/usr/local/bin/lua"),
shebangSig("/usr/bin/env lua"),
}
perlSigs = []sig{
shebangSig("/usr/bin/perl"),
shebangSig("/usr/bin/env perl"),
}
pythonSigs = []sig{
shebangSig("/usr/bin/python"),
shebangSig("/usr/local/bin/python"),
shebangSig("/usr/bin/env python"),
}
tclSigs = []sig{
shebangSig("/usr/bin/tcl"),
shebangSig("/usr/local/bin/tcl"),
shebangSig("/usr/bin/env tcl"),
shebangSig("/usr/bin/tclsh"),
shebangSig("/usr/local/bin/tclsh"),
shebangSig("/usr/bin/env tclsh"),
shebangSig("/usr/bin/wish"),
shebangSig("/usr/local/bin/wish"),
shebangSig("/usr/bin/env wish"),
}
)
// Utf32be matches a text file encoded with UTF-32 and with the characters
// represented in big endian.
func Utf32be(in []byte) bool {
return bytes.HasPrefix(in, []byte{0x00, 0x00, 0xFE, 0xFF})
}
// Utf32le matches a text file encoded with UTF-32 and with the characters
// represented in little endian.
func Utf32le(in []byte) bool {
return bytes.HasPrefix(in, []byte{0xFF, 0xFE, 0x00, 0x00})
}
// Utf16be matches a text file encoded with UTF-16 and with the characters
// represented in big endian.
func Utf16be(in []byte) bool {
return bytes.HasPrefix(in, []byte{0xFE, 0xFF})
}
// Utf16le matches a text file encoded with UTF-16 and with the characters
// represented in little endian.
func Utf16le(in []byte) bool {
return bytes.HasPrefix(in, []byte{0xFF, 0xFE})
}
// Utf8 matches a UTF-8 text file.
func Utf8(in []byte) bool {
in = trimLWS(in)
for _, b := range in {
if b <= 0x08 ||
b == 0x0B ||
0x0E <= b && b <= 0x1A ||
0x1C <= b && b <= 0x1F {
return false
}
}
return true
}
// Html matches a Hypertext Markup Language file.
func Html(in []byte) bool {
in = trimLWS(in)
if len(in) == 0 {
return false
}
return detect(in, htmlSigs)
}
// Xml matches an Extensible Markup Language file.
func Xml(in []byte) bool {
in = trimLWS(in)
if len(in) == 0 {
return false
}
return detect(in, xmlSigs)
}
// Php matches a PHP: Hypertext Preprocessor file.
func Php(in []byte) bool {
return detect(in, phpSigs)
}
// Json matches a JavaScript Object Notation file.
func Json(in []byte) bool {
parsed, err := json.Scan(in)
if len(in) < ReadLimit {
return err == nil
}
return parsed == len(in)
}
// GeoJson matches a RFC 7946 GeoJSON file.
//
// BUG(gabriel-vasile): The "type" key should be searched for in the root object.
func GeoJson(in []byte) bool {
in = trimLWS(in)
if len(in) == 0 {
return false
}
// geojson is always an object
if in[0] != '{' {
return false
}
s := []byte(`"type"`)
si := bytes.Index(in, s)
sl := len(s)
if si == -1 {
return false
}
// if the "type" string is the suffix of the input
// there is no need to search for the value of the key
if si+sl == len(in) {
return false
}
// skip the "type" part
in = in[si+sl:]
// skip any whitespace before the colon
in = trimLWS(in)
// skip any whitesapce after the colon
// not checking if char is colon because json matcher already did check
in = trimLWS(in[1:])
geoJsonTypes := [][]byte{
[]byte(`"Feature"`),
[]byte(`"FeatureCollection"`),
[]byte(`"Point"`),
[]byte(`"LineString"`),
[]byte(`"Polygon"`),
[]byte(`"MultiPoint"`),
[]byte(`"MultiLineString"`),
[]byte(`"MultiPolygon"`),
[]byte(`"GeometryCollection"`),
}
for _, t := range geoJsonTypes {
if bytes.HasPrefix(in, t) {
return true
}
}
return false
}
// NdJson matches a Newline delimited JSON file.
func NdJson(in []byte) bool {
// Separator with carriage return and new line `\r\n`
srn := []byte{0x0D, 0x0A}
// Separator with only new line `\n`
sn := []byte{0x0A}
// total bytes scanned
parsed := 0
// Split by `srn`
for rni, insrn := range bytes.Split(in, srn) {
// separator byte count should be added only after the first split
if rni != 0 {
// Add two as `\r\n` is used for split
parsed += 2
}
// Return false if there is a carriage return `\r`
if bytes.Contains(insrn, []byte{0x0D}) {
return false
}
// Split again by `sn`
for ni, insn := range bytes.Split(insrn, sn) {
// separator byte count should be added only after the first split
if ni != 0 {
// Add one as `\n` is used for split
parsed++
}
// Empty line is valid
if len(insn) == 0 {
continue
}
p, err := json.Scan(insn)
parsed += p
if parsed < ReadLimit && err != nil {
return false
}
}
}
return parsed == len(in)
}
// Js matches a Javascript file.
func Js(in []byte) bool {
return detect(in, jsSigs)
}
// Lua matches a Lua programming language file.
func Lua(in []byte) bool {
return detect(in, luaSigs)
}
// Perl matches a Perl programming language file.
func Perl(in []byte) bool {
return detect(in, perlSigs)
}
// Python matches a Python programming language file.
func Python(in []byte) bool {
return detect(in, pythonSigs)
}
// Tcl matches a Tcl programming language file.
func Tcl(in []byte) bool {
return detect(in, tclSigs)
}
// Rtf matches a Rich Text Format file.
func Rtf(in []byte) bool {
return bytes.HasPrefix(in, []byte("{\\rtf1"))
}
// Svg matches a SVG file.
func Svg(in []byte) bool {
return bytes.Contains(in, []byte("<svg"))
}
// Rss matches a Rich Site Summary file.
func Rss(in []byte) bool {
return detect(in, rssSigs)
}
// Atom matches an Atom Syndication Format file.
func Atom(in []byte) bool {
return detect(in, atomSigs)
}
// Kml matches a Keyhole Markup Language file.
func Kml(in []byte) bool {
return detect(in, kmlSigs)
}
// Xliff matches a XML Localization Interchange File Format file.
func Xliff(in []byte) bool {
return detect(in, xliffSigs)
}
// Collada matches a COLLAborative Design Activity file.
func Collada(in []byte) bool {
return detect(in, colladaSigs)
}
// Gml matches a Geography Markup Language file.
func Gml(in []byte) bool {
return detect(in, gmlSigs)
}
// Gpx matches a GPS Exchange Format file.
func Gpx(in []byte) bool {
return detect(in, gpxSigs)
}
// Tcx matches a Training Center XML file.
func Tcx(in []byte) bool {
return detect(in, tcxSigs)
}
// Amf matches an Additive Manufacturing XML file.
func Amf(in []byte) bool {
return detect(in, amfSigs)
}
// Threemf matches a 3D Manufacturing Format file.
func Threemf(in []byte) bool {
return detect(in, threemfSigs)
}
// X3d matches an Extensible 3D Graphics file.
func X3d(in []byte) bool {
return detect(in, x3dSigs)
}
// VCard matches a Virtual Contact File.
func VCard(in []byte) bool {
return detect(in, vCardSigs)
}
// ICalendar matches a iCalendar file.
func ICalendar(in []byte) bool {
return detect(in, iCalSigs)
}

View file

@ -0,0 +1,46 @@
package matchers
import (
"bytes"
"encoding/csv"
"io"
)
// Csv matches a comma-separated values file.
func Csv(in []byte) bool {
return sv(in, ',')
}
// Tsv matches a tab-separated values file.
func Tsv(in []byte) bool {
return sv(in, '\t')
}
func sv(in []byte, comma rune) bool {
r := csv.NewReader(butLastLineReader(in, ReadLimit))
r.Comma = comma
r.TrimLeadingSpace = true
r.LazyQuotes = true
r.Comment = '#'
lines, err := r.ReadAll()
return err == nil && r.FieldsPerRecord > 1 && len(lines) > 1
}
// butLastLineReader returns a reader to the provided byte slice.
// the reader is guaranteed to reach EOF before it reads `cutAt` bytes.
// bytes after the last newline are dropped from the input.
func butLastLineReader(in []byte, cutAt int) io.Reader {
if len(in) >= cutAt {
for i := cutAt - 1; i > 0; i-- {
if in[i] == '\n' {
return bytes.NewReader(in[:i])
}
}
// no newline was found between the 0 index and cutAt
return bytes.NewReader(in[:cutAt])
}
return bytes.NewReader(in)
}

View file

@ -0,0 +1,71 @@
package matchers
import (
"bytes"
)
// WebM matches a WebM file.
func WebM(in []byte) bool {
return isMatroskaFileTypeMatched(in, "webm")
}
// Mkv matches a mkv file.
func Mkv(in []byte) bool {
return isMatroskaFileTypeMatched(in, "matroska")
}
// isMatroskaFileTypeMatched is used for webm and mkv file matching.
// It checks for .Eߣ sequence. If the sequence is found,
// then it means it is Matroska media container, including WebM.
// Then it verifies which of the file type it is representing by matching the
// file specific string.
func isMatroskaFileTypeMatched(in []byte, flType string) bool {
if bytes.HasPrefix(in, []byte("\x1A\x45\xDF\xA3")) {
return isFileTypeNamePresent(in, flType)
}
return false
}
// isFileTypeNamePresent accepts the matroska input data stream and searches
// for the given file type in the stream. Return whether a match is found.
// The logic of search is: find first instance of \x42\x82 and then
// search for given string after one byte of above instance.
func isFileTypeNamePresent(in []byte, flType string) bool {
ind, maxInd, lenIn := 0, 4096, len(in)
if lenIn < maxInd { // restricting length to 4096
maxInd = lenIn
}
ind = bytes.Index(in[:maxInd], []byte("\x42\x82"))
if ind > 0 && lenIn > ind+3 {
// filetype name will be present exactly
// one byte after the match of the two bytes "\x42\x82"
return bytes.HasPrefix(in[ind+3:], []byte(flType))
}
return false
}
// Flv matches a Flash video file.
func Flv(in []byte) bool {
return bytes.HasPrefix(in, []byte("\x46\x4C\x56\x01"))
}
// Mpeg matches a Moving Picture Experts Group file.
func Mpeg(in []byte) bool {
return len(in) > 3 && bytes.HasPrefix(in, []byte{0x00, 0x00, 0x01}) &&
in[3] >= 0xB0 && in[3] <= 0xBF
}
// Avi matches an Audio Video Interleaved file.
func Avi(in []byte) bool {
return len(in) > 16 &&
bytes.Equal(in[:4], []byte("RIFF")) &&
bytes.Equal(in[8:16], []byte("AVI LIST"))
}
// Asf matches an Advanced Systems Format file.
func Asf(in []byte) bool {
return bytes.HasPrefix(in, []byte{
0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11,
0xA6, 0xD9, 0x00, 0xAA, 0x00, 0x62, 0xCE, 0x6C,
})
}

View file

@ -0,0 +1,71 @@
package matchers
var (
mp4Sigs = []sig{
ftypSig("avc1"), ftypSig("dash"), ftypSig("iso2"), ftypSig("iso3"),
ftypSig("iso4"), ftypSig("iso5"), ftypSig("iso6"), ftypSig("isom"),
ftypSig("mmp4"), ftypSig("mp41"), ftypSig("mp42"), ftypSig("mp4v"),
ftypSig("mp71"), ftypSig("MSNV"), ftypSig("NDAS"), ftypSig("NDSC"),
ftypSig("NSDC"), ftypSig("NSDH"), ftypSig("NDSM"), ftypSig("NDSP"),
ftypSig("NDSS"), ftypSig("NDXC"), ftypSig("NDXH"), ftypSig("NDXM"),
ftypSig("NDXP"), ftypSig("NDXS"), ftypSig("F4V "), ftypSig("F4P "),
}
threeGPSigs = []sig{
ftypSig("3gp1"), ftypSig("3gp2"), ftypSig("3gp3"), ftypSig("3gp4"),
ftypSig("3gp5"), ftypSig("3gp6"), ftypSig("3gp7"), ftypSig("3gs7"),
ftypSig("3ge6"), ftypSig("3ge7"), ftypSig("3gg6"),
}
threeG2Sigs = []sig{
ftypSig("3g24"), ftypSig("3g25"), ftypSig("3g26"), ftypSig("3g2a"),
ftypSig("3g2b"), ftypSig("3g2c"), ftypSig("KDDI"),
}
amp4Sigs = []sig{
// audio for Adobe Flash Player 9+
ftypSig("F4A "), ftypSig("F4B "),
// Apple iTunes AAC-LC (.M4A) Audio
ftypSig("M4B "), ftypSig("M4P "),
// MPEG-4 (.MP4) for SonyPSP
ftypSig("MSNV"),
// Nero Digital AAC Audio
ftypSig("NDAS"),
}
qtSigs = []sig{ftypSig("qt "), ftypSig("moov")}
mqvSigs = []sig{ftypSig("mqt ")}
m4aSigs = []sig{ftypSig("M4A ")}
// TODO: add support for remaining video formats at ftyps.com.
)
// Mp4 matches an MP4 file.
func Mp4(in []byte) bool {
return detect(in, mp4Sigs)
}
// ThreeGP matches a 3GPP file.
func ThreeGP(in []byte) bool {
return detect(in, threeGPSigs)
}
// ThreeG2 matches a 3GPP2 file.
func ThreeG2(in []byte) bool {
return detect(in, threeG2Sigs)
}
// AMp4 matches an audio MP4 file.
func AMp4(in []byte) bool {
return detect(in, amp4Sigs)
}
// QuickTime matches a QuickTime File Format file.
func QuickTime(in []byte) bool {
return detect(in, qtSigs)
}
// Mqv matches a Sony / Mobile QuickTime file.
func Mqv(in []byte) bool {
return detect(in, mqvSigs)
}
// M4a matches an audio M4A file.
func M4a(in []byte) bool {
return detect(in, m4aSigs)
}

94
vendor/github.com/gabriel-vasile/mimetype/mime.go generated vendored Normal file
View file

@ -0,0 +1,94 @@
package mimetype
import "mime"
// MIME represents a file format in the tree structure of formats.
type MIME struct {
mime string
aliases []string
extension string
matchFunc func([]byte) bool
children []*MIME
parent *MIME
}
// String returns the string representation of the MIME type, e.g., "application/zip".
func (n *MIME) String() string {
return n.mime
}
// Extension returns the file extension associated with the MIME type.
// It includes the leading dot, as in ".html". When the file format does not
// have an extension, the empty string is returned.
func (n *MIME) Extension() string {
return n.extension
}
// Parent returns the parent MIME type from the tree structure.
// Each MIME type has a non-nil parent, except for the root MIME type.
func (n *MIME) Parent() *MIME {
return n.parent
}
// Is checks whether this MIME type, or any of its aliases, is equal to the
// expected MIME type. MIME type equality test is done on the "type/subtype"
// sections, ignores any optional MIME parameters, ignores any leading and
// trailing whitespace, and is case insensitive.
func (n *MIME) Is(expectedMIME string) bool {
// Parsing is needed because some detected MIME types contain parameters
// that need to be stripped for the comparison.
expectedMIME, _, _ = mime.ParseMediaType(expectedMIME)
found, _, _ := mime.ParseMediaType(n.mime)
if expectedMIME == found {
return true
}
for _, alias := range n.aliases {
if alias == expectedMIME {
return true
}
}
return false
}
func newMIME(mime, extension string, matchFunc func([]byte) bool, children ...*MIME) *MIME {
n := &MIME{
mime: mime,
extension: extension,
matchFunc: matchFunc,
children: children,
}
for _, c := range children {
c.parent = n
}
return n
}
func (n *MIME) alias(aliases ...string) *MIME {
n.aliases = aliases
return n
}
// match does a depth-first search on the matchers tree.
// it returns the deepest successful matcher for which all the children fail.
func (n *MIME) match(in []byte, deepestMatch *MIME) *MIME {
for _, c := range n.children {
if c.matchFunc(in) {
return c.match(in, c)
}
}
return deepestMatch
}
func (n *MIME) flatten() []*MIME {
out := []*MIME{n}
for _, c := range n.children {
out = append(out, c.flatten()...)
}
return out
}

BIN
vendor/github.com/gabriel-vasile/mimetype/mimetype.gif generated vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 744 KiB

68
vendor/github.com/gabriel-vasile/mimetype/mimetype.go generated vendored Normal file
View file

@ -0,0 +1,68 @@
// Package mimetype uses magic number signatures to detect the MIME type of a file.
//
// mimetype stores the list of MIME types in a tree structure with
// "application/octet-stream" at the root of the hierarchy. The hierarchy
// approach minimizes the number of checks that need to be done on the input
// and allows for more precise results once the base type of file has been
// identified.
package mimetype
import (
"io"
"os"
"github.com/gabriel-vasile/mimetype/internal/matchers"
)
// Detect returns the MIME type found from the provided byte slice.
//
// The result is always a valid MIME type, with application/octet-stream
// returned when identification failed.
func Detect(in []byte) (mime *MIME) {
if len(in) == 0 {
return newMIME("inode/x-empty", "", matchers.True)
}
return root.match(in, root)
}
// DetectReader returns the MIME type of the provided reader.
//
// The result is always a valid MIME type, with application/octet-stream
// returned when identification failed with or without an error.
// Any error returned is related to the reading from the input reader.
//
// DetectReader assumes the reader offset is at the start. If the input
// is a ReadSeeker you read from before, it should be rewinded before detection:
// reader.Seek(0, io.SeekStart)
//
// To prevent loading entire files into memory, DetectReader reads at most
// matchers.ReadLimit bytes from the reader.
func DetectReader(r io.Reader) (mime *MIME, err error) {
in := make([]byte, matchers.ReadLimit)
n, err := r.Read(in)
if err != nil && err != io.EOF {
return root, err
}
in = in[:n]
return Detect(in), nil
}
// DetectFile returns the MIME type of the provided file.
//
// The result is always a valid MIME type, with application/octet-stream
// returned when identification failed with or without an error.
// Any error returned is related to the opening and reading from the input file.
//
// To prevent loading entire files into memory, DetectFile reads at most
// matchers.ReadLimit bytes from the reader.
func DetectFile(file string) (mime *MIME, err error) {
f, err := os.Open(file)
if err != nil {
return root, err
}
defer f.Close()
return DetectReader(f)
}

View file

@ -0,0 +1,146 @@
## 141 Supported MIME types
This file is automatically generated when running tests. Do not edit manually.
Extension | MIME type | Aliases
--------- | --------- | -------
**n/a** | application/octet-stream | -
**.7z** | application/x-7z-compressed | -
**.zip** | application/zip | application/x-zip, application/x-zip-compressed
**.xlsx** | application/vnd.openxmlformats-officedocument.spreadsheetml.sheet | -
**.docx** | application/vnd.openxmlformats-officedocument.wordprocessingml.document | -
**.pptx** | application/vnd.openxmlformats-officedocument.presentationml.presentation | -
**.epub** | application/epub+zip | -
**.jar** | application/jar | -
**.odt** | application/vnd.oasis.opendocument.text | application/x-vnd.oasis.opendocument.text
**.ott** | application/vnd.oasis.opendocument.text-template | application/x-vnd.oasis.opendocument.text-template
**.ods** | application/vnd.oasis.opendocument.spreadsheet | application/x-vnd.oasis.opendocument.spreadsheet
**.ots** | application/vnd.oasis.opendocument.spreadsheet-template | application/x-vnd.oasis.opendocument.spreadsheet-template
**.odp** | application/vnd.oasis.opendocument.presentation | application/x-vnd.oasis.opendocument.presentation
**.otp** | application/vnd.oasis.opendocument.presentation-template | application/x-vnd.oasis.opendocument.presentation-template
**.odg** | application/vnd.oasis.opendocument.graphics | application/x-vnd.oasis.opendocument.graphics
**.otg** | application/vnd.oasis.opendocument.graphics-template | application/x-vnd.oasis.opendocument.graphics-template
**.odf** | application/vnd.oasis.opendocument.formula | application/x-vnd.oasis.opendocument.formula
**.pdf** | application/pdf | application/x-pdf
**n/a** | application/x-ole-storage | -
**.xls** | application/vnd.ms-excel | application/msexcel
**.pub** | application/vnd.ms-publisher | -
**.ppt** | application/vnd.ms-powerpoint | application/mspowerpoint
**.doc** | application/msword | application/vnd.ms-word
**.ps** | application/postscript | -
**.psd** | image/vnd.adobe.photoshop | image/x-psd, application/photoshop
**.ogg** | application/ogg | application/x-ogg
**.oga** | audio/ogg | -
**.ogv** | video/ogg | -
**.png** | image/png | -
**.jpg** | image/jpeg | -
**.jp2** | image/jp2 | -
**.jpf** | image/jpx | -
**.jpm** | image/jpm | video/jpm
**.gif** | image/gif | -
**.webp** | image/webp | -
**.exe** | application/vnd.microsoft.portable-executable | -
**n/a** | application/x-elf | -
**n/a** | application/x-object | -
**n/a** | application/x-executable | -
**.so** | application/x-sharedlib | -
**n/a** | application/x-coredump | -
**.a** | application/x-archive | application/x-unix-archive
**.deb** | application/vnd.debian.binary-package | -
**.tar** | application/x-tar | -
**.xar** | application/x-xar | -
**.bz2** | application/x-bzip2 | -
**.fits** | application/fits | -
**.tiff** | image/tiff | -
**.bmp** | image/bmp | image/x-bmp, image/x-ms-bmp
**.ico** | image/x-icon | -
**.mp3** | audio/mpeg | audio/x-mpeg, audio/mp3
**.flac** | audio/flac | -
**.midi** | audio/midi | audio/mid, audio/sp-midi, audio/x-mid, audio/x-midi
**.ape** | audio/ape | -
**.mpc** | audio/musepack | -
**.amr** | audio/amr | audio/amr-nb
**.wav** | audio/wav | audio/x-wav, audio/vnd.wave, audio/wave
**.aiff** | audio/aiff | -
**.au** | audio/basic | -
**.mpeg** | video/mpeg | -
**.mov** | video/quicktime | -
**.mqv** | video/quicktime | -
**.mp4** | video/mp4 | -
**.webm** | video/webm | audio/webm
**.3gp** | video/3gpp | video/3gp, audio/3gpp
**.3g2** | video/3gpp2 | video/3g2, audio/3gpp2
**.avi** | video/x-msvideo | video/avi, video/msvideo
**.flv** | video/x-flv | -
**.mkv** | video/x-matroska | -
**.asf** | video/x-ms-asf | video/asf, video/x-ms-wmv
**.aac** | audio/aac | -
**.voc** | audio/x-unknown | -
**.mp4** | audio/mp4 | audio/x-m4a, audio/x-mp4a
**.m4a** | audio/x-m4a | -
**.txt** | text/plain; charset=utf-32le | -
**.txt** | text/plain; charset=utf-32be | -
**.txt** | text/plain; charset=utf-16le | -
**.txt** | text/plain; charset=utf-16be | -
**.gz** | application/gzip | application/x-gzip, application/x-gunzip, application/gzipped, application/gzip-compressed, application/x-gzip-compressed, gzip/document
**.class** | application/x-java-applet; charset=binary | -
**.swf** | application/x-shockwave-flash | -
**.crx** | application/x-chrome-extension | -
**.woff** | font/woff | -
**.woff2** | font/woff2 | -
**.otf** | font/otf | -
**.eot** | application/vnd.ms-fontobject | -
**.wasm** | application/wasm | -
**.shx** | application/octet-stream | -
**.shp** | application/octet-stream | -
**.dbf** | application/x-dbf | -
**.dcm** | application/dicom | -
**.rar** | application/x-rar-compressed | application/x-rar
**.djvu** | image/vnd.djvu | -
**.mobi** | application/x-mobipocket-ebook | -
**.lit** | application/x-ms-reader | -
**.bpg** | image/bpg | -
**.sqlite** | application/x-sqlite3 | -
**.dwg** | image/vnd.dwg | image/x-dwg, application/acad, application/x-acad, application/autocad_dwg, application/dwg, application/x-dwg, application/x-autocad, drawing/dwg
**.nes** | application/vnd.nintendo.snes.rom | -
**.macho** | application/x-mach-binary | -
**.qcp** | audio/qcelp | -
**.icns** | image/x-icns | -
**.heic** | image/heic | -
**.heic** | image/heic-sequence | -
**.heif** | image/heif | -
**.heif** | image/heif-sequence | -
**.mrc** | application/marc | -
**.mdb** | application/x-msaccess | -
**.accdb** | application/x-msaccess | -
**.zst** | application/zstd | -
**.cab** | application/vnd.ms-cab-compressed | -
**.txt** | text/plain; charset=utf-8 | -
**.html** | text/html; charset=utf-8 | -
**.svg** | image/svg+xml | -
**.xml** | text/xml; charset=utf-8 | -
**.rss** | application/rss+xml | text/rss
**.atom** | application/atom+xml | -
**.x3d** | model/x3d+xml | -
**.kml** | application/vnd.google-earth.kml+xml | -
**.xlf** | application/x-xliff+xml | -
**.dae** | model/vnd.collada+xml | -
**.gml** | application/gml+xml | -
**.gpx** | application/gpx+xml | -
**.tcx** | application/vnd.garmin.tcx+xml | -
**.amf** | application/x-amf | -
**.3mf** | application/vnd.ms-package.3dmanufacturing-3dmodel+xml | -
**.php** | text/x-php; charset=utf-8 | -
**.js** | application/javascript | application/x-javascript, text/javascript
**.lua** | text/x-lua | -
**.pl** | text/x-perl | -
**.py** | application/x-python | -
**.json** | application/json | -
**.geojson** | application/geo+json | -
**.ndjson** | application/x-ndjson | -
**.rtf** | text/rtf | -
**.tcl** | text/x-tcl | application/x-tcl
**.csv** | text/csv | -
**.tsv** | text/tab-separated-values | -
**.vcf** | text/vcard | -
**.ics** | text/calendar | -
**.warc** | application/warc | -

195
vendor/github.com/gabriel-vasile/mimetype/tree.go generated vendored Normal file
View file

@ -0,0 +1,195 @@
package mimetype
import "github.com/gabriel-vasile/mimetype/internal/matchers"
// root is a matcher which passes for any slice of bytes.
// When a matcher passes the check, the children matchers
// are tried in order to find a more accurate MIME type.
var root = newMIME("application/octet-stream", "", matchers.True,
sevenZ, zip, pdf, ole, ps, psd, ogg, png, jpg, jp2, jpx, jpm, gif, webp,
exe, elf, ar, tar, xar, bz2, fits, tiff, bmp, ico, mp3, flac, midi, ape,
musePack, amr, wav, aiff, au, mpeg, quickTime, mqv, mp4, webM, threeGP,
threeG2, avi, flv, mkv, asf, aac, voc, aMp4, m4a, utf32le, utf32be, utf16le,
utf16be, gzip, class, swf, crx, woff, woff2, otf, eot, wasm, shx, dbf, dcm,
rar, djvu, mobi, lit, bpg, sqlite3, dwg, nes, macho, qcp, icns, heic,
heicSeq, heif, heifSeq, mrc, mdb, accdb, zstd, cab, utf8,
)
// The list of nodes appended to the root node
var (
gzip = newMIME("application/gzip", ".gz", matchers.Gzip).
alias("application/x-gzip", "application/x-gunzip", "application/gzipped", "application/gzip-compressed", "application/x-gzip-compressed", "gzip/document")
sevenZ = newMIME("application/x-7z-compressed", ".7z", matchers.SevenZ)
zip = newMIME("application/zip", ".zip", matchers.Zip, xlsx, docx, pptx, epub, jar, odt, ods, odp, odg, odf).
alias("application/x-zip", "application/x-zip-compressed")
tar = newMIME("application/x-tar", ".tar", matchers.Tar)
xar = newMIME("application/x-xar", ".xar", matchers.Xar)
bz2 = newMIME("application/x-bzip2", ".bz2", matchers.Bz2)
pdf = newMIME("application/pdf", ".pdf", matchers.Pdf).
alias("application/x-pdf")
xlsx = newMIME("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx", matchers.Xlsx)
docx = newMIME("application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx", matchers.Docx)
pptx = newMIME("application/vnd.openxmlformats-officedocument.presentationml.presentation", ".pptx", matchers.Pptx)
epub = newMIME("application/epub+zip", ".epub", matchers.Epub)
jar = newMIME("application/jar", ".jar", matchers.Jar)
ole = newMIME("application/x-ole-storage", "", matchers.Ole, xls, pub, ppt, doc)
doc = newMIME("application/msword", ".doc", matchers.Doc).
alias("application/vnd.ms-word")
ppt = newMIME("application/vnd.ms-powerpoint", ".ppt", matchers.Ppt).
alias("application/mspowerpoint")
pub = newMIME("application/vnd.ms-publisher", ".pub", matchers.Pub)
xls = newMIME("application/vnd.ms-excel", ".xls", matchers.Xls).
alias("application/msexcel")
ps = newMIME("application/postscript", ".ps", matchers.Ps)
fits = newMIME("application/fits", ".fits", matchers.Fits)
ogg = newMIME("application/ogg", ".ogg", matchers.Ogg, oggAudio, oggVideo).
alias("application/x-ogg")
oggAudio = newMIME("audio/ogg", ".oga", matchers.OggAudio)
oggVideo = newMIME("video/ogg", ".ogv", matchers.OggVideo)
utf32le = newMIME("text/plain; charset=utf-32le", ".txt", matchers.Utf32le)
utf32be = newMIME("text/plain; charset=utf-32be", ".txt", matchers.Utf32be)
utf16le = newMIME("text/plain; charset=utf-16le", ".txt", matchers.Utf16le)
utf16be = newMIME("text/plain; charset=utf-16be", ".txt", matchers.Utf16be)
utf8 = newMIME("text/plain; charset=utf-8", ".txt", matchers.Utf8, html, svg, xml, php, js, lua, perl, python, json, ndJson, rtf, tcl, csv, tsv, vCard, iCalendar, warc)
xml = newMIME("text/xml; charset=utf-8", ".xml", matchers.Xml, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf)
json = newMIME("application/json", ".json", matchers.Json, geoJson)
csv = newMIME("text/csv", ".csv", matchers.Csv)
tsv = newMIME("text/tab-separated-values", ".tsv", matchers.Tsv)
geoJson = newMIME("application/geo+json", ".geojson", matchers.GeoJson)
ndJson = newMIME("application/x-ndjson", ".ndjson", matchers.NdJson)
html = newMIME("text/html; charset=utf-8", ".html", matchers.Html)
php = newMIME("text/x-php; charset=utf-8", ".php", matchers.Php)
rtf = newMIME("text/rtf", ".rtf", matchers.Rtf)
js = newMIME("application/javascript", ".js", matchers.Js).
alias("application/x-javascript", "text/javascript")
lua = newMIME("text/x-lua", ".lua", matchers.Lua)
perl = newMIME("text/x-perl", ".pl", matchers.Perl)
python = newMIME("application/x-python", ".py", matchers.Python)
tcl = newMIME("text/x-tcl", ".tcl", matchers.Tcl).
alias("application/x-tcl")
vCard = newMIME("text/vcard", ".vcf", matchers.VCard)
iCalendar = newMIME("text/calendar", ".ics", matchers.ICalendar)
svg = newMIME("image/svg+xml", ".svg", matchers.Svg)
rss = newMIME("application/rss+xml", ".rss", matchers.Rss).
alias("text/rss")
atom = newMIME("application/atom+xml", ".atom", matchers.Atom)
x3d = newMIME("model/x3d+xml", ".x3d", matchers.X3d)
kml = newMIME("application/vnd.google-earth.kml+xml", ".kml", matchers.Kml)
xliff = newMIME("application/x-xliff+xml", ".xlf", matchers.Xliff)
collada = newMIME("model/vnd.collada+xml", ".dae", matchers.Collada)
gml = newMIME("application/gml+xml", ".gml", matchers.Gml)
gpx = newMIME("application/gpx+xml", ".gpx", matchers.Gpx)
tcx = newMIME("application/vnd.garmin.tcx+xml", ".tcx", matchers.Tcx)
amf = newMIME("application/x-amf", ".amf", matchers.Amf)
threemf = newMIME("application/vnd.ms-package.3dmanufacturing-3dmodel+xml", ".3mf", matchers.Threemf)
png = newMIME("image/png", ".png", matchers.Png)
jpg = newMIME("image/jpeg", ".jpg", matchers.Jpg)
jp2 = newMIME("image/jp2", ".jp2", matchers.Jp2)
jpx = newMIME("image/jpx", ".jpf", matchers.Jpx)
jpm = newMIME("image/jpm", ".jpm", matchers.Jpm).
alias("video/jpm")
bpg = newMIME("image/bpg", ".bpg", matchers.Bpg)
gif = newMIME("image/gif", ".gif", matchers.Gif)
webp = newMIME("image/webp", ".webp", matchers.Webp)
tiff = newMIME("image/tiff", ".tiff", matchers.Tiff)
bmp = newMIME("image/bmp", ".bmp", matchers.Bmp).
alias("image/x-bmp", "image/x-ms-bmp")
ico = newMIME("image/x-icon", ".ico", matchers.Ico)
icns = newMIME("image/x-icns", ".icns", matchers.Icns)
psd = newMIME("image/vnd.adobe.photoshop", ".psd", matchers.Psd).
alias("image/x-psd", "application/photoshop")
heic = newMIME("image/heic", ".heic", matchers.Heic)
heicSeq = newMIME("image/heic-sequence", ".heic", matchers.HeicSequence)
heif = newMIME("image/heif", ".heif", matchers.Heif)
heifSeq = newMIME("image/heif-sequence", ".heif", matchers.HeifSequence)
mp3 = newMIME("audio/mpeg", ".mp3", matchers.Mp3).
alias("audio/x-mpeg", "audio/mp3")
flac = newMIME("audio/flac", ".flac", matchers.Flac)
midi = newMIME("audio/midi", ".midi", matchers.Midi).
alias("audio/mid", "audio/sp-midi", "audio/x-mid", "audio/x-midi")
ape = newMIME("audio/ape", ".ape", matchers.Ape)
musePack = newMIME("audio/musepack", ".mpc", matchers.MusePack)
wav = newMIME("audio/wav", ".wav", matchers.Wav).
alias("audio/x-wav", "audio/vnd.wave", "audio/wave")
aiff = newMIME("audio/aiff", ".aiff", matchers.Aiff)
au = newMIME("audio/basic", ".au", matchers.Au)
amr = newMIME("audio/amr", ".amr", matchers.Amr).
alias("audio/amr-nb")
aac = newMIME("audio/aac", ".aac", matchers.Aac)
voc = newMIME("audio/x-unknown", ".voc", matchers.Voc)
aMp4 = newMIME("audio/mp4", ".mp4", matchers.AMp4).
alias("audio/x-m4a", "audio/x-mp4a")
m4a = newMIME("audio/x-m4a", ".m4a", matchers.M4a)
mp4 = newMIME("video/mp4", ".mp4", matchers.Mp4)
webM = newMIME("video/webm", ".webm", matchers.WebM).
alias("audio/webm")
mpeg = newMIME("video/mpeg", ".mpeg", matchers.Mpeg)
quickTime = newMIME("video/quicktime", ".mov", matchers.QuickTime)
mqv = newMIME("video/quicktime", ".mqv", matchers.Mqv)
threeGP = newMIME("video/3gpp", ".3gp", matchers.ThreeGP).
alias("video/3gp", "audio/3gpp")
threeG2 = newMIME("video/3gpp2", ".3g2", matchers.ThreeG2).
alias("video/3g2", "audio/3gpp2")
avi = newMIME("video/x-msvideo", ".avi", matchers.Avi).
alias("video/avi", "video/msvideo")
flv = newMIME("video/x-flv", ".flv", matchers.Flv)
mkv = newMIME("video/x-matroska", ".mkv", matchers.Mkv)
asf = newMIME("video/x-ms-asf", ".asf", matchers.Asf).
alias("video/asf", "video/x-ms-wmv")
class = newMIME("application/x-java-applet; charset=binary", ".class", matchers.Class)
swf = newMIME("application/x-shockwave-flash", ".swf", matchers.Swf)
crx = newMIME("application/x-chrome-extension", ".crx", matchers.Crx)
woff = newMIME("font/woff", ".woff", matchers.Woff)
woff2 = newMIME("font/woff2", ".woff2", matchers.Woff2)
otf = newMIME("font/otf", ".otf", matchers.Otf)
eot = newMIME("application/vnd.ms-fontobject", ".eot", matchers.Eot)
wasm = newMIME("application/wasm", ".wasm", matchers.Wasm)
shp = newMIME("application/octet-stream", ".shp", matchers.Shp)
shx = newMIME("application/octet-stream", ".shx", matchers.Shx, shp)
dbf = newMIME("application/x-dbf", ".dbf", matchers.Dbf)
exe = newMIME("application/vnd.microsoft.portable-executable", ".exe", matchers.Exe)
elf = newMIME("application/x-elf", "", matchers.Elf, elfObj, elfExe, elfLib, elfDump)
elfObj = newMIME("application/x-object", "", matchers.ElfObj)
elfExe = newMIME("application/x-executable", "", matchers.ElfExe)
elfLib = newMIME("application/x-sharedlib", ".so", matchers.ElfLib)
elfDump = newMIME("application/x-coredump", "", matchers.ElfDump)
ar = newMIME("application/x-archive", ".a", matchers.Ar, deb).
alias("application/x-unix-archive")
deb = newMIME("application/vnd.debian.binary-package", ".deb", matchers.Deb)
dcm = newMIME("application/dicom", ".dcm", matchers.Dcm)
odt = newMIME("application/vnd.oasis.opendocument.text", ".odt", matchers.Odt, ott).
alias("application/x-vnd.oasis.opendocument.text")
ott = newMIME("application/vnd.oasis.opendocument.text-template", ".ott", matchers.Ott).
alias("application/x-vnd.oasis.opendocument.text-template")
ods = newMIME("application/vnd.oasis.opendocument.spreadsheet", ".ods", matchers.Ods, ots).
alias("application/x-vnd.oasis.opendocument.spreadsheet")
ots = newMIME("application/vnd.oasis.opendocument.spreadsheet-template", ".ots", matchers.Ots).
alias("application/x-vnd.oasis.opendocument.spreadsheet-template")
odp = newMIME("application/vnd.oasis.opendocument.presentation", ".odp", matchers.Odp, otp).
alias("application/x-vnd.oasis.opendocument.presentation")
otp = newMIME("application/vnd.oasis.opendocument.presentation-template", ".otp", matchers.Otp).
alias("application/x-vnd.oasis.opendocument.presentation-template")
odg = newMIME("application/vnd.oasis.opendocument.graphics", ".odg", matchers.Odg, otg).
alias("application/x-vnd.oasis.opendocument.graphics")
otg = newMIME("application/vnd.oasis.opendocument.graphics-template", ".otg", matchers.Otg).
alias("application/x-vnd.oasis.opendocument.graphics-template")
odf = newMIME("application/vnd.oasis.opendocument.formula", ".odf", matchers.Odf).
alias("application/x-vnd.oasis.opendocument.formula")
rar = newMIME("application/x-rar-compressed", ".rar", matchers.Rar).
alias("application/x-rar")
djvu = newMIME("image/vnd.djvu", ".djvu", matchers.DjVu)
mobi = newMIME("application/x-mobipocket-ebook", ".mobi", matchers.Mobi)
lit = newMIME("application/x-ms-reader", ".lit", matchers.Lit)
sqlite3 = newMIME("application/x-sqlite3", ".sqlite", matchers.Sqlite)
dwg = newMIME("image/vnd.dwg", ".dwg", matchers.Dwg).
alias("image/x-dwg", "application/acad", "application/x-acad", "application/autocad_dwg", "application/dwg", "application/x-dwg", "application/x-autocad", "drawing/dwg")
warc = newMIME("application/warc", ".warc", matchers.Warc)
nes = newMIME("application/vnd.nintendo.snes.rom", ".nes", matchers.Nes)
macho = newMIME("application/x-mach-binary", ".macho", matchers.MachO)
qcp = newMIME("audio/qcelp", ".qcp", matchers.Qcp)
mrc = newMIME("application/marc", ".mrc", matchers.Marc)
mdb = newMIME("application/x-msaccess", ".mdb", matchers.MsAccessMdb)
accdb = newMIME("application/x-msaccess", ".accdb", matchers.MsAccessAce)
zstd = newMIME("application/zstd", ".zst", matchers.Zstd)
cab = newMIME("application/vnd.ms-cab-compressed", ".cab", matchers.Cab)
)

28
vendor/github.com/klauspost/compress/LICENSE generated vendored Normal file
View file

@ -0,0 +1,28 @@
Copyright (c) 2012 The Go Authors. All rights reserved.
Copyright (c) 2019 Klaus Post. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

817
vendor/github.com/klauspost/compress/flate/deflate.go generated vendored Normal file
View file

@ -0,0 +1,817 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Copyright (c) 2015 Klaus Post
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"fmt"
"io"
"math"
)
const (
NoCompression = 0
BestSpeed = 1
BestCompression = 9
DefaultCompression = -1
// HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman
// entropy encoding. This mode is useful in compressing data that has
// already been compressed with an LZ style algorithm (e.g. Snappy or LZ4)
// that lacks an entropy encoder. Compression gains are achieved when
// certain bytes in the input stream occur more frequently than others.
//
// Note that HuffmanOnly produces a compressed output that is
// RFC 1951 compliant. That is, any valid DEFLATE decompressor will
// continue to be able to decompress this output.
HuffmanOnly = -2
ConstantCompression = HuffmanOnly // compatibility alias.
logWindowSize = 15
windowSize = 1 << logWindowSize
windowMask = windowSize - 1
logMaxOffsetSize = 15 // Standard DEFLATE
minMatchLength = 4 // The smallest match that the compressor looks for
maxMatchLength = 258 // The longest match for the compressor
minOffsetSize = 1 // The shortest offset that makes any sense
// The maximum number of tokens we put into a single flat block, just too
// stop things from getting too large.
maxFlateBlockTokens = 1 << 14
maxStoreBlockSize = 65535
hashBits = 17 // After 17 performance degrades
hashSize = 1 << hashBits
hashMask = (1 << hashBits) - 1
hashShift = (hashBits + minMatchLength - 1) / minMatchLength
maxHashOffset = 1 << 24
skipNever = math.MaxInt32
)
type compressionLevel struct {
good, lazy, nice, chain, fastSkipHashing, level int
}
// Compression levels have been rebalanced from zlib deflate defaults
// to give a bigger spread in speed and compression.
// See https://blog.klauspost.com/rebalancing-deflate-compression-levels/
var levels = []compressionLevel{
{}, // 0
// Level 1-6 uses specialized algorithm - values not used
{0, 0, 0, 0, 0, 1},
{0, 0, 0, 0, 0, 2},
{0, 0, 0, 0, 0, 3},
{0, 0, 0, 0, 0, 4},
{0, 0, 0, 0, 0, 5},
{0, 0, 0, 0, 0, 6},
// Levels 7-9 use increasingly more lazy matching
// and increasingly stringent conditions for "good enough".
{8, 8, 24, 16, skipNever, 7},
{10, 16, 24, 64, skipNever, 8},
{32, 258, 258, 4096, skipNever, 9},
}
// advancedState contains state for the advanced levels, with bigger hash tables, etc.
type advancedState struct {
// deflate state
length int
offset int
hash uint32
maxInsertIndex int
ii uint16 // position of last match, intended to overflow to reset.
// Input hash chains
// hashHead[hashValue] contains the largest inputIndex with the specified hash value
// If hashHead[hashValue] is within the current window, then
// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
// with the same hash value.
chainHead int
hashHead [hashSize]uint32
hashPrev [windowSize]uint32
hashOffset int
// input window: unprocessed data is window[index:windowEnd]
index int
hashMatch [maxMatchLength + minMatchLength]uint32
}
type compressor struct {
compressionLevel
w *huffmanBitWriter
// compression algorithm
fill func(*compressor, []byte) int // copy data to window
step func(*compressor) // process window
sync bool // requesting flush
window []byte
windowEnd int
blockStart int // window index where current tokens start
byteAvailable bool // if true, still need to process window[index-1].
err error
// queued output tokens
tokens tokens
fast fastEnc
state *advancedState
}
func (d *compressor) fillDeflate(b []byte) int {
s := d.state
if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
// shift the window by windowSize
copy(d.window[:], d.window[windowSize:2*windowSize])
s.index -= windowSize
d.windowEnd -= windowSize
if d.blockStart >= windowSize {
d.blockStart -= windowSize
} else {
d.blockStart = math.MaxInt32
}
s.hashOffset += windowSize
if s.hashOffset > maxHashOffset {
delta := s.hashOffset - 1
s.hashOffset -= delta
s.chainHead -= delta
// Iterate over slices instead of arrays to avoid copying
// the entire table onto the stack (Issue #18625).
for i, v := range s.hashPrev[:] {
if int(v) > delta {
s.hashPrev[i] = uint32(int(v) - delta)
} else {
s.hashPrev[i] = 0
}
}
for i, v := range s.hashHead[:] {
if int(v) > delta {
s.hashHead[i] = uint32(int(v) - delta)
} else {
s.hashHead[i] = 0
}
}
}
}
n := copy(d.window[d.windowEnd:], b)
d.windowEnd += n
return n
}
func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error {
if index > 0 || eof {
var window []byte
if d.blockStart <= index {
window = d.window[d.blockStart:index]
}
d.blockStart = index
d.w.writeBlock(tok, eof, window)
return d.w.err
}
return nil
}
// writeBlockSkip writes the current block and uses the number of tokens
// to determine if the block should be stored on no matches, or
// only huffman encoded.
func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error {
if index > 0 || eof {
if d.blockStart <= index {
window := d.window[d.blockStart:index]
// If we removed less than a 64th of all literals
// we huffman compress the block.
if int(tok.n) > len(window)-int(tok.n>>6) {
d.w.writeBlockHuff(eof, window, d.sync)
} else {
// Write a dynamic huffman block.
d.w.writeBlockDynamic(tok, eof, window, d.sync)
}
} else {
d.w.writeBlock(tok, eof, nil)
}
d.blockStart = index
return d.w.err
}
return nil
}
// fillWindow will fill the current window with the supplied
// dictionary and calculate all hashes.
// This is much faster than doing a full encode.
// Should only be used after a start/reset.
func (d *compressor) fillWindow(b []byte) {
// Do not fill window if we are in store-only or huffman mode.
if d.level <= 0 {
return
}
if d.fast != nil {
// encode the last data, but discard the result
if len(b) > maxMatchOffset {
b = b[len(b)-maxMatchOffset:]
}
d.fast.Encode(&d.tokens, b)
d.tokens.Reset()
return
}
s := d.state
// If we are given too much, cut it.
if len(b) > windowSize {
b = b[len(b)-windowSize:]
}
// Add all to window.
n := copy(d.window[d.windowEnd:], b)
// Calculate 256 hashes at the time (more L1 cache hits)
loops := (n + 256 - minMatchLength) / 256
for j := 0; j < loops; j++ {
startindex := j * 256
end := startindex + 256 + minMatchLength - 1
if end > n {
end = n
}
tocheck := d.window[startindex:end]
dstSize := len(tocheck) - minMatchLength + 1
if dstSize <= 0 {
continue
}
dst := s.hashMatch[:dstSize]
bulkHash4(tocheck, dst)
var newH uint32
for i, val := range dst {
di := i + startindex
newH = val & hashMask
// Get previous value with the same hash.
// Our chain should point to the previous value.
s.hashPrev[di&windowMask] = s.hashHead[newH]
// Set the head of the hash chain to us.
s.hashHead[newH] = uint32(di + s.hashOffset)
}
s.hash = newH
}
// Update window information.
d.windowEnd += n
s.index = n
}
// Try to find a match starting at index whose length is greater than prevSize.
// We only look at chainCount possibilities before giving up.
// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead
func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
minMatchLook := maxMatchLength
if lookahead < minMatchLook {
minMatchLook = lookahead
}
win := d.window[0 : pos+minMatchLook]
// We quit when we get a match that's at least nice long
nice := len(win) - pos
if d.nice < nice {
nice = d.nice
}
// If we've got a match that's good enough, only look in 1/4 the chain.
tries := d.chain
length = prevLength
if length >= d.good {
tries >>= 2
}
wEnd := win[pos+length]
wPos := win[pos:]
minIndex := pos - windowSize
for i := prevHead; tries > 0; tries-- {
if wEnd == win[i+length] {
n := matchLen(win[i:i+minMatchLook], wPos)
if n > length && (n > minMatchLength || pos-i <= 4096) {
length = n
offset = pos - i
ok = true
if n >= nice {
// The match is good enough that we don't try to find a better one.
break
}
wEnd = win[pos+n]
}
}
if i == minIndex {
// hashPrev[i & windowMask] has already been overwritten, so stop now.
break
}
i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset
if i < minIndex || i < 0 {
break
}
}
return
}
func (d *compressor) writeStoredBlock(buf []byte) error {
if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
return d.w.err
}
d.w.writeBytes(buf)
return d.w.err
}
// hash4 returns a hash representation of the first 4 bytes
// of the supplied slice.
// The caller must ensure that len(b) >= 4.
func hash4(b []byte) uint32 {
b = b[:4]
return hash4u(uint32(b[3])|uint32(b[2])<<8|uint32(b[1])<<16|uint32(b[0])<<24, hashBits)
}
// bulkHash4 will compute hashes using the same
// algorithm as hash4
func bulkHash4(b []byte, dst []uint32) {
if len(b) < 4 {
return
}
hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
dst[0] = hash4u(hb, hashBits)
end := len(b) - 4 + 1
for i := 1; i < end; i++ {
hb = (hb << 8) | uint32(b[i+3])
dst[i] = hash4u(hb, hashBits)
}
}
func (d *compressor) initDeflate() {
d.window = make([]byte, 2*windowSize)
d.byteAvailable = false
d.err = nil
if d.state == nil {
return
}
s := d.state
s.index = 0
s.hashOffset = 1
s.length = minMatchLength - 1
s.offset = 0
s.hash = 0
s.chainHead = -1
}
// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever,
// meaning it always has lazy matching on.
func (d *compressor) deflateLazy() {
s := d.state
// Sanity enables additional runtime tests.
// It's intended to be used during development
// to supplement the currently ad-hoc unit tests.
const sanity = false
if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
return
}
s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
if s.index < s.maxInsertIndex {
s.hash = hash4(d.window[s.index : s.index+minMatchLength])
}
for {
if sanity && s.index > d.windowEnd {
panic("index > windowEnd")
}
lookahead := d.windowEnd - s.index
if lookahead < minMatchLength+maxMatchLength {
if !d.sync {
return
}
if sanity && s.index > d.windowEnd {
panic("index > windowEnd")
}
if lookahead == 0 {
// Flush current output block if any.
if d.byteAvailable {
// There is still one pending token that needs to be flushed
d.tokens.AddLiteral(d.window[s.index-1])
d.byteAvailable = false
}
if d.tokens.n > 0 {
if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
d.tokens.Reset()
}
return
}
}
if s.index < s.maxInsertIndex {
// Update the hash
s.hash = hash4(d.window[s.index : s.index+minMatchLength])
ch := s.hashHead[s.hash&hashMask]
s.chainHead = int(ch)
s.hashPrev[s.index&windowMask] = ch
s.hashHead[s.hash&hashMask] = uint32(s.index + s.hashOffset)
}
prevLength := s.length
prevOffset := s.offset
s.length = minMatchLength - 1
s.offset = 0
minIndex := s.index - windowSize
if minIndex < 0 {
minIndex = 0
}
if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy {
if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok {
s.length = newLength
s.offset = newOffset
}
}
if prevLength >= minMatchLength && s.length <= prevLength {
// There was a match at the previous step, and the current match is
// not better. Output the previous match.
d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize))
// Insert in the hash table all strings up to the end of the match.
// index and index-1 are already inserted. If there is not enough
// lookahead, the last two strings are not inserted into the hash
// table.
var newIndex int
newIndex = s.index + prevLength - 1
// Calculate missing hashes
end := newIndex
if end > s.maxInsertIndex {
end = s.maxInsertIndex
}
end += minMatchLength - 1
startindex := s.index + 1
if startindex > s.maxInsertIndex {
startindex = s.maxInsertIndex
}
tocheck := d.window[startindex:end]
dstSize := len(tocheck) - minMatchLength + 1
if dstSize > 0 {
dst := s.hashMatch[:dstSize]
bulkHash4(tocheck, dst)
var newH uint32
for i, val := range dst {
di := i + startindex
newH = val & hashMask
// Get previous value with the same hash.
// Our chain should point to the previous value.
s.hashPrev[di&windowMask] = s.hashHead[newH]
// Set the head of the hash chain to us.
s.hashHead[newH] = uint32(di + s.hashOffset)
}
s.hash = newH
}
s.index = newIndex
d.byteAvailable = false
s.length = minMatchLength - 1
if d.tokens.n == maxFlateBlockTokens {
// The block includes the current character
if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
d.tokens.Reset()
}
} else {
// Reset, if we got a match this run.
if s.length >= minMatchLength {
s.ii = 0
}
// We have a byte waiting. Emit it.
if d.byteAvailable {
s.ii++
d.tokens.AddLiteral(d.window[s.index-1])
if d.tokens.n == maxFlateBlockTokens {
if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
d.tokens.Reset()
}
s.index++
// If we have a long run of no matches, skip additional bytes
// Resets when s.ii overflows after 64KB.
if s.ii > 31 {
n := int(s.ii >> 5)
for j := 0; j < n; j++ {
if s.index >= d.windowEnd-1 {
break
}
d.tokens.AddLiteral(d.window[s.index-1])
if d.tokens.n == maxFlateBlockTokens {
if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
d.tokens.Reset()
}
s.index++
}
// Flush last byte
d.tokens.AddLiteral(d.window[s.index-1])
d.byteAvailable = false
// s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength
if d.tokens.n == maxFlateBlockTokens {
if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
d.tokens.Reset()
}
}
} else {
s.index++
d.byteAvailable = true
}
}
}
}
func (d *compressor) store() {
if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) {
d.err = d.writeStoredBlock(d.window[:d.windowEnd])
d.windowEnd = 0
}
}
// fillWindow will fill the buffer with data for huffman-only compression.
// The number of bytes copied is returned.
func (d *compressor) fillBlock(b []byte) int {
n := copy(d.window[d.windowEnd:], b)
d.windowEnd += n
return n
}
// storeHuff will compress and store the currently added data,
// if enough has been accumulated or we at the end of the stream.
// Any error that occurred will be in d.err
func (d *compressor) storeHuff() {
if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 {
return
}
d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync)
d.err = d.w.err
d.windowEnd = 0
}
// storeFast will compress and store the currently added data,
// if enough has been accumulated or we at the end of the stream.
// Any error that occurred will be in d.err
func (d *compressor) storeFast() {
// We only compress if we have maxStoreBlockSize.
if d.windowEnd < len(d.window) {
if !d.sync {
return
}
// Handle extremely small sizes.
if d.windowEnd < 128 {
if d.windowEnd == 0 {
return
}
if d.windowEnd <= 32 {
d.err = d.writeStoredBlock(d.window[:d.windowEnd])
} else {
d.w.writeBlockHuff(false, d.window[:d.windowEnd], true)
d.err = d.w.err
}
d.tokens.Reset()
d.windowEnd = 0
d.fast.Reset()
return
}
}
d.fast.Encode(&d.tokens, d.window[:d.windowEnd])
// If we made zero matches, store the block as is.
if d.tokens.n == 0 {
d.err = d.writeStoredBlock(d.window[:d.windowEnd])
// If we removed less than 1/16th, huffman compress the block.
} else if int(d.tokens.n) > d.windowEnd-(d.windowEnd>>4) {
d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync)
d.err = d.w.err
} else {
d.w.writeBlockDynamic(&d.tokens, false, d.window[:d.windowEnd], d.sync)
d.err = d.w.err
}
d.tokens.Reset()
d.windowEnd = 0
}
// write will add input byte to the stream.
// Unless an error occurs all bytes will be consumed.
func (d *compressor) write(b []byte) (n int, err error) {
if d.err != nil {
return 0, d.err
}
n = len(b)
for len(b) > 0 {
d.step(d)
b = b[d.fill(d, b):]
if d.err != nil {
return 0, d.err
}
}
return n, d.err
}
func (d *compressor) syncFlush() error {
d.sync = true
if d.err != nil {
return d.err
}
d.step(d)
if d.err == nil {
d.w.writeStoredHeader(0, false)
d.w.flush()
d.err = d.w.err
}
d.sync = false
return d.err
}
func (d *compressor) init(w io.Writer, level int) (err error) {
d.w = newHuffmanBitWriter(w)
switch {
case level == NoCompression:
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
d.step = (*compressor).store
case level == ConstantCompression:
d.w.logNewTablePenalty = 4
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeHuff
case level == DefaultCompression:
level = 5
fallthrough
case level >= 1 && level <= 6:
d.w.logNewTablePenalty = 6
d.fast = newFastEnc(level)
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeFast
case 7 <= level && level <= 9:
d.w.logNewTablePenalty = 10
d.state = &advancedState{}
d.compressionLevel = levels[level]
d.initDeflate()
d.fill = (*compressor).fillDeflate
d.step = (*compressor).deflateLazy
default:
return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
}
d.level = level
return nil
}
// reset the state of the compressor.
func (d *compressor) reset(w io.Writer) {
d.w.reset(w)
d.sync = false
d.err = nil
// We only need to reset a few things for Snappy.
if d.fast != nil {
d.fast.Reset()
d.windowEnd = 0
d.tokens.Reset()
return
}
switch d.compressionLevel.chain {
case 0:
// level was NoCompression or ConstantCompresssion.
d.windowEnd = 0
default:
s := d.state
s.chainHead = -1
for i := range s.hashHead {
s.hashHead[i] = 0
}
for i := range s.hashPrev {
s.hashPrev[i] = 0
}
s.hashOffset = 1
s.index, d.windowEnd = 0, 0
d.blockStart, d.byteAvailable = 0, false
d.tokens.Reset()
s.length = minMatchLength - 1
s.offset = 0
s.hash = 0
s.ii = 0
s.maxInsertIndex = 0
}
}
func (d *compressor) close() error {
if d.err != nil {
return d.err
}
d.sync = true
d.step(d)
if d.err != nil {
return d.err
}
if d.w.writeStoredHeader(0, true); d.w.err != nil {
return d.w.err
}
d.w.flush()
d.w.reset(nil)
return d.w.err
}
// NewWriter returns a new Writer compressing data at the given level.
// Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression);
// higher levels typically run slower but compress more.
// Level 0 (NoCompression) does not attempt any compression; it only adds the
// necessary DEFLATE framing.
// Level -1 (DefaultCompression) uses the default compression level.
// Level -2 (ConstantCompression) will use Huffman compression only, giving
// a very fast compression for all types of input, but sacrificing considerable
// compression efficiency.
//
// If level is in the range [-2, 9] then the error returned will be nil.
// Otherwise the error returned will be non-nil.
func NewWriter(w io.Writer, level int) (*Writer, error) {
var dw Writer
if err := dw.d.init(w, level); err != nil {
return nil, err
}
return &dw, nil
}
// NewWriterDict is like NewWriter but initializes the new
// Writer with a preset dictionary. The returned Writer behaves
// as if the dictionary had been written to it without producing
// any compressed output. The compressed data written to w
// can only be decompressed by a Reader initialized with the
// same dictionary.
func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
zw, err := NewWriter(w, level)
if err != nil {
return nil, err
}
zw.d.fillWindow(dict)
zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method.
return zw, err
}
// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see NewWriter).
type Writer struct {
d compressor
dict []byte
}
// Write writes data to w, which will eventually write the
// compressed form of data to its underlying writer.
func (w *Writer) Write(data []byte) (n int, err error) {
return w.d.write(data)
}
// Flush flushes any pending data to the underlying writer.
// It is useful mainly in compressed network protocols, to ensure that
// a remote reader has enough data to reconstruct a packet.
// Flush does not return until the data has been written.
// Calling Flush when there is no pending data still causes the Writer
// to emit a sync marker of at least 4 bytes.
// If the underlying writer returns an error, Flush returns that error.
//
// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
func (w *Writer) Flush() error {
// For more about flushing:
// http://www.bolet.org/~pornin/deflate-flush.html
return w.d.syncFlush()
}
// Close flushes and closes the writer.
func (w *Writer) Close() error {
return w.d.close()
}
// Reset discards the writer's state and makes it equivalent to
// the result of NewWriter or NewWriterDict called with dst
// and w's level and dictionary.
func (w *Writer) Reset(dst io.Writer) {
if len(w.dict) > 0 {
// w was created with NewWriterDict
w.d.reset(dst)
if dst != nil {
w.d.fillWindow(w.dict)
}
} else {
// w was created with NewWriter
w.d.reset(dst)
}
}
// ResetDict discards the writer's state and makes it equivalent to
// the result of NewWriter or NewWriterDict called with dst
// and w's level, but sets a specific dictionary.
func (w *Writer) ResetDict(dst io.Writer, dict []byte) {
w.dict = dict
w.d.reset(dst)
w.d.fillWindow(w.dict)
}

View file

@ -0,0 +1,184 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
// dictDecoder implements the LZ77 sliding dictionary as used in decompression.
// LZ77 decompresses data through sequences of two forms of commands:
//
// * Literal insertions: Runs of one or more symbols are inserted into the data
// stream as is. This is accomplished through the writeByte method for a
// single symbol, or combinations of writeSlice/writeMark for multiple symbols.
// Any valid stream must start with a literal insertion if no preset dictionary
// is used.
//
// * Backward copies: Runs of one or more symbols are copied from previously
// emitted data. Backward copies come as the tuple (dist, length) where dist
// determines how far back in the stream to copy from and length determines how
// many bytes to copy. Note that it is valid for the length to be greater than
// the distance. Since LZ77 uses forward copies, that situation is used to
// perform a form of run-length encoding on repeated runs of symbols.
// The writeCopy and tryWriteCopy are used to implement this command.
//
// For performance reasons, this implementation performs little to no sanity
// checks about the arguments. As such, the invariants documented for each
// method call must be respected.
type dictDecoder struct {
hist []byte // Sliding window history
// Invariant: 0 <= rdPos <= wrPos <= len(hist)
wrPos int // Current output position in buffer
rdPos int // Have emitted hist[:rdPos] already
full bool // Has a full window length been written yet?
}
// init initializes dictDecoder to have a sliding window dictionary of the given
// size. If a preset dict is provided, it will initialize the dictionary with
// the contents of dict.
func (dd *dictDecoder) init(size int, dict []byte) {
*dd = dictDecoder{hist: dd.hist}
if cap(dd.hist) < size {
dd.hist = make([]byte, size)
}
dd.hist = dd.hist[:size]
if len(dict) > len(dd.hist) {
dict = dict[len(dict)-len(dd.hist):]
}
dd.wrPos = copy(dd.hist, dict)
if dd.wrPos == len(dd.hist) {
dd.wrPos = 0
dd.full = true
}
dd.rdPos = dd.wrPos
}
// histSize reports the total amount of historical data in the dictionary.
func (dd *dictDecoder) histSize() int {
if dd.full {
return len(dd.hist)
}
return dd.wrPos
}
// availRead reports the number of bytes that can be flushed by readFlush.
func (dd *dictDecoder) availRead() int {
return dd.wrPos - dd.rdPos
}
// availWrite reports the available amount of output buffer space.
func (dd *dictDecoder) availWrite() int {
return len(dd.hist) - dd.wrPos
}
// writeSlice returns a slice of the available buffer to write data to.
//
// This invariant will be kept: len(s) <= availWrite()
func (dd *dictDecoder) writeSlice() []byte {
return dd.hist[dd.wrPos:]
}
// writeMark advances the writer pointer by cnt.
//
// This invariant must be kept: 0 <= cnt <= availWrite()
func (dd *dictDecoder) writeMark(cnt int) {
dd.wrPos += cnt
}
// writeByte writes a single byte to the dictionary.
//
// This invariant must be kept: 0 < availWrite()
func (dd *dictDecoder) writeByte(c byte) {
dd.hist[dd.wrPos] = c
dd.wrPos++
}
// writeCopy copies a string at a given (dist, length) to the output.
// This returns the number of bytes copied and may be less than the requested
// length if the available space in the output buffer is too small.
//
// This invariant must be kept: 0 < dist <= histSize()
func (dd *dictDecoder) writeCopy(dist, length int) int {
dstBase := dd.wrPos
dstPos := dstBase
srcPos := dstPos - dist
endPos := dstPos + length
if endPos > len(dd.hist) {
endPos = len(dd.hist)
}
// Copy non-overlapping section after destination position.
//
// This section is non-overlapping in that the copy length for this section
// is always less than or equal to the backwards distance. This can occur
// if a distance refers to data that wraps-around in the buffer.
// Thus, a backwards copy is performed here; that is, the exact bytes in
// the source prior to the copy is placed in the destination.
if srcPos < 0 {
srcPos += len(dd.hist)
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
srcPos = 0
}
// Copy possibly overlapping section before destination position.
//
// This section can overlap if the copy length for this section is larger
// than the backwards distance. This is allowed by LZ77 so that repeated
// strings can be succinctly represented using (dist, length) pairs.
// Thus, a forwards copy is performed here; that is, the bytes copied is
// possibly dependent on the resulting bytes in the destination as the copy
// progresses along. This is functionally equivalent to the following:
//
// for i := 0; i < endPos-dstPos; i++ {
// dd.hist[dstPos+i] = dd.hist[srcPos+i]
// }
// dstPos = endPos
//
for dstPos < endPos {
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
}
dd.wrPos = dstPos
return dstPos - dstBase
}
// tryWriteCopy tries to copy a string at a given (distance, length) to the
// output. This specialized version is optimized for short distances.
//
// This method is designed to be inlined for performance reasons.
//
// This invariant must be kept: 0 < dist <= histSize()
func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
dstPos := dd.wrPos
endPos := dstPos + length
if dstPos < dist || endPos > len(dd.hist) {
return 0
}
dstBase := dstPos
srcPos := dstPos - dist
// Copy possibly overlapping section before destination position.
loop:
dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
if dstPos < endPos {
goto loop // Avoid for-loop so that this function can be inlined
}
dd.wrPos = dstPos
return dstPos - dstBase
}
// readFlush returns a slice of the historical buffer that is ready to be
// emitted to the user. The data returned by readFlush must be fully consumed
// before calling any other dictDecoder methods.
func (dd *dictDecoder) readFlush() []byte {
toRead := dd.hist[dd.rdPos:dd.wrPos]
dd.rdPos = dd.wrPos
if dd.wrPos == len(dd.hist) {
dd.wrPos, dd.rdPos = 0, 0
dd.full = true
}
return toRead
}

View file

@ -0,0 +1,255 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Modified for deflate by Klaus Post (c) 2015.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"fmt"
"math/bits"
)
type fastEnc interface {
Encode(dst *tokens, src []byte)
Reset()
}
func newFastEnc(level int) fastEnc {
switch level {
case 1:
return &fastEncL1{fastGen: fastGen{cur: maxStoreBlockSize}}
case 2:
return &fastEncL2{fastGen: fastGen{cur: maxStoreBlockSize}}
case 3:
return &fastEncL3{fastGen: fastGen{cur: maxStoreBlockSize}}
case 4:
return &fastEncL4{fastGen: fastGen{cur: maxStoreBlockSize}}
case 5:
return &fastEncL5{fastGen: fastGen{cur: maxStoreBlockSize}}
case 6:
return &fastEncL6{fastGen: fastGen{cur: maxStoreBlockSize}}
default:
panic("invalid level specified")
}
}
const (
tableBits = 16 // Bits used in the table
tableSize = 1 << tableBits // Size of the table
tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
baseMatchOffset = 1 // The smallest match offset
baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5
maxMatchOffset = 1 << 15 // The largest match offset
bTableBits = 18 // Bits used in the big tables
bTableSize = 1 << bTableBits // Size of the table
allocHistory = maxStoreBlockSize * 20 // Size to preallocate for history.
bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this.
)
const (
prime3bytes = 506832829
prime4bytes = 2654435761
prime5bytes = 889523592379
prime6bytes = 227718039650203
prime7bytes = 58295818150454627
prime8bytes = 0xcf1bbcdcb7a56463
)
func load32(b []byte, i int) uint32 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:4]
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
func load64(b []byte, i int) uint64 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:8]
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
func load3232(b []byte, i int32) uint32 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:4]
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
func load6432(b []byte, i int32) uint64 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:8]
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
func hash(u uint32) uint32 {
return (u * 0x1e35a7bd) >> tableShift
}
type tableEntry struct {
val uint32
offset int32
}
// fastGen maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
type fastGen struct {
hist []byte
cur int32
}
func (e *fastGen) addBlock(src []byte) int32 {
// check if we have space already
if len(e.hist)+len(src) > cap(e.hist) {
if cap(e.hist) == 0 {
e.hist = make([]byte, 0, allocHistory)
} else {
if cap(e.hist) < maxMatchOffset*2 {
panic("unexpected buffer size")
}
// Move down
offset := int32(len(e.hist)) - maxMatchOffset
copy(e.hist[0:maxMatchOffset], e.hist[offset:])
e.cur += offset
e.hist = e.hist[:maxMatchOffset]
}
}
s := int32(len(e.hist))
e.hist = append(e.hist, src...)
return s
}
// hash4 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4u(u uint32, h uint8) uint32 {
return (u * prime4bytes) >> ((32 - h) & 31)
}
type tableEntryPrev struct {
Cur tableEntry
Prev tableEntry
}
// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4x64(u uint64, h uint8) uint32 {
return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
}
// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash7(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
}
// hash8 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash8(u uint64, h uint8) uint32 {
return uint32((u * prime8bytes) >> ((64 - h) & 63))
}
// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash6(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
}
// matchlen will return the match length between offsets and t in src.
// The maximum length returned is maxMatchLength - 4.
// It is assumed that s > t, that t >=0 and s < len(src).
func (e *fastGen) matchlen(s, t int32, src []byte) int32 {
if debugDecode {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
if int(s) >= len(src) {
panic(fmt.Sprint("s >= len(src):", s, len(src)))
}
if t < 0 {
panic(fmt.Sprint("t < 0:", t))
}
if s-t > maxMatchOffset {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
s1 := int(s) + maxMatchLength - 4
if s1 > len(src) {
s1 = len(src)
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:s1], src[t:]))
}
// matchlenLong will return the match length between offsets and t in src.
// It is assumed that s > t, that t >=0 and s < len(src).
func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 {
if debugDecode {
if t >= s {
panic(fmt.Sprint("t >=s:", t, s))
}
if int(s) >= len(src) {
panic(fmt.Sprint("s >= len(src):", s, len(src)))
}
if t < 0 {
panic(fmt.Sprint("t < 0:", t))
}
if s-t > maxMatchOffset {
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
}
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:]))
}
// Reset the encoding table.
func (e *fastGen) Reset() {
if cap(e.hist) < allocHistory {
e.hist = make([]byte, 0, allocHistory)
}
// We offset current position so everything will be out of reach.
// If we are above the buffer reset it will be cleared anyway since len(hist) == 0.
if e.cur <= bufferReset {
e.cur += maxMatchOffset + int32(len(e.hist))
}
e.hist = e.hist[:0]
}
// matchLen returns the maximum length.
// 'a' must be the shortest of the two.
func matchLen(a, b []byte) int {
b = b[:len(a)]
var checked int
if len(a) > 4 {
// Try 4 bytes first
if diff := load32(a, 0) ^ load32(b, 0); diff != 0 {
return bits.TrailingZeros32(diff) >> 3
}
// Switch to 8 byte matching.
checked = 4
a = a[4:]
b = b[4:]
for len(a) >= 8 {
b = b[:len(a)]
if diff := load64(a, 0) ^ load64(b, 0); diff != 0 {
return checked + (bits.TrailingZeros64(diff) >> 3)
}
checked += 8
a = a[8:]
b = b[8:]
}
}
b = b[:len(a)]
for i := range a {
if a[i] != b[i] {
return int(i) + checked
}
}
return len(a) + checked
}

View file

@ -0,0 +1,898 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"io"
)
const (
// The largest offset code.
offsetCodeCount = 30
// The special code used to mark the end of a block.
endBlockMarker = 256
// The first length code.
lengthCodesStart = 257
// The number of codegen codes.
codegenCodeCount = 19
badCode = 255
// bufferFlushSize indicates the buffer size
// after which bytes are flushed to the writer.
// Should preferably be a multiple of 6, since
// we accumulate 6 bytes between writes to the buffer.
bufferFlushSize = 240
// bufferSize is the actual output byte buffer size.
// It must have additional headroom for a flush
// which can contain up to 8 bytes.
bufferSize = bufferFlushSize + 8
)
// The number of extra bits needed by length code X - LENGTH_CODES_START.
var lengthExtraBits = [32]int8{
/* 257 */ 0, 0, 0,
/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
/* 280 */ 4, 5, 5, 5, 5, 0,
}
// The length indicated by length code X - LENGTH_CODES_START.
var lengthBase = [32]uint8{
0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
64, 80, 96, 112, 128, 160, 192, 224, 255,
}
// offset code word extra bits.
var offsetExtraBits = [64]int8{
0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
/* extended window */
14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20,
}
var offsetBase = [64]uint32{
/* normal deflate */
0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
/* extended window */
0x008000, 0x00c000, 0x010000, 0x018000, 0x020000,
0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000,
0x100000, 0x180000, 0x200000, 0x300000,
}
// The odd order in which the codegen code sizes are written.
var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
type huffmanBitWriter struct {
// writer is the underlying writer.
// Do not use it directly; use the write method, which ensures
// that Write errors are sticky.
writer io.Writer
// Data waiting to be written is bytes[0:nbytes]
// and then the low nbits of bits.
bits uint64
nbits uint16
nbytes uint8
literalEncoding *huffmanEncoder
offsetEncoding *huffmanEncoder
codegenEncoding *huffmanEncoder
err error
lastHeader int
// Set between 0 (reused block can be up to 2x the size)
logNewTablePenalty uint
lastHuffMan bool
bytes [256]byte
literalFreq [lengthCodesStart + 32]uint16
offsetFreq [32]uint16
codegenFreq [codegenCodeCount]uint16
// codegen must have an extra space for the final symbol.
codegen [literalCount + offsetCodeCount + 1]uint8
}
// Huffman reuse.
//
// The huffmanBitWriter supports reusing huffman tables and thereby combining block sections.
//
// This is controlled by several variables:
//
// If lastHeader is non-zero the Huffman table can be reused.
// This also indicates that a Huffman table has been generated that can output all
// possible symbols.
// It also indicates that an EOB has not yet been emitted, so if a new tabel is generated
// an EOB with the previous table must be written.
//
// If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid.
//
// An incoming block estimates the output size of a new table using a 'fresh' by calculating the
// optimal size and adding a penalty in 'logNewTablePenalty'.
// A Huffman table is not optimal, which is why we add a penalty, and generating a new table
// is slower both for compression and decompression.
func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
return &huffmanBitWriter{
writer: w,
literalEncoding: newHuffmanEncoder(literalCount),
codegenEncoding: newHuffmanEncoder(codegenCodeCount),
offsetEncoding: newHuffmanEncoder(offsetCodeCount),
}
}
func (w *huffmanBitWriter) reset(writer io.Writer) {
w.writer = writer
w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
w.lastHeader = 0
w.lastHuffMan = false
}
func (w *huffmanBitWriter) canReuse(t *tokens) (offsets, lits bool) {
offsets, lits = true, true
a := t.offHist[:offsetCodeCount]
b := w.offsetFreq[:len(a)]
for i := range a {
if b[i] == 0 && a[i] != 0 {
offsets = false
break
}
}
a = t.extraHist[:literalCount-256]
b = w.literalFreq[256:literalCount]
b = b[:len(a)]
for i := range a {
if b[i] == 0 && a[i] != 0 {
lits = false
break
}
}
if lits {
a = t.litHist[:]
b = w.literalFreq[:len(a)]
for i := range a {
if b[i] == 0 && a[i] != 0 {
lits = false
break
}
}
}
return
}
func (w *huffmanBitWriter) flush() {
if w.err != nil {
w.nbits = 0
return
}
if w.lastHeader > 0 {
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
}
n := w.nbytes
for w.nbits != 0 {
w.bytes[n] = byte(w.bits)
w.bits >>= 8
if w.nbits > 8 { // Avoid underflow
w.nbits -= 8
} else {
w.nbits = 0
}
n++
}
w.bits = 0
w.write(w.bytes[:n])
w.nbytes = 0
}
func (w *huffmanBitWriter) write(b []byte) {
if w.err != nil {
return
}
_, w.err = w.writer.Write(b)
}
func (w *huffmanBitWriter) writeBits(b int32, nb uint16) {
w.bits |= uint64(b) << (w.nbits & 63)
w.nbits += nb
if w.nbits >= 48 {
w.writeOutBits()
}
}
func (w *huffmanBitWriter) writeBytes(bytes []byte) {
if w.err != nil {
return
}
n := w.nbytes
if w.nbits&7 != 0 {
w.err = InternalError("writeBytes with unfinished bits")
return
}
for w.nbits != 0 {
w.bytes[n] = byte(w.bits)
w.bits >>= 8
w.nbits -= 8
n++
}
if n != 0 {
w.write(w.bytes[:n])
}
w.nbytes = 0
w.write(bytes)
}
// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
// the literal and offset lengths arrays (which are concatenated into a single
// array). This method generates that run-length encoding.
//
// The result is written into the codegen array, and the frequencies
// of each code is written into the codegenFreq array.
// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
// information. Code badCode is an end marker
//
// numLiterals The number of literals in literalEncoding
// numOffsets The number of offsets in offsetEncoding
// litenc, offenc The literal and offset encoder to use
func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) {
for i := range w.codegenFreq {
w.codegenFreq[i] = 0
}
// Note that we are using codegen both as a temporary variable for holding
// a copy of the frequencies, and as the place where we put the result.
// This is fine because the output is always shorter than the input used
// so far.
codegen := w.codegen[:] // cache
// Copy the concatenated code sizes to codegen. Put a marker at the end.
cgnl := codegen[:numLiterals]
for i := range cgnl {
cgnl[i] = uint8(litEnc.codes[i].len)
}
cgnl = codegen[numLiterals : numLiterals+numOffsets]
for i := range cgnl {
cgnl[i] = uint8(offEnc.codes[i].len)
}
codegen[numLiterals+numOffsets] = badCode
size := codegen[0]
count := 1
outIndex := 0
for inIndex := 1; size != badCode; inIndex++ {
// INVARIANT: We have seen "count" copies of size that have not yet
// had output generated for them.
nextSize := codegen[inIndex]
if nextSize == size {
count++
continue
}
// We need to generate codegen indicating "count" of size.
if size != 0 {
codegen[outIndex] = size
outIndex++
w.codegenFreq[size]++
count--
for count >= 3 {
n := 6
if n > count {
n = count
}
codegen[outIndex] = 16
outIndex++
codegen[outIndex] = uint8(n - 3)
outIndex++
w.codegenFreq[16]++
count -= n
}
} else {
for count >= 11 {
n := 138
if n > count {
n = count
}
codegen[outIndex] = 18
outIndex++
codegen[outIndex] = uint8(n - 11)
outIndex++
w.codegenFreq[18]++
count -= n
}
if count >= 3 {
// count >= 3 && count <= 10
codegen[outIndex] = 17
outIndex++
codegen[outIndex] = uint8(count - 3)
outIndex++
w.codegenFreq[17]++
count = 0
}
}
count--
for ; count >= 0; count-- {
codegen[outIndex] = size
outIndex++
w.codegenFreq[size]++
}
// Set up invariant for next time through the loop.
size = nextSize
count = 1
}
// Marker indicating the end of the codegen.
codegen[outIndex] = badCode
}
func (w *huffmanBitWriter) codegens() int {
numCodegens := len(w.codegenFreq)
for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
numCodegens--
}
return numCodegens
}
func (w *huffmanBitWriter) headerSize() (size, numCodegens int) {
numCodegens = len(w.codegenFreq)
for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
numCodegens--
}
return 3 + 5 + 5 + 4 + (3 * numCodegens) +
w.codegenEncoding.bitLength(w.codegenFreq[:]) +
int(w.codegenFreq[16])*2 +
int(w.codegenFreq[17])*3 +
int(w.codegenFreq[18])*7, numCodegens
}
// dynamicSize returns the size of dynamically encoded data in bits.
func (w *huffmanBitWriter) dynamicReuseSize(litEnc, offEnc *huffmanEncoder) (size int) {
size = litEnc.bitLength(w.literalFreq[:]) +
offEnc.bitLength(w.offsetFreq[:])
return size
}
// dynamicSize returns the size of dynamically encoded data in bits.
func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) {
header, numCodegens := w.headerSize()
size = header +
litEnc.bitLength(w.literalFreq[:]) +
offEnc.bitLength(w.offsetFreq[:]) +
extraBits
return size, numCodegens
}
// extraBitSize will return the number of bits that will be written
// as "extra" bits on matches.
func (w *huffmanBitWriter) extraBitSize() int {
total := 0
for i, n := range w.literalFreq[257:literalCount] {
total += int(n) * int(lengthExtraBits[i&31])
}
for i, n := range w.offsetFreq[:offsetCodeCount] {
total += int(n) * int(offsetExtraBits[i&31])
}
return total
}
// fixedSize returns the size of dynamically encoded data in bits.
func (w *huffmanBitWriter) fixedSize(extraBits int) int {
return 3 +
fixedLiteralEncoding.bitLength(w.literalFreq[:]) +
fixedOffsetEncoding.bitLength(w.offsetFreq[:]) +
extraBits
}
// storedSize calculates the stored size, including header.
// The function returns the size in bits and whether the block
// fits inside a single block.
func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
if in == nil {
return 0, false
}
if len(in) <= maxStoreBlockSize {
return (len(in) + 5) * 8, true
}
return 0, false
}
func (w *huffmanBitWriter) writeCode(c hcode) {
// The function does not get inlined if we "& 63" the shift.
w.bits |= uint64(c.code) << w.nbits
w.nbits += c.len
if w.nbits >= 48 {
w.writeOutBits()
}
}
// writeOutBits will write bits to the buffer.
func (w *huffmanBitWriter) writeOutBits() {
bits := w.bits
w.bits >>= 48
w.nbits -= 48
n := w.nbytes
w.bytes[n] = byte(bits)
w.bytes[n+1] = byte(bits >> 8)
w.bytes[n+2] = byte(bits >> 16)
w.bytes[n+3] = byte(bits >> 24)
w.bytes[n+4] = byte(bits >> 32)
w.bytes[n+5] = byte(bits >> 40)
n += 6
if n >= bufferFlushSize {
if w.err != nil {
n = 0
return
}
w.write(w.bytes[:n])
n = 0
}
w.nbytes = n
}
// Write the header of a dynamic Huffman block to the output stream.
//
// numLiterals The number of literals specified in codegen
// numOffsets The number of offsets specified in codegen
// numCodegens The number of codegens used in codegen
func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
if w.err != nil {
return
}
var firstBits int32 = 4
if isEof {
firstBits = 5
}
w.writeBits(firstBits, 3)
w.writeBits(int32(numLiterals-257), 5)
w.writeBits(int32(numOffsets-1), 5)
w.writeBits(int32(numCodegens-4), 4)
for i := 0; i < numCodegens; i++ {
value := uint(w.codegenEncoding.codes[codegenOrder[i]].len)
w.writeBits(int32(value), 3)
}
i := 0
for {
var codeWord = uint32(w.codegen[i])
i++
if codeWord == badCode {
break
}
w.writeCode(w.codegenEncoding.codes[codeWord])
switch codeWord {
case 16:
w.writeBits(int32(w.codegen[i]), 2)
i++
case 17:
w.writeBits(int32(w.codegen[i]), 3)
i++
case 18:
w.writeBits(int32(w.codegen[i]), 7)
i++
}
}
}
func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
if w.err != nil {
return
}
if w.lastHeader > 0 {
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
}
var flag int32
if isEof {
flag = 1
}
w.writeBits(flag, 3)
w.flush()
w.writeBits(int32(length), 16)
w.writeBits(int32(^uint16(length)), 16)
}
func (w *huffmanBitWriter) writeFixedHeader(isEof bool) {
if w.err != nil {
return
}
if w.lastHeader > 0 {
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
}
// Indicate that we are a fixed Huffman block
var value int32 = 2
if isEof {
value = 3
}
w.writeBits(value, 3)
}
// writeBlock will write a block of tokens with the smallest encoding.
// The original input can be supplied, and if the huffman encoded data
// is larger than the original bytes, the data will be written as a
// stored block.
// If the input is nil, the tokens will always be Huffman encoded.
func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) {
if w.err != nil {
return
}
tokens.AddEOB()
if w.lastHeader > 0 {
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
}
numLiterals, numOffsets := w.indexTokens(tokens, false)
w.generate(tokens)
var extraBits int
storedSize, storable := w.storedSize(input)
if storable {
extraBits = w.extraBitSize()
}
// Figure out smallest code.
// Fixed Huffman baseline.
var literalEncoding = fixedLiteralEncoding
var offsetEncoding = fixedOffsetEncoding
var size = w.fixedSize(extraBits)
// Dynamic Huffman?
var numCodegens int
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
w.codegenEncoding.generate(w.codegenFreq[:], 7)
dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)
if dynamicSize < size {
size = dynamicSize
literalEncoding = w.literalEncoding
offsetEncoding = w.offsetEncoding
}
// Stored bytes?
if storable && storedSize < size {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}
// Huffman.
if literalEncoding == fixedLiteralEncoding {
w.writeFixedHeader(eof)
} else {
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
}
// Write the tokens.
w.writeTokens(tokens.Slice(), literalEncoding.codes, offsetEncoding.codes)
}
// writeBlockDynamic encodes a block using a dynamic Huffman table.
// This should be used if the symbols used have a disproportionate
// histogram distribution.
// If input is supplied and the compression savings are below 1/16th of the
// input size the block is stored.
func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []byte, sync bool) {
if w.err != nil {
return
}
sync = sync || eof
if sync {
tokens.AddEOB()
}
// We cannot reuse pure huffman table, and must mark as EOF.
if (w.lastHuffMan || eof) && w.lastHeader > 0 {
// We will not try to reuse.
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
w.lastHuffMan = false
}
if !sync {
tokens.Fill()
}
numLiterals, numOffsets := w.indexTokens(tokens, !sync)
var size int
// Check if we should reuse.
if w.lastHeader > 0 {
// Estimate size for using a new table.
// Use the previous header size as the best estimate.
newSize := w.lastHeader + tokens.EstimatedBits()
newSize += newSize >> w.logNewTablePenalty
// The estimated size is calculated as an optimal table.
// We add a penalty to make it more realistic and re-use a bit more.
reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + w.extraBitSize()
// Check if a new table is better.
if newSize < reuseSize {
// Write the EOB we owe.
w.writeCode(w.literalEncoding.codes[endBlockMarker])
size = newSize
w.lastHeader = 0
} else {
size = reuseSize
}
// Check if we get a reasonable size decrease.
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
w.lastHeader = 0
return
}
}
// We want a new block/table
if w.lastHeader == 0 {
w.generate(tokens)
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
w.codegenEncoding.generate(w.codegenFreq[:], 7)
var numCodegens int
size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, w.extraBitSize())
// Store bytes, if we don't get a reasonable improvement.
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
w.lastHeader = 0
return
}
// Write Huffman table.
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
w.lastHeader, _ = w.headerSize()
w.lastHuffMan = false
}
if sync {
w.lastHeader = 0
}
// Write the tokens.
w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes)
}
// indexTokens indexes a slice of tokens, and updates
// literalFreq and offsetFreq, and generates literalEncoding
// and offsetEncoding.
// The number of literal and offset tokens is returned.
func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) {
copy(w.literalFreq[:], t.litHist[:])
copy(w.literalFreq[256:], t.extraHist[:])
copy(w.offsetFreq[:], t.offHist[:offsetCodeCount])
if t.n == 0 {
return
}
if filled {
return maxNumLit, maxNumDist
}
// get the number of literals
numLiterals = len(w.literalFreq)
for w.literalFreq[numLiterals-1] == 0 {
numLiterals--
}
// get the number of offsets
numOffsets = len(w.offsetFreq)
for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 {
numOffsets--
}
if numOffsets == 0 {
// We haven't found a single match. If we want to go with the dynamic encoding,
// we should count at least one offset to be sure that the offset huffman tree could be encoded.
w.offsetFreq[0] = 1
numOffsets = 1
}
return
}
func (w *huffmanBitWriter) generate(t *tokens) {
w.literalEncoding.generate(w.literalFreq[:literalCount], 15)
w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15)
}
// writeTokens writes a slice of tokens to the output.
// codes for literal and offset encoding must be supplied.
func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) {
if w.err != nil {
return
}
if len(tokens) == 0 {
return
}
// Only last token should be endBlockMarker.
var deferEOB bool
if tokens[len(tokens)-1] == endBlockMarker {
tokens = tokens[:len(tokens)-1]
deferEOB = true
}
// Create slices up to the next power of two to avoid bounds checks.
lits := leCodes[:256]
offs := oeCodes[:32]
lengths := leCodes[lengthCodesStart:]
lengths = lengths[:32]
for _, t := range tokens {
if t < matchType {
w.writeCode(lits[t.literal()])
continue
}
// Write the length
length := t.length()
lengthCode := lengthCode(length)
if false {
w.writeCode(lengths[lengthCode&31])
} else {
// inlined
c := lengths[lengthCode&31]
w.bits |= uint64(c.code) << (w.nbits & 63)
w.nbits += c.len
if w.nbits >= 48 {
w.writeOutBits()
}
}
extraLengthBits := uint16(lengthExtraBits[lengthCode&31])
if extraLengthBits > 0 {
extraLength := int32(length - lengthBase[lengthCode&31])
w.writeBits(extraLength, extraLengthBits)
}
// Write the offset
offset := t.offset()
offsetCode := offsetCode(offset)
if false {
w.writeCode(offs[offsetCode&31])
} else {
// inlined
c := offs[offsetCode&31]
w.bits |= uint64(c.code) << (w.nbits & 63)
w.nbits += c.len
if w.nbits >= 48 {
w.writeOutBits()
}
}
extraOffsetBits := uint16(offsetExtraBits[offsetCode&63])
if extraOffsetBits > 0 {
extraOffset := int32(offset - offsetBase[offsetCode&63])
w.writeBits(extraOffset, extraOffsetBits)
}
}
if deferEOB {
w.writeCode(leCodes[endBlockMarker])
}
}
// huffOffset is a static offset encoder used for huffman only encoding.
// It can be reused since we will not be encoding offset values.
var huffOffset *huffmanEncoder
func init() {
w := newHuffmanBitWriter(nil)
w.offsetFreq[0] = 1
huffOffset = newHuffmanEncoder(offsetCodeCount)
huffOffset.generate(w.offsetFreq[:offsetCodeCount], 15)
}
// writeBlockHuff encodes a block of bytes as either
// Huffman encoded literals or uncompressed bytes if the
// results only gains very little from compression.
func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
if w.err != nil {
return
}
// Clear histogram
for i := range w.literalFreq[:] {
w.literalFreq[i] = 0
}
if !w.lastHuffMan {
for i := range w.offsetFreq[:] {
w.offsetFreq[i] = 0
}
}
// Add everything as literals
// We have to estimate the header size.
// Assume header is around 70 bytes:
// https://stackoverflow.com/a/25454430
const guessHeaderSizeBits = 70 * 8
estBits, estExtra := histogramSize(input, w.literalFreq[:], !eof && !sync)
estBits += w.lastHeader + 15
if w.lastHeader == 0 {
estBits += guessHeaderSizeBits
}
estBits += estBits >> w.logNewTablePenalty
// Store bytes, if we don't get a reasonable improvement.
ssize, storable := w.storedSize(input)
if storable && ssize < estBits {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}
if w.lastHeader > 0 {
reuseSize := w.literalEncoding.bitLength(w.literalFreq[:256])
estBits += estExtra
if estBits < reuseSize {
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
}
}
const numLiterals = endBlockMarker + 1
const numOffsets = 1
if w.lastHeader == 0 {
w.literalFreq[endBlockMarker] = 1
w.literalEncoding.generate(w.literalFreq[:numLiterals], 15)
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
w.codegenEncoding.generate(w.codegenFreq[:], 7)
numCodegens := w.codegens()
// Huffman.
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
w.lastHuffMan = true
w.lastHeader, _ = w.headerSize()
}
encoding := w.literalEncoding.codes[:257]
for _, t := range input {
// Bitwriting inlined, ~30% speedup
c := encoding[t]
w.bits |= uint64(c.code) << ((w.nbits) & 63)
w.nbits += c.len
if w.nbits >= 48 {
bits := w.bits
w.bits >>= 48
w.nbits -= 48
n := w.nbytes
w.bytes[n] = byte(bits)
w.bytes[n+1] = byte(bits >> 8)
w.bytes[n+2] = byte(bits >> 16)
w.bytes[n+3] = byte(bits >> 24)
w.bytes[n+4] = byte(bits >> 32)
w.bytes[n+5] = byte(bits >> 40)
n += 6
if n >= bufferFlushSize {
if w.err != nil {
n = 0
return
}
w.write(w.bytes[:n])
n = 0
}
w.nbytes = n
}
}
if eof || sync {
w.writeCode(encoding[endBlockMarker])
w.lastHeader = 0
w.lastHuffMan = false
}
}

View file

@ -0,0 +1,363 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"math"
"math/bits"
)
const (
maxBitsLimit = 16
// number of valid literals
literalCount = 286
)
// hcode is a huffman code with a bit code and bit length.
type hcode struct {
code, len uint16
}
type huffmanEncoder struct {
codes []hcode
freqcache []literalNode
bitCount [17]int32
}
type literalNode struct {
literal uint16
freq uint16
}
// A levelInfo describes the state of the constructed tree for a given depth.
type levelInfo struct {
// Our level. for better printing
level int32
// The frequency of the last node at this level
lastFreq int32
// The frequency of the next character to add to this level
nextCharFreq int32
// The frequency of the next pair (from level below) to add to this level.
// Only valid if the "needed" value of the next lower level is 0.
nextPairFreq int32
// The number of chains remaining to generate for this level before moving
// up to the next level
needed int32
}
// set sets the code and length of an hcode.
func (h *hcode) set(code uint16, length uint16) {
h.len = length
h.code = code
}
func reverseBits(number uint16, bitLength byte) uint16 {
return bits.Reverse16(number << ((16 - bitLength) & 15))
}
func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} }
func newHuffmanEncoder(size int) *huffmanEncoder {
// Make capacity to next power of two.
c := uint(bits.Len32(uint32(size - 1)))
return &huffmanEncoder{codes: make([]hcode, size, 1<<c)}
}
// Generates a HuffmanCode corresponding to the fixed literal table
func generateFixedLiteralEncoding() *huffmanEncoder {
h := newHuffmanEncoder(literalCount)
codes := h.codes
var ch uint16
for ch = 0; ch < literalCount; ch++ {
var bits uint16
var size uint16
switch {
case ch < 144:
// size 8, 000110000 .. 10111111
bits = ch + 48
size = 8
case ch < 256:
// size 9, 110010000 .. 111111111
bits = ch + 400 - 144
size = 9
case ch < 280:
// size 7, 0000000 .. 0010111
bits = ch - 256
size = 7
default:
// size 8, 11000000 .. 11000111
bits = ch + 192 - 280
size = 8
}
codes[ch] = hcode{code: reverseBits(bits, byte(size)), len: size}
}
return h
}
func generateFixedOffsetEncoding() *huffmanEncoder {
h := newHuffmanEncoder(30)
codes := h.codes
for ch := range codes {
codes[ch] = hcode{code: reverseBits(uint16(ch), 5), len: 5}
}
return h
}
var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
func (h *huffmanEncoder) bitLength(freq []uint16) int {
var total int
for i, f := range freq {
if f != 0 {
total += int(f) * int(h.codes[i].len)
}
}
return total
}
// Return the number of literals assigned to each bit size in the Huffman encoding
//
// This method is only called when list.length >= 3
// The cases of 0, 1, and 2 literals are handled by special case code.
//
// list An array of the literals with non-zero frequencies
// and their associated frequencies. The array is in order of increasing
// frequency, and has as its last element a special element with frequency
// MaxInt32
// maxBits The maximum number of bits that should be used to encode any literal.
// Must be less than 16.
// return An integer array in which array[i] indicates the number of literals
// that should be encoded in i bits.
func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
if maxBits >= maxBitsLimit {
panic("flate: maxBits too large")
}
n := int32(len(list))
list = list[0 : n+1]
list[n] = maxNode()
// The tree can't have greater depth than n - 1, no matter what. This
// saves a little bit of work in some small cases
if maxBits > n-1 {
maxBits = n - 1
}
// Create information about each of the levels.
// A bogus "Level 0" whose sole purpose is so that
// level1.prev.needed==0. This makes level1.nextPairFreq
// be a legitimate value that never gets chosen.
var levels [maxBitsLimit]levelInfo
// leafCounts[i] counts the number of literals at the left
// of ancestors of the rightmost node at level i.
// leafCounts[i][j] is the number of literals at the left
// of the level j ancestor.
var leafCounts [maxBitsLimit][maxBitsLimit]int32
for level := int32(1); level <= maxBits; level++ {
// For every level, the first two items are the first two characters.
// We initialize the levels as if we had already figured this out.
levels[level] = levelInfo{
level: level,
lastFreq: int32(list[1].freq),
nextCharFreq: int32(list[2].freq),
nextPairFreq: int32(list[0].freq) + int32(list[1].freq),
}
leafCounts[level][level] = 2
if level == 1 {
levels[level].nextPairFreq = math.MaxInt32
}
}
// We need a total of 2*n - 2 items at top level and have already generated 2.
levels[maxBits].needed = 2*n - 4
level := maxBits
for {
l := &levels[level]
if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
// We've run out of both leafs and pairs.
// End all calculations for this level.
// To make sure we never come back to this level or any lower level,
// set nextPairFreq impossibly large.
l.needed = 0
levels[level+1].nextPairFreq = math.MaxInt32
level++
continue
}
prevFreq := l.lastFreq
if l.nextCharFreq < l.nextPairFreq {
// The next item on this row is a leaf node.
n := leafCounts[level][level] + 1
l.lastFreq = l.nextCharFreq
// Lower leafCounts are the same of the previous node.
leafCounts[level][level] = n
e := list[n]
if e.literal < math.MaxUint16 {
l.nextCharFreq = int32(e.freq)
} else {
l.nextCharFreq = math.MaxInt32
}
} else {
// The next item on this row is a pair from the previous row.
// nextPairFreq isn't valid until we generate two
// more values in the level below
l.lastFreq = l.nextPairFreq
// Take leaf counts from the lower level, except counts[level] remains the same.
copy(leafCounts[level][:level], leafCounts[level-1][:level])
levels[l.level-1].needed = 2
}
if l.needed--; l.needed == 0 {
// We've done everything we need to do for this level.
// Continue calculating one level up. Fill in nextPairFreq
// of that level with the sum of the two nodes we've just calculated on
// this level.
if l.level == maxBits {
// All done!
break
}
levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
level++
} else {
// If we stole from below, move down temporarily to replenish it.
for levels[level-1].needed > 0 {
level--
}
}
}
// Somethings is wrong if at the end, the top level is null or hasn't used
// all of the leaves.
if leafCounts[maxBits][maxBits] != n {
panic("leafCounts[maxBits][maxBits] != n")
}
bitCount := h.bitCount[:maxBits+1]
bits := 1
counts := &leafCounts[maxBits]
for level := maxBits; level > 0; level-- {
// chain.leafCount gives the number of literals requiring at least "bits"
// bits to encode.
bitCount[bits] = counts[level] - counts[level-1]
bits++
}
return bitCount
}
// Look at the leaves and assign them a bit count and an encoding as specified
// in RFC 1951 3.2.2
func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
code := uint16(0)
for n, bits := range bitCount {
code <<= 1
if n == 0 || bits == 0 {
continue
}
// The literals list[len(list)-bits] .. list[len(list)-bits]
// are encoded using "bits" bits, and get the values
// code, code + 1, .... The code values are
// assigned in literal order (not frequency order).
chunk := list[len(list)-int(bits):]
sortByLiteral(chunk)
for _, node := range chunk {
h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)}
code++
}
list = list[0 : len(list)-int(bits)]
}
}
// Update this Huffman Code object to be the minimum code for the specified frequency count.
//
// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
// maxBits The maximum number of bits to use for any literal.
func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
if h.freqcache == nil {
// Allocate a reusable buffer with the longest possible frequency table.
// Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
// The largest of these is literalCount, so we allocate for that case.
h.freqcache = make([]literalNode, literalCount+1)
}
list := h.freqcache[:len(freq)+1]
// Number of non-zero literals
count := 0
// Set list to be the set of all non-zero literals and their frequencies
for i, f := range freq {
if f != 0 {
list[count] = literalNode{uint16(i), f}
count++
} else {
list[count] = literalNode{}
h.codes[i].len = 0
}
}
list[len(freq)] = literalNode{}
list = list[:count]
if count <= 2 {
// Handle the small cases here, because they are awkward for the general case code. With
// two or fewer literals, everything has bit length 1.
for i, node := range list {
// "list" is in order of increasing literal value.
h.codes[node.literal].set(uint16(i), 1)
}
return
}
sortByFreq(list)
// Get the number of literals for each bit count
bitCount := h.bitCounts(list, maxBits)
// And do the assignment
h.assignEncodingAndSize(bitCount, list)
}
func atLeastOne(v float32) float32 {
if v < 1 {
return 1
}
return v
}
// histogramSize accumulates a histogram of b in h.
// An estimated size in bits is returned.
// Unassigned values are assigned '1' in the histogram.
// len(h) must be >= 256, and h's elements must be all zeroes.
func histogramSize(b []byte, h []uint16, fill bool) (int, int) {
h = h[:256]
for _, t := range b {
h[t]++
}
invTotal := 1.0 / float32(len(b))
shannon := float32(0.0)
var extra float32
if fill {
oneBits := atLeastOne(-mFastLog2(invTotal))
for i, v := range h[:] {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
} else {
h[i] = 1
extra += oneBits
}
}
} else {
for _, v := range h[:] {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
}
}
}
return int(shannon + 0.99), int(extra + 0.99)
}

View file

@ -0,0 +1,178 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
// Sort sorts data.
// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
// data.Less and data.Swap. The sort is not guaranteed to be stable.
func sortByFreq(data []literalNode) {
n := len(data)
quickSortByFreq(data, 0, n, maxDepth(n))
}
func quickSortByFreq(data []literalNode, a, b, maxDepth int) {
for b-a > 12 { // Use ShellSort for slices <= 12 elements
if maxDepth == 0 {
heapSort(data, a, b)
return
}
maxDepth--
mlo, mhi := doPivotByFreq(data, a, b)
// Avoiding recursion on the larger subproblem guarantees
// a stack depth of at most lg(b-a).
if mlo-a < b-mhi {
quickSortByFreq(data, a, mlo, maxDepth)
a = mhi // i.e., quickSortByFreq(data, mhi, b)
} else {
quickSortByFreq(data, mhi, b, maxDepth)
b = mlo // i.e., quickSortByFreq(data, a, mlo)
}
}
if b-a > 1 {
// Do ShellSort pass with gap 6
// It could be written in this simplified form cause b-a <= 12
for i := a + 6; i < b; i++ {
if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq {
data[i], data[i-6] = data[i-6], data[i]
}
}
insertionSortByFreq(data, a, b)
}
}
// siftDownByFreq implements the heap property on data[lo, hi).
// first is an offset into the array where the root of the heap lies.
func siftDownByFreq(data []literalNode, lo, hi, first int) {
root := lo
for {
child := 2*root + 1
if child >= hi {
break
}
if child+1 < hi && (data[first+child].freq == data[first+child+1].freq && data[first+child].literal < data[first+child+1].literal || data[first+child].freq < data[first+child+1].freq) {
child++
}
if data[first+root].freq == data[first+child].freq && data[first+root].literal > data[first+child].literal || data[first+root].freq > data[first+child].freq {
return
}
data[first+root], data[first+child] = data[first+child], data[first+root]
root = child
}
}
func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) {
m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
if hi-lo > 40 {
// Tukey's ``Ninther,'' median of three medians of three.
s := (hi - lo) / 8
medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s)
medianOfThreeSortByFreq(data, m, m-s, m+s)
medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s)
}
medianOfThreeSortByFreq(data, lo, m, hi-1)
// Invariants are:
// data[lo] = pivot (set up by ChoosePivot)
// data[lo < i < a] < pivot
// data[a <= i < b] <= pivot
// data[b <= i < c] unexamined
// data[c <= i < hi-1] > pivot
// data[hi-1] >= pivot
pivot := lo
a, c := lo+1, hi-1
for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ {
}
b := a
for {
for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot
}
for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot
}
if b >= c {
break
}
// data[b] > pivot; data[c-1] <= pivot
data[b], data[c-1] = data[c-1], data[b]
b++
c--
}
// If hi-c<3 then there are duplicates (by property of median of nine).
// Let's be a bit more conservative, and set border to 5.
protect := hi-c < 5
if !protect && hi-c < (hi-lo)/4 {
// Lets test some points for equality to pivot
dups := 0
if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot
data[c], data[hi-1] = data[hi-1], data[c]
c++
dups++
}
if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot
b--
dups++
}
// m-lo = (hi-lo)/2 > 6
// b-lo > (hi-lo)*3/4-1 > 8
// ==> m < b ==> data[m] <= pivot
if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot
data[m], data[b-1] = data[b-1], data[m]
b--
dups++
}
// if at least 2 points are equal to pivot, assume skewed distribution
protect = dups > 1
}
if protect {
// Protect against a lot of duplicates
// Add invariant:
// data[a <= i < b] unexamined
// data[b <= i < c] = pivot
for {
for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot
}
for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot
}
if a >= b {
break
}
// data[a] == pivot; data[b-1] < pivot
data[a], data[b-1] = data[b-1], data[a]
a++
b--
}
}
// Swap pivot into middle
data[pivot], data[b-1] = data[b-1], data[pivot]
return b - 1, c
}
// Insertion sort
func insertionSortByFreq(data []literalNode, a, b int) {
for i := a + 1; i < b; i++ {
for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- {
data[j], data[j-1] = data[j-1], data[j]
}
}
}
// quickSortByFreq, loosely following Bentley and McIlroy,
// ``Engineering a Sort Function,'' SP&E November 1993.
// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) {
// sort 3 elements
if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
data[m1], data[m0] = data[m0], data[m1]
}
// data[m0] <= data[m1]
if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq {
data[m2], data[m1] = data[m1], data[m2]
// data[m0] <= data[m2] && data[m1] < data[m2]
if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq {
data[m1], data[m0] = data[m0], data[m1]
}
}
// now data[m0] <= data[m1] <= data[m2]
}

View file

@ -0,0 +1,201 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
// Sort sorts data.
// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
// data.Less and data.Swap. The sort is not guaranteed to be stable.
func sortByLiteral(data []literalNode) {
n := len(data)
quickSort(data, 0, n, maxDepth(n))
}
func quickSort(data []literalNode, a, b, maxDepth int) {
for b-a > 12 { // Use ShellSort for slices <= 12 elements
if maxDepth == 0 {
heapSort(data, a, b)
return
}
maxDepth--
mlo, mhi := doPivot(data, a, b)
// Avoiding recursion on the larger subproblem guarantees
// a stack depth of at most lg(b-a).
if mlo-a < b-mhi {
quickSort(data, a, mlo, maxDepth)
a = mhi // i.e., quickSort(data, mhi, b)
} else {
quickSort(data, mhi, b, maxDepth)
b = mlo // i.e., quickSort(data, a, mlo)
}
}
if b-a > 1 {
// Do ShellSort pass with gap 6
// It could be written in this simplified form cause b-a <= 12
for i := a + 6; i < b; i++ {
if data[i].literal < data[i-6].literal {
data[i], data[i-6] = data[i-6], data[i]
}
}
insertionSort(data, a, b)
}
}
func heapSort(data []literalNode, a, b int) {
first := a
lo := 0
hi := b - a
// Build heap with greatest element at top.
for i := (hi - 1) / 2; i >= 0; i-- {
siftDown(data, i, hi, first)
}
// Pop elements, largest first, into end of data.
for i := hi - 1; i >= 0; i-- {
data[first], data[first+i] = data[first+i], data[first]
siftDown(data, lo, i, first)
}
}
// siftDown implements the heap property on data[lo, hi).
// first is an offset into the array where the root of the heap lies.
func siftDown(data []literalNode, lo, hi, first int) {
root := lo
for {
child := 2*root + 1
if child >= hi {
break
}
if child+1 < hi && data[first+child].literal < data[first+child+1].literal {
child++
}
if data[first+root].literal > data[first+child].literal {
return
}
data[first+root], data[first+child] = data[first+child], data[first+root]
root = child
}
}
func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) {
m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
if hi-lo > 40 {
// Tukey's ``Ninther,'' median of three medians of three.
s := (hi - lo) / 8
medianOfThree(data, lo, lo+s, lo+2*s)
medianOfThree(data, m, m-s, m+s)
medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
}
medianOfThree(data, lo, m, hi-1)
// Invariants are:
// data[lo] = pivot (set up by ChoosePivot)
// data[lo < i < a] < pivot
// data[a <= i < b] <= pivot
// data[b <= i < c] unexamined
// data[c <= i < hi-1] > pivot
// data[hi-1] >= pivot
pivot := lo
a, c := lo+1, hi-1
for ; a < c && data[a].literal < data[pivot].literal; a++ {
}
b := a
for {
for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot
}
for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot
}
if b >= c {
break
}
// data[b] > pivot; data[c-1] <= pivot
data[b], data[c-1] = data[c-1], data[b]
b++
c--
}
// If hi-c<3 then there are duplicates (by property of median of nine).
// Let's be a bit more conservative, and set border to 5.
protect := hi-c < 5
if !protect && hi-c < (hi-lo)/4 {
// Lets test some points for equality to pivot
dups := 0
if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot
data[c], data[hi-1] = data[hi-1], data[c]
c++
dups++
}
if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot
b--
dups++
}
// m-lo = (hi-lo)/2 > 6
// b-lo > (hi-lo)*3/4-1 > 8
// ==> m < b ==> data[m] <= pivot
if data[m].literal > data[pivot].literal { // data[m] = pivot
data[m], data[b-1] = data[b-1], data[m]
b--
dups++
}
// if at least 2 points are equal to pivot, assume skewed distribution
protect = dups > 1
}
if protect {
// Protect against a lot of duplicates
// Add invariant:
// data[a <= i < b] unexamined
// data[b <= i < c] = pivot
for {
for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot
}
for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot
}
if a >= b {
break
}
// data[a] == pivot; data[b-1] < pivot
data[a], data[b-1] = data[b-1], data[a]
a++
b--
}
}
// Swap pivot into middle
data[pivot], data[b-1] = data[b-1], data[pivot]
return b - 1, c
}
// Insertion sort
func insertionSort(data []literalNode, a, b int) {
for i := a + 1; i < b; i++ {
for j := i; j > a && data[j].literal < data[j-1].literal; j-- {
data[j], data[j-1] = data[j-1], data[j]
}
}
}
// maxDepth returns a threshold at which quicksort should switch
// to heapsort. It returns 2*ceil(lg(n+1)).
func maxDepth(n int) int {
var depth int
for i := n; i > 0; i >>= 1 {
depth++
}
return depth * 2
}
// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
func medianOfThree(data []literalNode, m1, m0, m2 int) {
// sort 3 elements
if data[m1].literal < data[m0].literal {
data[m1], data[m0] = data[m0], data[m1]
}
// data[m0] <= data[m1]
if data[m2].literal < data[m1].literal {
data[m2], data[m1] = data[m1], data[m2]
// data[m0] <= data[m2] && data[m1] < data[m2]
if data[m1].literal < data[m0].literal {
data[m1], data[m0] = data[m0], data[m1]
}
}
// now data[m0] <= data[m1] <= data[m2]
}

937
vendor/github.com/klauspost/compress/flate/inflate.go generated vendored Normal file
View file

@ -0,0 +1,937 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package flate implements the DEFLATE compressed data format, described in
// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
// formats.
package flate
import (
"bufio"
"fmt"
"io"
"math/bits"
"strconv"
"sync"
)
const (
maxCodeLen = 16 // max length of Huffman code
maxCodeLenMask = 15 // mask for max length of Huffman code
// The next three numbers come from the RFC section 3.2.7, with the
// additional proviso in section 3.2.5 which implies that distance codes
// 30 and 31 should never occur in compressed data.
maxNumLit = 286
maxNumDist = 30
numCodes = 19 // number of codes in Huffman meta-code
debugDecode = false
)
// Initialize the fixedHuffmanDecoder only once upon first use.
var fixedOnce sync.Once
var fixedHuffmanDecoder huffmanDecoder
// A CorruptInputError reports the presence of corrupt input at a given offset.
type CorruptInputError int64
func (e CorruptInputError) Error() string {
return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
}
// An InternalError reports an error in the flate code itself.
type InternalError string
func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
// A ReadError reports an error encountered while reading input.
//
// Deprecated: No longer returned.
type ReadError struct {
Offset int64 // byte offset where error occurred
Err error // error returned by underlying Read
}
func (e *ReadError) Error() string {
return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
}
// A WriteError reports an error encountered while writing output.
//
// Deprecated: No longer returned.
type WriteError struct {
Offset int64 // byte offset where error occurred
Err error // error returned by underlying Write
}
func (e *WriteError) Error() string {
return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
}
// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
// to switch to a new underlying Reader. This permits reusing a ReadCloser
// instead of allocating a new one.
type Resetter interface {
// Reset discards any buffered data and resets the Resetter as if it was
// newly initialized with the given reader.
Reset(r io.Reader, dict []byte) error
}
// The data structure for decoding Huffman tables is based on that of
// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
// For codes smaller than the table width, there are multiple entries
// (each combination of trailing bits has the same value). For codes
// larger than the table width, the table contains a link to an overflow
// table. The width of each entry in the link table is the maximum code
// size minus the chunk width.
//
// Note that you can do a lookup in the table even without all bits
// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
// have the property that shorter codes come before longer ones, the
// bit length estimate in the result is a lower bound on the actual
// number of bits.
//
// See the following:
// http://www.gzip.org/algorithm.txt
// chunk & 15 is number of bits
// chunk >> 4 is value, including table link
const (
huffmanChunkBits = 9
huffmanNumChunks = 1 << huffmanChunkBits
huffmanCountMask = 15
huffmanValueShift = 4
)
type huffmanDecoder struct {
min int // the minimum code length
chunks *[huffmanNumChunks]uint16 // chunks as described above
links [][]uint16 // overflow links
linkMask uint32 // mask the width of the link table
}
// Initialize Huffman decoding tables from array of code lengths.
// Following this function, h is guaranteed to be initialized into a complete
// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
// degenerate case where the tree has only a single symbol with length 1. Empty
// trees are permitted.
func (h *huffmanDecoder) init(lengths []int) bool {
// Sanity enables additional runtime tests during Huffman
// table construction. It's intended to be used during
// development to supplement the currently ad-hoc unit tests.
const sanity = false
if h.chunks == nil {
h.chunks = &[huffmanNumChunks]uint16{}
}
if h.min != 0 {
*h = huffmanDecoder{chunks: h.chunks, links: h.links}
}
// Count number of codes of each length,
// compute min and max length.
var count [maxCodeLen]int
var min, max int
for _, n := range lengths {
if n == 0 {
continue
}
if min == 0 || n < min {
min = n
}
if n > max {
max = n
}
count[n&maxCodeLenMask]++
}
// Empty tree. The decompressor.huffSym function will fail later if the tree
// is used. Technically, an empty tree is only valid for the HDIST tree and
// not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
// is guaranteed to fail since it will attempt to use the tree to decode the
// codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
// guaranteed to fail later since the compressed data section must be
// composed of at least one symbol (the end-of-block marker).
if max == 0 {
return true
}
code := 0
var nextcode [maxCodeLen]int
for i := min; i <= max; i++ {
code <<= 1
nextcode[i&maxCodeLenMask] = code
code += count[i&maxCodeLenMask]
}
// Check that the coding is complete (i.e., that we've
// assigned all 2-to-the-max possible bit sequences).
// Exception: To be compatible with zlib, we also need to
// accept degenerate single-code codings. See also
// TestDegenerateHuffmanCoding.
if code != 1<<uint(max) && !(code == 1 && max == 1) {
if debugDecode {
fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)")
}
return false
}
h.min = min
chunks := h.chunks[:]
for i := range chunks {
chunks[i] = 0
}
if max > huffmanChunkBits {
numLinks := 1 << (uint(max) - huffmanChunkBits)
h.linkMask = uint32(numLinks - 1)
// create link tables
link := nextcode[huffmanChunkBits+1] >> 1
if cap(h.links) < huffmanNumChunks-link {
h.links = make([][]uint16, huffmanNumChunks-link)
} else {
h.links = h.links[:huffmanNumChunks-link]
}
for j := uint(link); j < huffmanNumChunks; j++ {
reverse := int(bits.Reverse16(uint16(j)))
reverse >>= uint(16 - huffmanChunkBits)
off := j - uint(link)
if sanity && h.chunks[reverse] != 0 {
panic("impossible: overwriting existing chunk")
}
h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
if cap(h.links[off]) < numLinks {
h.links[off] = make([]uint16, numLinks)
} else {
links := h.links[off][:0]
h.links[off] = links[:numLinks]
}
}
} else {
h.links = h.links[:0]
}
for i, n := range lengths {
if n == 0 {
continue
}
code := nextcode[n]
nextcode[n]++
chunk := uint16(i<<huffmanValueShift | n)
reverse := int(bits.Reverse16(uint16(code)))
reverse >>= uint(16 - n)
if n <= huffmanChunkBits {
for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
// We should never need to overwrite
// an existing chunk. Also, 0 is
// never a valid chunk, because the
// lower 4 "count" bits should be
// between 1 and 15.
if sanity && h.chunks[off] != 0 {
panic("impossible: overwriting existing chunk")
}
h.chunks[off] = chunk
}
} else {
j := reverse & (huffmanNumChunks - 1)
if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
// Longer codes should have been
// associated with a link table above.
panic("impossible: not an indirect chunk")
}
value := h.chunks[j] >> huffmanValueShift
linktab := h.links[value]
reverse >>= huffmanChunkBits
for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
if sanity && linktab[off] != 0 {
panic("impossible: overwriting existing chunk")
}
linktab[off] = chunk
}
}
}
if sanity {
// Above we've sanity checked that we never overwrote
// an existing entry. Here we additionally check that
// we filled the tables completely.
for i, chunk := range h.chunks {
if chunk == 0 {
// As an exception, in the degenerate
// single-code case, we allow odd
// chunks to be missing.
if code == 1 && i%2 == 1 {
continue
}
panic("impossible: missing chunk")
}
}
for _, linktab := range h.links {
for _, chunk := range linktab {
if chunk == 0 {
panic("impossible: missing chunk")
}
}
}
}
return true
}
// The actual read interface needed by NewReader.
// If the passed in io.Reader does not also have ReadByte,
// the NewReader will introduce its own buffering.
type Reader interface {
io.Reader
io.ByteReader
}
// Decompress state.
type decompressor struct {
// Input source.
r Reader
roffset int64
// Input bits, in top of b.
b uint32
nb uint
// Huffman decoders for literal/length, distance.
h1, h2 huffmanDecoder
// Length arrays used to define Huffman codes.
bits *[maxNumLit + maxNumDist]int
codebits *[numCodes]int
// Output history, buffer.
dict dictDecoder
// Temporary buffer (avoids repeated allocation).
buf [4]byte
// Next step in the decompression,
// and decompression state.
step func(*decompressor)
stepState int
final bool
err error
toRead []byte
hl, hd *huffmanDecoder
copyLen int
copyDist int
}
func (f *decompressor) nextBlock() {
for f.nb < 1+2 {
if f.err = f.moreBits(); f.err != nil {
return
}
}
f.final = f.b&1 == 1
f.b >>= 1
typ := f.b & 3
f.b >>= 2
f.nb -= 1 + 2
switch typ {
case 0:
f.dataBlock()
case 1:
// compressed, fixed Huffman tables
f.hl = &fixedHuffmanDecoder
f.hd = nil
f.huffmanBlock()
case 2:
// compressed, dynamic Huffman tables
if f.err = f.readHuffman(); f.err != nil {
break
}
f.hl = &f.h1
f.hd = &f.h2
f.huffmanBlock()
default:
// 3 is reserved.
if debugDecode {
fmt.Println("reserved data block encountered")
}
f.err = CorruptInputError(f.roffset)
}
}
func (f *decompressor) Read(b []byte) (int, error) {
for {
if len(f.toRead) > 0 {
n := copy(b, f.toRead)
f.toRead = f.toRead[n:]
if len(f.toRead) == 0 {
return n, f.err
}
return n, nil
}
if f.err != nil {
return 0, f.err
}
f.step(f)
if f.err != nil && len(f.toRead) == 0 {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
}
}
}
// Support the io.WriteTo interface for io.Copy and friends.
func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
total := int64(0)
flushed := false
for {
if len(f.toRead) > 0 {
n, err := w.Write(f.toRead)
total += int64(n)
if err != nil {
f.err = err
return total, err
}
if n != len(f.toRead) {
return total, io.ErrShortWrite
}
f.toRead = f.toRead[:0]
}
if f.err != nil && flushed {
if f.err == io.EOF {
return total, nil
}
return total, f.err
}
if f.err == nil {
f.step(f)
}
if len(f.toRead) == 0 && f.err != nil && !flushed {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
flushed = true
}
}
}
func (f *decompressor) Close() error {
if f.err == io.EOF {
return nil
}
return f.err
}
// RFC 1951 section 3.2.7.
// Compression with dynamic Huffman codes
var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
func (f *decompressor) readHuffman() error {
// HLIT[5], HDIST[5], HCLEN[4].
for f.nb < 5+5+4 {
if err := f.moreBits(); err != nil {
return err
}
}
nlit := int(f.b&0x1F) + 257
if nlit > maxNumLit {
if debugDecode {
fmt.Println("nlit > maxNumLit", nlit)
}
return CorruptInputError(f.roffset)
}
f.b >>= 5
ndist := int(f.b&0x1F) + 1
if ndist > maxNumDist {
if debugDecode {
fmt.Println("ndist > maxNumDist", ndist)
}
return CorruptInputError(f.roffset)
}
f.b >>= 5
nclen := int(f.b&0xF) + 4
// numCodes is 19, so nclen is always valid.
f.b >>= 4
f.nb -= 5 + 5 + 4
// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
for i := 0; i < nclen; i++ {
for f.nb < 3 {
if err := f.moreBits(); err != nil {
return err
}
}
f.codebits[codeOrder[i]] = int(f.b & 0x7)
f.b >>= 3
f.nb -= 3
}
for i := nclen; i < len(codeOrder); i++ {
f.codebits[codeOrder[i]] = 0
}
if !f.h1.init(f.codebits[0:]) {
if debugDecode {
fmt.Println("init codebits failed")
}
return CorruptInputError(f.roffset)
}
// HLIT + 257 code lengths, HDIST + 1 code lengths,
// using the code length Huffman code.
for i, n := 0, nlit+ndist; i < n; {
x, err := f.huffSym(&f.h1)
if err != nil {
return err
}
if x < 16 {
// Actual length.
f.bits[i] = x
i++
continue
}
// Repeat previous length or zero.
var rep int
var nb uint
var b int
switch x {
default:
return InternalError("unexpected length code")
case 16:
rep = 3
nb = 2
if i == 0 {
if debugDecode {
fmt.Println("i==0")
}
return CorruptInputError(f.roffset)
}
b = f.bits[i-1]
case 17:
rep = 3
nb = 3
b = 0
case 18:
rep = 11
nb = 7
b = 0
}
for f.nb < nb {
if err := f.moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits:", err)
}
return err
}
}
rep += int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
if i+rep > n {
if debugDecode {
fmt.Println("i+rep > n", i, rep, n)
}
return CorruptInputError(f.roffset)
}
for j := 0; j < rep; j++ {
f.bits[i] = b
i++
}
}
if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
if debugDecode {
fmt.Println("init2 failed")
}
return CorruptInputError(f.roffset)
}
// As an optimization, we can initialize the min bits to read at a time
// for the HLIT tree to the length of the EOB marker since we know that
// every block must terminate with one. This preserves the property that
// we never read any extra bytes after the end of the DEFLATE stream.
if f.h1.min < f.bits[endBlockMarker] {
f.h1.min = f.bits[endBlockMarker]
}
return nil
}
// Decode a single Huffman block from f.
// hl and hd are the Huffman states for the lit/length values
// and the distance values, respectively. If hd == nil, using the
// fixed distance encoding associated with fixed Huffman blocks.
func (f *decompressor) huffmanBlock() {
const (
stateInit = iota // Zero value must be stateInit
stateDict
)
switch f.stepState {
case stateInit:
goto readLiteral
case stateDict:
goto copyHistory
}
readLiteral:
// Read literal and/or (length, distance) according to RFC section 3.2.3.
{
v, err := f.huffSym(f.hl)
if err != nil {
f.err = err
return
}
var n uint // number of bits extra
var length int
switch {
case v < 256:
f.dict.writeByte(byte(v))
if f.dict.availWrite() == 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBlock
f.stepState = stateInit
return
}
goto readLiteral
case v == 256:
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
n = 0
case v < 269:
length = v*2 - (265*2 - 11)
n = 1
case v < 273:
length = v*4 - (269*4 - 19)
n = 2
case v < 277:
length = v*8 - (273*8 - 35)
n = 3
case v < 281:
length = v*16 - (277*16 - 67)
n = 4
case v < 285:
length = v*32 - (281*32 - 131)
n = 5
case v < maxNumLit:
length = 258
n = 0
default:
if debugDecode {
fmt.Println(v, ">= maxNumLit")
}
f.err = CorruptInputError(f.roffset)
return
}
if n > 0 {
for f.nb < n {
if err = f.moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits n>0:", err)
}
f.err = err
return
}
}
length += int(f.b & uint32(1<<n-1))
f.b >>= n
f.nb -= n
}
var dist int
if f.hd == nil {
for f.nb < 5 {
if err = f.moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
f.err = err
return
}
}
dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
f.b >>= 5
f.nb -= 5
} else {
if dist, err = f.huffSym(f.hd); err != nil {
if debugDecode {
fmt.Println("huffsym:", err)
}
f.err = err
return
}
}
switch {
case dist < 4:
dist++
case dist < maxNumDist:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << nb
for f.nb < nb {
if err = f.moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<nb:", err)
}
f.err = err
return
}
}
extra |= int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
dist = 1<<(nb+1) + 1 + extra
default:
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
f.err = CorruptInputError(f.roffset)
return
}
// No check on length; encoding can be prescient.
if dist > f.dict.histSize() {
if debugDecode {
fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
}
f.err = CorruptInputError(f.roffset)
return
}
f.copyLen, f.copyDist = length, dist
goto copyHistory
}
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBlock // We need to continue this work
f.stepState = stateDict
return
}
goto readLiteral
}
}
// Copy a single uncompressed data block from input to output.
func (f *decompressor) dataBlock() {
// Uncompressed.
// Discard current half-byte.
f.nb = 0
f.b = 0
// Length then ones-complement of length.
nr, err := io.ReadFull(f.r, f.buf[0:4])
f.roffset += int64(nr)
if err != nil {
f.err = noEOF(err)
return
}
n := int(f.buf[0]) | int(f.buf[1])<<8
nn := int(f.buf[2]) | int(f.buf[3])<<8
if uint16(nn) != uint16(^n) {
if debugDecode {
fmt.Println("uint16(nn) != uint16(^n)", nn, ^n)
}
f.err = CorruptInputError(f.roffset)
return
}
if n == 0 {
f.toRead = f.dict.readFlush()
f.finishBlock()
return
}
f.copyLen = n
f.copyData()
}
// copyData copies f.copyLen bytes from the underlying reader into f.hist.
// It pauses for reads when f.hist is full.
func (f *decompressor) copyData() {
buf := f.dict.writeSlice()
if len(buf) > f.copyLen {
buf = buf[:f.copyLen]
}
cnt, err := io.ReadFull(f.r, buf)
f.roffset += int64(cnt)
f.copyLen -= cnt
f.dict.writeMark(cnt)
if err != nil {
f.err = noEOF(err)
return
}
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).copyData
return
}
f.finishBlock()
}
func (f *decompressor) finishBlock() {
if f.final {
if f.dict.availRead() > 0 {
f.toRead = f.dict.readFlush()
}
f.err = io.EOF
}
f.step = (*decompressor).nextBlock
}
// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
func noEOF(e error) error {
if e == io.EOF {
return io.ErrUnexpectedEOF
}
return e
}
func (f *decompressor) moreBits() error {
c, err := f.r.ReadByte()
if err != nil {
return noEOF(err)
}
f.roffset++
f.b |= uint32(c) << f.nb
f.nb += 8
return nil
}
// Read the next Huffman-encoded symbol from f according to h.
func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(h.min)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for nb < n {
c, err := f.r.ReadByte()
if err != nil {
f.b = b
f.nb = nb
return 0, noEOF(err)
}
f.roffset++
b |= uint32(c) << (nb & 31)
nb += 8
}
chunk := h.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return 0, f.err
}
f.b = b >> (n & 31)
f.nb = nb - n
return int(chunk >> huffmanValueShift), nil
}
}
}
func makeReader(r io.Reader) Reader {
if rr, ok := r.(Reader); ok {
return rr
}
return bufio.NewReader(r)
}
func fixedHuffmanDecoderInit() {
fixedOnce.Do(func() {
// These come from the RFC section 3.2.6.
var bits [288]int
for i := 0; i < 144; i++ {
bits[i] = 8
}
for i := 144; i < 256; i++ {
bits[i] = 9
}
for i := 256; i < 280; i++ {
bits[i] = 7
}
for i := 280; i < 288; i++ {
bits[i] = 8
}
fixedHuffmanDecoder.init(bits[:])
})
}
func (f *decompressor) Reset(r io.Reader, dict []byte) error {
*f = decompressor{
r: makeReader(r),
bits: f.bits,
codebits: f.codebits,
h1: f.h1,
h2: f.h2,
dict: f.dict,
step: (*decompressor).nextBlock,
}
f.dict.init(maxMatchOffset, dict)
return nil
}
// NewReader returns a new ReadCloser that can be used
// to read the uncompressed version of r.
// If r does not also implement io.ByteReader,
// the decompressor may read more data than necessary from r.
// It is the caller's responsibility to call Close on the ReadCloser
// when finished reading.
//
// The ReadCloser returned by NewReader also implements Resetter.
func NewReader(r io.Reader) io.ReadCloser {
fixedHuffmanDecoderInit()
var f decompressor
f.r = makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
f.dict.init(maxMatchOffset, nil)
return &f
}
// NewReaderDict is like NewReader but initializes the reader
// with a preset dictionary. The returned Reader behaves as if
// the uncompressed data stream started with the given dictionary,
// which has already been read. NewReaderDict is typically used
// to read data compressed by NewWriterDict.
//
// The ReadCloser returned by NewReader also implements Resetter.
func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
fixedHuffmanDecoderInit()
var f decompressor
f.r = makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
f.dict.init(maxMatchOffset, dict)
return &f
}

179
vendor/github.com/klauspost/compress/flate/level1.go generated vendored Normal file
View file

@ -0,0 +1,179 @@
package flate
import "fmt"
// fastGen maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
type fastEncL1 struct {
fastGen
table [tableSize]tableEntry
}
// EncodeL1 uses a similar algorithm to level 1
func (e *fastEncL1) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDecode && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load3232(src, s)
for {
const skipLog = 5
const doEvery = 2
nextS := s
var candidate tableEntry
for {
nextHash := hash(cv)
candidate = e.table[nextHash]
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
nextHash = hash(uint32(now))
offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == candidate.val {
e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
break
}
// Do one right away...
cv = uint32(now)
s = nextS
nextS++
candidate = e.table[nextHash]
now >>= 8
e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == candidate.val {
e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
break
}
cv = uint32(now)
s = nextS
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
t := candidate.offset - e.cur
l := e.matchlenLong(s+4, t+4, src) + 4
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
emitLiteral(dst, src[nextEmit:s])
}
// Save the match found
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
// Index first pair after match end.
if int(s+l+4) < len(src) {
cv := load3232(src, s)
e.table[hash(cv)] = tableEntry{offset: s + e.cur, val: cv}
}
goto emitRemainder
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 and at s. If
// another emitCopy is not our next move, also calculate nextHash
// at s+1. At least on GOARCH=amd64, these three hash calculations
// are faster as one load64 call (with some shifts) instead of
// three load32 calls.
x := load6432(src, s-2)
o := e.cur + s - 2
prevHash := hash(uint32(x))
e.table[prevHash] = tableEntry{offset: o, val: uint32(x)}
x >>= 16
currHash := hash(uint32(x))
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x)}
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x) != candidate.val {
cv = uint32(x >> 8)
s++
break
}
}
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

205
vendor/github.com/klauspost/compress/flate/level2.go generated vendored Normal file
View file

@ -0,0 +1,205 @@
package flate
import "fmt"
// fastGen maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
type fastEncL2 struct {
fastGen
table [bTableSize]tableEntry
}
// EncodeL2 uses a similar algorithm to level 1, but is capable
// of matching across blocks giving better compression at a small slowdown.
func (e *fastEncL2) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDecode && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load3232(src, s)
for {
// When should we start skipping if we haven't found matches in a long while.
const skipLog = 5
const doEvery = 2
nextS := s
var candidate tableEntry
for {
nextHash := hash4u(cv, bTableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
candidate = e.table[nextHash]
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
nextHash = hash4u(uint32(now), bTableBits)
offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == candidate.val {
e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
break
}
// Do one right away...
cv = uint32(now)
s = nextS
nextS++
candidate = e.table[nextHash]
now >>= 8
e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == candidate.val {
break
}
cv = uint32(now)
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
t := candidate.offset - e.cur
l := e.matchlenLong(s+4, t+4, src) + 4
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
emitLiteral(dst, src[nextEmit:s])
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
// Index first pair after match end.
if int(s+l+4) < len(src) {
cv := load3232(src, s)
e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur, val: cv}
}
goto emitRemainder
}
// Store every second hash in-between, but offset by 1.
for i := s - l + 2; i < s-5; i += 7 {
x := load6432(src, int32(i))
nextHash := hash4u(uint32(x), bTableBits)
e.table[nextHash] = tableEntry{offset: e.cur + i, val: uint32(x)}
// Skip one
x >>= 16
nextHash = hash4u(uint32(x), bTableBits)
e.table[nextHash] = tableEntry{offset: e.cur + i + 2, val: uint32(x)}
// Skip one
x >>= 16
nextHash = hash4u(uint32(x), bTableBits)
e.table[nextHash] = tableEntry{offset: e.cur + i + 4, val: uint32(x)}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 to s. If
// another emitCopy is not our next move, also calculate nextHash
// at s+1. At least on GOARCH=amd64, these three hash calculations
// are faster as one load64 call (with some shifts) instead of
// three load32 calls.
x := load6432(src, s-2)
o := e.cur + s - 2
prevHash := hash4u(uint32(x), bTableBits)
prevHash2 := hash4u(uint32(x>>8), bTableBits)
e.table[prevHash] = tableEntry{offset: o, val: uint32(x)}
e.table[prevHash2] = tableEntry{offset: o + 1, val: uint32(x >> 8)}
currHash := hash4u(uint32(x>>16), bTableBits)
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x >> 16)}
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x>>16) != candidate.val {
cv = uint32(x >> 24)
s++
break
}
}
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

231
vendor/github.com/klauspost/compress/flate/level3.go generated vendored Normal file
View file

@ -0,0 +1,231 @@
package flate
import "fmt"
// fastEncL3
type fastEncL3 struct {
fastGen
table [tableSize]tableEntryPrev
}
// Encode uses a similar algorithm to level 2, will check up to two candidates.
func (e *fastEncL3) Encode(dst *tokens, src []byte) {
const (
inputMargin = 8 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDecode && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntryPrev{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i]
if v.Cur.offset <= minOff {
v.Cur.offset = 0
} else {
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
}
if v.Prev.offset <= minOff {
v.Prev.offset = 0
} else {
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
}
e.table[i] = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// Skip if too small.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load3232(src, s)
for {
const skipLog = 6
nextS := s
var candidate tableEntry
for {
nextHash := hash(cv)
s = nextS
nextS = s + 1 + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
candidates := e.table[nextHash]
now := load3232(src, nextS)
e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}
// Check both candidates
candidate = candidates.Cur
offset := s - (candidate.offset - e.cur)
if cv == candidate.val {
if offset > maxMatchOffset {
cv = now
// Previous will also be invalid, we have nothing.
continue
}
o2 := s - (candidates.Prev.offset - e.cur)
if cv != candidates.Prev.val || o2 > maxMatchOffset {
break
}
// Both match and are valid, pick longest.
l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:])
if l2 > l1 {
candidate = candidates.Prev
}
break
} else {
// We only check if value mismatches.
// Offset will always be invalid in other cases.
candidate = candidates.Prev
if cv == candidate.val {
offset := s - (candidate.offset - e.cur)
if offset <= maxMatchOffset {
break
}
}
}
cv = now
}
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
//
t := candidate.offset - e.cur
l := e.matchlenLong(s+4, t+4, src) + 4
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
emitLiteral(dst, src[nextEmit:s])
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
t += l
// Index first pair after match end.
if int(t+4) < len(src) && t > 0 {
cv := load3232(src, t)
nextHash := hash(cv)
e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + t, val: cv},
}
}
goto emitRemainder
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-3 to s.
x := load6432(src, s-3)
prevHash := hash(uint32(x))
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 3, val: uint32(x)},
}
x >>= 8
prevHash = hash(uint32(x))
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 2, val: uint32(x)},
}
x >>= 8
prevHash = hash(uint32(x))
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 1, val: uint32(x)},
}
x >>= 8
currHash := hash(uint32(x))
candidates := e.table[currHash]
cv = uint32(x)
e.table[currHash] = tableEntryPrev{
Prev: candidates.Cur,
Cur: tableEntry{offset: s + e.cur, val: cv},
}
// Check both candidates
candidate = candidates.Cur
if cv == candidate.val {
offset := s - (candidate.offset - e.cur)
if offset <= maxMatchOffset {
continue
}
} else {
// We only check if value mismatches.
// Offset will always be invalid in other cases.
candidate = candidates.Prev
if cv == candidate.val {
offset := s - (candidate.offset - e.cur)
if offset <= maxMatchOffset {
continue
}
}
}
cv = uint32(x >> 8)
s++
break
}
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

212
vendor/github.com/klauspost/compress/flate/level4.go generated vendored Normal file
View file

@ -0,0 +1,212 @@
package flate
import "fmt"
type fastEncL4 struct {
fastGen
table [tableSize]tableEntry
bTable [tableSize]tableEntry
}
func (e *fastEncL4) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDecode && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.bTable[:] {
e.bTable[i] = tableEntry{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
for i := range e.bTable[:] {
v := e.bTable[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.bTable[i].offset = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
const skipLog = 6
const doEvery = 1
nextS := s
var t int32
for {
nextHashS := hash4x64(cv, tableBits)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
// Fetch a short+long candidate
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
e.table[nextHashS] = entry
e.bTable[nextHashL] = entry
t = lCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == lCandidate.val {
// We got a long match. Use that.
break
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
// Found a 4 match...
lCandidate = e.bTable[hash7(next, tableBits)]
// If the next long is a candidate, check if we should use that instead...
lOff := nextS - (lCandidate.offset - e.cur)
if lOff < maxMatchOffset && lCandidate.val == uint32(next) {
l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:])
if l2 > l1 {
s = nextS
t = lCandidate.offset - e.cur
}
}
break
}
cv = next
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
// Extend the 4-byte match as long as possible.
l := e.matchlenLong(s+4, t+4, src) + 4
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
emitLiteral(dst, src[nextEmit:s])
}
if false {
if t >= s {
panic("s-t")
}
if (s - t) > maxMatchOffset {
panic(fmt.Sprintln("mmo", t))
}
if l < baseMatchLength {
panic("bml")
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
// Index first pair after match end.
if int(s+8) < len(src) {
cv := load6432(src, s)
e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)}
e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)}
}
goto emitRemainder
}
// Store every 3rd hash in-between
if true {
i := nextS
if i < s-1 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1}
e.bTable[hash7(cv, tableBits)] = t
e.bTable[hash7(cv>>8, tableBits)] = t2
e.table[hash4u(t2.val, tableBits)] = t2
i += 3
for ; i < s-1; i += 3 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1}
e.bTable[hash7(cv, tableBits)] = t
e.bTable[hash7(cv>>8, tableBits)] = t2
e.table[hash4u(t2.val, tableBits)] = t2
}
}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1)
o := e.cur + s - 1
prevHashS := hash4x64(x, tableBits)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)}
e.bTable[prevHashL] = tableEntry{offset: o, val: uint32(x)}
cv = x >> 8
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

279
vendor/github.com/klauspost/compress/flate/level5.go generated vendored Normal file
View file

@ -0,0 +1,279 @@
package flate
import "fmt"
type fastEncL5 struct {
fastGen
table [tableSize]tableEntry
bTable [tableSize]tableEntryPrev
}
func (e *fastEncL5) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDecode && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.bTable[:] {
e.bTable[i] = tableEntryPrev{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
for i := range e.bTable[:] {
v := e.bTable[i]
if v.Cur.offset <= minOff {
v.Cur.offset = 0
v.Prev.offset = 0
} else {
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
if v.Prev.offset <= minOff {
v.Prev.offset = 0
} else {
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
}
}
e.bTable[i] = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
for {
const skipLog = 6
const doEvery = 1
nextS := s
var l int32
var t int32
for {
nextHashS := hash4x64(cv, tableBits)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
// Fetch a short+long candidate
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
e.table[nextHashS] = entry
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur
nextHashS = hash4x64(next, tableBits)
nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur
if s-t < maxMatchOffset {
if uint32(cv) == lCandidate.Cur.val {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
t2 := lCandidate.Prev.offset - e.cur
if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
l = e.matchlen(s+4, t+4, src) + 4
ml1 := e.matchlen(s+4, t2+4, src) + 4
if ml1 > l {
t = t2
l = ml1
break
}
}
break
}
t = lCandidate.Prev.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
break
}
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
// Found a 4 match...
l = e.matchlen(s+4, t+4, src) + 4
lCandidate = e.bTable[nextHashL]
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
// If the next long is a candidate, use that...
t2 := lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset {
if lCandidate.Cur.val == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
// If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
}
break
}
cv = next
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
// Extend the 4-byte match as long as possible.
if l == 0 {
l = e.matchlenLong(s+4, t+4, src) + 4
} else if l == maxMatchLength {
l += e.matchlenLong(s+l, t+l, src)
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
emitLiteral(dst, src[nextEmit:s])
}
if false {
if t >= s {
panic(fmt.Sprintln("s-t", s, t))
}
if (s - t) > maxMatchOffset {
panic(fmt.Sprintln("mmo", s-t))
}
if l < baseMatchLength {
panic("bml")
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
goto emitRemainder
}
// Store every 3rd hash in-between.
if true {
const hashEvery = 3
i := s - l + 1
if i < s-1 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
e.table[hash4x64(cv, tableBits)] = t
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// Do an long at i+1
cv >>= 8
t = tableEntry{offset: t.offset + 1, val: uint32(cv)}
eLong = &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
// We only have enough bits for a short entry at i+2
cv >>= 8
t = tableEntry{offset: t.offset + 1, val: uint32(cv)}
e.table[hash4x64(cv, tableBits)] = t
// Skip one - otherwise we risk hitting 's'
i += 4
for ; i < s-1; i += hashEvery {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
e.table[hash4u(t2.val, tableBits)] = t2
}
}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1)
o := e.cur + s - 1
prevHashS := hash4x64(x, tableBits)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)}
eLong := &e.bTable[prevHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: o, val: uint32(x)}, eLong.Cur
cv = x >> 8
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

282
vendor/github.com/klauspost/compress/flate/level6.go generated vendored Normal file
View file

@ -0,0 +1,282 @@
package flate
import "fmt"
type fastEncL6 struct {
fastGen
table [tableSize]tableEntry
bTable [tableSize]tableEntryPrev
}
func (e *fastEncL6) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDecode && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.bTable[:] {
e.bTable[i] = tableEntryPrev{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i].offset
if v <= minOff {
v = 0
} else {
v = v - e.cur + maxMatchOffset
}
e.table[i].offset = v
}
for i := range e.bTable[:] {
v := e.bTable[i]
if v.Cur.offset <= minOff {
v.Cur.offset = 0
v.Prev.offset = 0
} else {
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
if v.Prev.offset <= minOff {
v.Prev.offset = 0
} else {
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
}
}
e.bTable[i] = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load6432(src, s)
// Repeat MUST be > 1 and within range
repeat := int32(1)
for {
const skipLog = 7
const doEvery = 1
nextS := s
var l int32
var t int32
for {
nextHashS := hash4x64(cv, tableBits)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
// Fetch a short+long candidate
sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
e.table[nextHashS] = entry
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur
// Calculate hashes of 'next'
nextHashS = hash4x64(next, tableBits)
nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur
if s-t < maxMatchOffset {
if uint32(cv) == lCandidate.Cur.val {
// Long candidate matches at least 4 bytes.
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
// Check the previous long candidate as well.
t2 := lCandidate.Prev.offset - e.cur
if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
l = e.matchlen(s+4, t+4, src) + 4
ml1 := e.matchlen(s+4, t2+4, src) + 4
if ml1 > l {
t = t2
l = ml1
break
}
}
break
}
// Current value did not match, but check if previous long value does.
t = lCandidate.Prev.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
break
}
}
t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
// Found a 4 match...
l = e.matchlen(s+4, t+4, src) + 4
// Look up next long candidate (at nextS)
lCandidate = e.bTable[nextHashL]
// Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
// Check repeat at s + repOff
const repOff = 1
t2 := s - repeat + repOff
if load3232(src, t2) == uint32(cv>>(8*repOff)) {
ml := e.matchlen(s+4+repOff, t2+4, src) + 4
if ml > l {
t = t2
l = ml
s += repOff
// Not worth checking more.
break
}
}
// If the next long is a candidate, use that...
t2 = lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset {
if lCandidate.Cur.val == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
// This is ok, but check previous as well.
}
}
// If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l {
t = t2
s = nextS
l = ml
break
}
}
}
break
}
cv = next
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
// Extend the 4-byte match as long as possible.
if l == 0 {
l = e.matchlenLong(s+4, t+4, src) + 4
} else if l == maxMatchLength {
l += e.matchlenLong(s+l, t+l, src)
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
emitLiteral(dst, src[nextEmit:s])
}
if false {
if t >= s {
panic(fmt.Sprintln("s-t", s, t))
}
if (s - t) > maxMatchOffset {
panic(fmt.Sprintln("mmo", s-t))
}
if l < baseMatchLength {
panic("bml")
}
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
repeat = s - t
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
// Index after match end.
for i := nextS + 1; i < int32(len(src))-8; i += 2 {
cv := load6432(src, i)
e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur, val: uint32(cv)}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur, val: uint32(cv)}, eLong.Cur
}
goto emitRemainder
}
// Store every long hash in-between and every second short.
if true {
for i := nextS + 1; i < s-1; i += 2 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)}
t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong2 := &e.bTable[hash7(cv>>8, tableBits)]
e.table[hash4x64(cv, tableBits)] = t
eLong.Cur, eLong.Prev = t, eLong.Cur
eLong2.Cur, eLong2.Prev = t2, eLong2.Cur
}
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s.
cv = load6432(src, s)
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

297
vendor/github.com/klauspost/compress/flate/stateless.go generated vendored Normal file
View file

@ -0,0 +1,297 @@
package flate
import (
"io"
"math"
"sync"
)
const (
maxStatelessBlock = math.MaxInt16
// dictionary will be taken from maxStatelessBlock, so limit it.
maxStatelessDict = 8 << 10
slTableBits = 13
slTableSize = 1 << slTableBits
slTableShift = 32 - slTableBits
)
type statelessWriter struct {
dst io.Writer
closed bool
}
func (s *statelessWriter) Close() error {
if s.closed {
return nil
}
s.closed = true
// Emit EOF block
return StatelessDeflate(s.dst, nil, true, nil)
}
func (s *statelessWriter) Write(p []byte) (n int, err error) {
err = StatelessDeflate(s.dst, p, false, nil)
if err != nil {
return 0, err
}
return len(p), nil
}
func (s *statelessWriter) Reset(w io.Writer) {
s.dst = w
s.closed = false
}
// NewStatelessWriter will do compression but without maintaining any state
// between Write calls.
// There will be no memory kept between Write calls,
// but compression and speed will be suboptimal.
// Because of this, the size of actual Write calls will affect output size.
func NewStatelessWriter(dst io.Writer) io.WriteCloser {
return &statelessWriter{dst: dst}
}
// bitWriterPool contains bit writers that can be reused.
var bitWriterPool = sync.Pool{
New: func() interface{} {
return newHuffmanBitWriter(nil)
},
}
// StatelessDeflate allows to compress directly to a Writer without retaining state.
// When returning everything will be flushed.
// Up to 8KB of an optional dictionary can be given which is presumed to presumed to precede the block.
// Longer dictionaries will be truncated and will still produce valid output.
// Sending nil dictionary is perfectly fine.
func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
var dst tokens
bw := bitWriterPool.Get().(*huffmanBitWriter)
bw.reset(out)
defer func() {
// don't keep a reference to our output
bw.reset(nil)
bitWriterPool.Put(bw)
}()
if eof && len(in) == 0 {
// Just write an EOF block.
// Could be faster...
bw.writeStoredHeader(0, true)
bw.flush()
return bw.err
}
// Truncate dict
if len(dict) > maxStatelessDict {
dict = dict[len(dict)-maxStatelessDict:]
}
for len(in) > 0 {
todo := in
if len(todo) > maxStatelessBlock-len(dict) {
todo = todo[:maxStatelessBlock-len(dict)]
}
in = in[len(todo):]
uncompressed := todo
if len(dict) > 0 {
// combine dict and source
bufLen := len(todo) + len(dict)
combined := make([]byte, bufLen)
copy(combined, dict)
copy(combined[len(dict):], todo)
todo = combined
}
// Compress
statelessEnc(&dst, todo, int16(len(dict)))
isEof := eof && len(in) == 0
if dst.n == 0 {
bw.writeStoredHeader(len(uncompressed), isEof)
if bw.err != nil {
return bw.err
}
bw.writeBytes(uncompressed)
} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
// If we removed less than 1/16th, huffman compress the block.
bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
} else {
bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
}
if len(in) > 0 {
// Retain a dict if we have more
dict = todo[len(todo)-maxStatelessDict:]
dst.Reset()
}
if bw.err != nil {
return bw.err
}
}
if !eof {
// Align, only a stored block can do that.
bw.writeStoredHeader(0, false)
}
bw.flush()
return bw.err
}
func hashSL(u uint32) uint32 {
return (u * 0x1e35a7bd) >> slTableShift
}
func load3216(b []byte, i int16) uint32 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:4]
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
func load6416(b []byte, i int16) uint64 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:8]
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
func statelessEnc(dst *tokens, src []byte, startAt int16) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
type tableEntry struct {
offset int16
}
var table [slTableSize]tableEntry
// This check isn't in the Snappy implementation, but there, the caller
// instead of the callee handles this case.
if len(src)-int(startAt) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = 0
return
}
// Index until startAt
if startAt > 0 {
cv := load3232(src, 0)
for i := int16(0); i < startAt; i++ {
table[hashSL(cv)] = tableEntry{offset: i}
cv = (cv >> 8) | (uint32(src[i+4]) << 24)
}
}
s := startAt + 1
nextEmit := startAt
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int16(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load3216(src, s)
for {
const skipLog = 5
const doEvery = 2
nextS := s
var candidate tableEntry
for {
nextHash := hashSL(cv)
candidate = table[nextHash]
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit || nextS <= 0 {
goto emitRemainder
}
now := load6416(src, nextS)
table[nextHash] = tableEntry{offset: s}
nextHash = hashSL(uint32(now))
if cv == load3216(src, candidate.offset) {
table[nextHash] = tableEntry{offset: nextS}
break
}
// Do one right away...
cv = uint32(now)
s = nextS
nextS++
candidate = table[nextHash]
now >>= 8
table[nextHash] = tableEntry{offset: s}
if cv == load3216(src, candidate.offset) {
table[nextHash] = tableEntry{offset: nextS}
break
}
cv = uint32(now)
s = nextS
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
t := candidate.offset
l := int16(matchLen(src[s+4:], src[t+4:]) + 4)
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
emitLiteral(dst, src[nextEmit:s])
}
// Save the match found
dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
goto emitRemainder
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 and at s. If
// another emitCopy is not our next move, also calculate nextHash
// at s+1. At least on GOARCH=amd64, these three hash calculations
// are faster as one load64 call (with some shifts) instead of
// three load32 calls.
x := load6416(src, s-2)
o := s - 2
prevHash := hashSL(uint32(x))
table[prevHash] = tableEntry{offset: o}
x >>= 16
currHash := hashSL(uint32(x))
candidate = table[currHash]
table[currHash] = tableEntry{offset: o + 2}
if uint32(x) != load3216(src, candidate.offset) {
cv = uint32(x >> 8)
s++
break
}
}
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}

375
vendor/github.com/klauspost/compress/flate/token.go generated vendored Normal file
View file

@ -0,0 +1,375 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math"
)
const (
// 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused
// 8 bits: xlength = length - MIN_MATCH_LENGTH
// 22 bits xoffset = offset - MIN_OFFSET_SIZE, or literal
lengthShift = 22
offsetMask = 1<<lengthShift - 1
typeMask = 3 << 30
literalType = 0 << 30
matchType = 1 << 30
)
// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
// is lengthCodes[length - MIN_MATCH_LENGTH]
var lengthCodes = [256]uint8{
0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 28,
}
// lengthCodes1 is length codes, but starting at 1.
var lengthCodes1 = [256]uint8{
1, 2, 3, 4, 5, 6, 7, 8, 9, 9,
10, 10, 11, 11, 12, 12, 13, 13, 13, 13,
14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
18, 18, 18, 18, 18, 18, 18, 18, 19, 19,
19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
20, 20, 20, 20, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 29,
}
var offsetCodes = [256]uint32{
0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
}
// offsetCodes14 are offsetCodes, but with 14 added.
var offsetCodes14 = [256]uint32{
14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
}
type token uint32
type tokens struct {
nLits int
extraHist [32]uint16 // codes 256->maxnumlit
offHist [32]uint16 // offset codes
litHist [256]uint16 // codes 0->255
n uint16 // Must be able to contain maxStoreBlockSize
tokens [maxStoreBlockSize + 1]token
}
func (t *tokens) Reset() {
if t.n == 0 {
return
}
t.n = 0
t.nLits = 0
for i := range t.litHist[:] {
t.litHist[i] = 0
}
for i := range t.extraHist[:] {
t.extraHist[i] = 0
}
for i := range t.offHist[:] {
t.offHist[i] = 0
}
}
func (t *tokens) Fill() {
if t.n == 0 {
return
}
for i, v := range t.litHist[:] {
if v == 0 {
t.litHist[i] = 1
t.nLits++
}
}
for i, v := range t.extraHist[:literalCount-256] {
if v == 0 {
t.nLits++
t.extraHist[i] = 1
}
}
for i, v := range t.offHist[:offsetCodeCount] {
if v == 0 {
t.offHist[i] = 1
}
}
}
func indexTokens(in []token) tokens {
var t tokens
t.indexTokens(in)
return t
}
func (t *tokens) indexTokens(in []token) {
t.Reset()
for _, tok := range in {
if tok < matchType {
t.AddLiteral(tok.literal())
continue
}
t.AddMatch(uint32(tok.length()), tok.offset())
}
}
// emitLiteral writes a literal chunk and returns the number of bytes written.
func emitLiteral(dst *tokens, lit []byte) {
ol := int(dst.n)
for i, v := range lit {
dst.tokens[(i+ol)&maxStoreBlockSize] = token(v)
dst.litHist[v]++
}
dst.n += uint16(len(lit))
dst.nLits += len(lit)
}
func (t *tokens) AddLiteral(lit byte) {
t.tokens[t.n] = token(lit)
t.litHist[lit]++
t.n++
t.nLits++
}
// from https://stackoverflow.com/a/28730362
func mFastLog2(val float32) float32 {
ux := int32(math.Float32bits(val))
log2 := (float32)(((ux >> 23) & 255) - 128)
ux &= -0x7f800001
ux += 127 << 23
uval := math.Float32frombits(uint32(ux))
log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759
return log2
}
// EstimatedBits will return an minimum size estimated by an *optimal*
// compression of the block.
// The size of the block
func (t *tokens) EstimatedBits() int {
shannon := float32(0)
bits := int(0)
nMatches := 0
if t.nLits > 0 {
invTotal := 1.0 / float32(t.nLits)
for _, v := range t.litHist[:] {
if v > 0 {
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
}
}
// Just add 15 for EOB
shannon += 15
for i, v := range t.extraHist[1 : literalCount-256] {
if v > 0 {
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
bits += int(lengthExtraBits[i&31]) * int(v)
nMatches += int(v)
}
}
}
if nMatches > 0 {
invTotal := 1.0 / float32(nMatches)
for i, v := range t.offHist[:offsetCodeCount] {
if v > 0 {
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
bits += int(offsetExtraBits[i&31]) * int(v)
}
}
}
return int(shannon) + bits
}
// AddMatch adds a match to the tokens.
// This function is very sensitive to inlining and right on the border.
func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
if debugDecode {
if xlength >= maxMatchLength+baseMatchLength {
panic(fmt.Errorf("invalid length: %v", xlength))
}
if xoffset >= maxMatchOffset+baseMatchOffset {
panic(fmt.Errorf("invalid offset: %v", xoffset))
}
}
t.nLits++
lengthCode := lengthCodes1[uint8(xlength)] & 31
t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
t.extraHist[lengthCode]++
t.offHist[offsetCode(xoffset)&31]++
t.n++
}
// AddMatchLong adds a match to the tokens, potentially longer than max match length.
// Length should NOT have the base subtracted, only offset should.
func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
if debugDecode {
if xoffset >= maxMatchOffset+baseMatchOffset {
panic(fmt.Errorf("invalid offset: %v", xoffset))
}
}
oc := offsetCode(xoffset) & 31
for xlength > 0 {
xl := xlength
if xl > 258 {
// We need to have at least baseMatchLength left over for next loop.
xl = 258 - baseMatchLength
}
xlength -= xl
xl -= 3
t.nLits++
lengthCode := lengthCodes1[uint8(xl)] & 31
t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
t.extraHist[lengthCode]++
t.offHist[oc]++
t.n++
}
}
func (t *tokens) AddEOB() {
t.tokens[t.n] = token(endBlockMarker)
t.extraHist[0]++
t.n++
}
func (t *tokens) Slice() []token {
return t.tokens[:t.n]
}
// VarInt returns the tokens as varint encoded bytes.
func (t *tokens) VarInt() []byte {
var b = make([]byte, binary.MaxVarintLen32*int(t.n))
var off int
for _, v := range t.tokens[:t.n] {
off += binary.PutUvarint(b[off:], uint64(v))
}
return b[:off]
}
// FromVarInt restores t to the varint encoded tokens provided.
// Any data in t is removed.
func (t *tokens) FromVarInt(b []byte) error {
var buf = bytes.NewReader(b)
var toks []token
for {
r, err := binary.ReadUvarint(buf)
if err == io.EOF {
break
}
if err != nil {
return err
}
toks = append(toks, token(r))
}
t.indexTokens(toks)
return nil
}
// Returns the type of a token
func (t token) typ() uint32 { return uint32(t) & typeMask }
// Returns the literal of a literal token
func (t token) literal() uint8 { return uint8(t) }
// Returns the extra offset of a match token
func (t token) offset() uint32 { return uint32(t) & offsetMask }
func (t token) length() uint8 { return uint8(t >> lengthShift) }
// The code is never more than 8 bits, but is returned as uint32 for convenience.
func lengthCode(len uint8) uint32 { return uint32(lengthCodes[len]) }
// Returns the offset code corresponding to a specific offset
func offsetCode(off uint32) uint32 {
if false {
if off < uint32(len(offsetCodes)) {
return offsetCodes[off&255]
} else if off>>7 < uint32(len(offsetCodes)) {
return offsetCodes[(off>>7)&255] + 14
} else {
return offsetCodes[(off>>14)&255] + 28
}
}
if off < uint32(len(offsetCodes)) {
return offsetCodes[uint8(off)]
}
return offsetCodes14[uint8(off>>7)]
}

344
vendor/github.com/klauspost/compress/gzip/gunzip.go generated vendored Normal file
View file

@ -0,0 +1,344 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package gzip implements reading and writing of gzip format compressed files,
// as specified in RFC 1952.
package gzip
import (
"bufio"
"encoding/binary"
"errors"
"hash/crc32"
"io"
"time"
"github.com/klauspost/compress/flate"
)
const (
gzipID1 = 0x1f
gzipID2 = 0x8b
gzipDeflate = 8
flagText = 1 << 0
flagHdrCrc = 1 << 1
flagExtra = 1 << 2
flagName = 1 << 3
flagComment = 1 << 4
)
var (
// ErrChecksum is returned when reading GZIP data that has an invalid checksum.
ErrChecksum = errors.New("gzip: invalid checksum")
// ErrHeader is returned when reading GZIP data that has an invalid header.
ErrHeader = errors.New("gzip: invalid header")
)
var le = binary.LittleEndian
// noEOF converts io.EOF to io.ErrUnexpectedEOF.
func noEOF(err error) error {
if err == io.EOF {
return io.ErrUnexpectedEOF
}
return err
}
// The gzip file stores a header giving metadata about the compressed file.
// That header is exposed as the fields of the Writer and Reader structs.
//
// Strings must be UTF-8 encoded and may only contain Unicode code points
// U+0001 through U+00FF, due to limitations of the GZIP file format.
type Header struct {
Comment string // comment
Extra []byte // "extra data"
ModTime time.Time // modification time
Name string // file name
OS byte // operating system type
}
// A Reader is an io.Reader that can be read to retrieve
// uncompressed data from a gzip-format compressed file.
//
// In general, a gzip file can be a concatenation of gzip files,
// each with its own header. Reads from the Reader
// return the concatenation of the uncompressed data of each.
// Only the first header is recorded in the Reader fields.
//
// Gzip files store a length and checksum of the uncompressed data.
// The Reader will return a ErrChecksum when Read
// reaches the end of the uncompressed data if it does not
// have the expected length or checksum. Clients should treat data
// returned by Read as tentative until they receive the io.EOF
// marking the end of the data.
type Reader struct {
Header // valid after NewReader or Reader.Reset
r flate.Reader
decompressor io.ReadCloser
digest uint32 // CRC-32, IEEE polynomial (section 8)
size uint32 // Uncompressed size (section 2.3.1)
buf [512]byte
err error
multistream bool
}
// NewReader creates a new Reader reading the given reader.
// If r does not also implement io.ByteReader,
// the decompressor may read more data than necessary from r.
//
// It is the caller's responsibility to call Close on the Reader when done.
//
// The Reader.Header fields will be valid in the Reader returned.
func NewReader(r io.Reader) (*Reader, error) {
z := new(Reader)
if err := z.Reset(r); err != nil {
return nil, err
}
return z, nil
}
// Reset discards the Reader z's state and makes it equivalent to the
// result of its original state from NewReader, but reading from r instead.
// This permits reusing a Reader rather than allocating a new one.
func (z *Reader) Reset(r io.Reader) error {
*z = Reader{
decompressor: z.decompressor,
multistream: true,
}
if rr, ok := r.(flate.Reader); ok {
z.r = rr
} else {
z.r = bufio.NewReader(r)
}
z.Header, z.err = z.readHeader()
return z.err
}
// Multistream controls whether the reader supports multistream files.
//
// If enabled (the default), the Reader expects the input to be a sequence
// of individually gzipped data streams, each with its own header and
// trailer, ending at EOF. The effect is that the concatenation of a sequence
// of gzipped files is treated as equivalent to the gzip of the concatenation
// of the sequence. This is standard behavior for gzip readers.
//
// Calling Multistream(false) disables this behavior; disabling the behavior
// can be useful when reading file formats that distinguish individual gzip
// data streams or mix gzip data streams with other data streams.
// In this mode, when the Reader reaches the end of the data stream,
// Read returns io.EOF. If the underlying reader implements io.ByteReader,
// it will be left positioned just after the gzip stream.
// To start the next stream, call z.Reset(r) followed by z.Multistream(false).
// If there is no next stream, z.Reset(r) will return io.EOF.
func (z *Reader) Multistream(ok bool) {
z.multistream = ok
}
// readString reads a NUL-terminated string from z.r.
// It treats the bytes read as being encoded as ISO 8859-1 (Latin-1) and
// will output a string encoded using UTF-8.
// This method always updates z.digest with the data read.
func (z *Reader) readString() (string, error) {
var err error
needConv := false
for i := 0; ; i++ {
if i >= len(z.buf) {
return "", ErrHeader
}
z.buf[i], err = z.r.ReadByte()
if err != nil {
return "", err
}
if z.buf[i] > 0x7f {
needConv = true
}
if z.buf[i] == 0 {
// Digest covers the NUL terminator.
z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:i+1])
// Strings are ISO 8859-1, Latin-1 (RFC 1952, section 2.3.1).
if needConv {
s := make([]rune, 0, i)
for _, v := range z.buf[:i] {
s = append(s, rune(v))
}
return string(s), nil
}
return string(z.buf[:i]), nil
}
}
}
// readHeader reads the GZIP header according to section 2.3.1.
// This method does not set z.err.
func (z *Reader) readHeader() (hdr Header, err error) {
if _, err = io.ReadFull(z.r, z.buf[:10]); err != nil {
// RFC 1952, section 2.2, says the following:
// A gzip file consists of a series of "members" (compressed data sets).
//
// Other than this, the specification does not clarify whether a
// "series" is defined as "one or more" or "zero or more". To err on the
// side of caution, Go interprets this to mean "zero or more".
// Thus, it is okay to return io.EOF here.
return hdr, err
}
if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate {
return hdr, ErrHeader
}
flg := z.buf[3]
hdr.ModTime = time.Unix(int64(le.Uint32(z.buf[4:8])), 0)
// z.buf[8] is XFL and is currently ignored.
hdr.OS = z.buf[9]
z.digest = crc32.ChecksumIEEE(z.buf[:10])
if flg&flagExtra != 0 {
if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil {
return hdr, noEOF(err)
}
z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:2])
data := make([]byte, le.Uint16(z.buf[:2]))
if _, err = io.ReadFull(z.r, data); err != nil {
return hdr, noEOF(err)
}
z.digest = crc32.Update(z.digest, crc32.IEEETable, data)
hdr.Extra = data
}
var s string
if flg&flagName != 0 {
if s, err = z.readString(); err != nil {
return hdr, err
}
hdr.Name = s
}
if flg&flagComment != 0 {
if s, err = z.readString(); err != nil {
return hdr, err
}
hdr.Comment = s
}
if flg&flagHdrCrc != 0 {
if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil {
return hdr, noEOF(err)
}
digest := le.Uint16(z.buf[:2])
if digest != uint16(z.digest) {
return hdr, ErrHeader
}
}
z.digest = 0
if z.decompressor == nil {
z.decompressor = flate.NewReader(z.r)
} else {
z.decompressor.(flate.Resetter).Reset(z.r, nil)
}
return hdr, nil
}
// Read implements io.Reader, reading uncompressed bytes from its underlying Reader.
func (z *Reader) Read(p []byte) (n int, err error) {
if z.err != nil {
return 0, z.err
}
n, z.err = z.decompressor.Read(p)
z.digest = crc32.Update(z.digest, crc32.IEEETable, p[:n])
z.size += uint32(n)
if z.err != io.EOF {
// In the normal case we return here.
return n, z.err
}
// Finished file; check checksum and size.
if _, err := io.ReadFull(z.r, z.buf[:8]); err != nil {
z.err = noEOF(err)
return n, z.err
}
digest := le.Uint32(z.buf[:4])
size := le.Uint32(z.buf[4:8])
if digest != z.digest || size != z.size {
z.err = ErrChecksum
return n, z.err
}
z.digest, z.size = 0, 0
// File is ok; check if there is another.
if !z.multistream {
return n, io.EOF
}
z.err = nil // Remove io.EOF
if _, z.err = z.readHeader(); z.err != nil {
return n, z.err
}
// Read from next file, if necessary.
if n > 0 {
return n, nil
}
return z.Read(p)
}
// Support the io.WriteTo interface for io.Copy and friends.
func (z *Reader) WriteTo(w io.Writer) (int64, error) {
total := int64(0)
crcWriter := crc32.NewIEEE()
for {
if z.err != nil {
if z.err == io.EOF {
return total, nil
}
return total, z.err
}
// We write both to output and digest.
mw := io.MultiWriter(w, crcWriter)
n, err := z.decompressor.(io.WriterTo).WriteTo(mw)
total += n
z.size += uint32(n)
if err != nil {
z.err = err
return total, z.err
}
// Finished file; check checksum + size.
if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
z.err = err
return total, err
}
z.digest = crcWriter.Sum32()
digest := le.Uint32(z.buf[:4])
size := le.Uint32(z.buf[4:8])
if digest != z.digest || size != z.size {
z.err = ErrChecksum
return total, z.err
}
z.digest, z.size = 0, 0
// File is ok; check if there is another.
if !z.multistream {
return total, nil
}
crcWriter.Reset()
z.err = nil // Remove io.EOF
if _, z.err = z.readHeader(); z.err != nil {
if z.err == io.EOF {
return total, nil
}
return total, z.err
}
}
}
// Close closes the Reader. It does not close the underlying io.Reader.
// In order for the GZIP checksum to be verified, the reader must be
// fully consumed until the io.EOF.
func (z *Reader) Close() error { return z.decompressor.Close() }

269
vendor/github.com/klauspost/compress/gzip/gzip.go generated vendored Normal file
View file

@ -0,0 +1,269 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gzip
import (
"errors"
"fmt"
"hash/crc32"
"io"
"github.com/klauspost/compress/flate"
)
// These constants are copied from the flate package, so that code that imports
// "compress/gzip" does not also have to import "compress/flate".
const (
NoCompression = flate.NoCompression
BestSpeed = flate.BestSpeed
BestCompression = flate.BestCompression
DefaultCompression = flate.DefaultCompression
ConstantCompression = flate.ConstantCompression
HuffmanOnly = flate.HuffmanOnly
// StatelessCompression will do compression but without maintaining any state
// between Write calls.
// There will be no memory kept between Write calls,
// but compression and speed will be suboptimal.
// Because of this, the size of actual Write calls will affect output size.
StatelessCompression = -3
)
// A Writer is an io.WriteCloser.
// Writes to a Writer are compressed and written to w.
type Writer struct {
Header // written at first call to Write, Flush, or Close
w io.Writer
level int
wroteHeader bool
compressor *flate.Writer
digest uint32 // CRC-32, IEEE polynomial (section 8)
size uint32 // Uncompressed size (section 2.3.1)
closed bool
buf [10]byte
err error
}
// NewWriter returns a new Writer.
// Writes to the returned writer are compressed and written to w.
//
// It is the caller's responsibility to call Close on the WriteCloser when done.
// Writes may be buffered and not flushed until Close.
//
// Callers that wish to set the fields in Writer.Header must do so before
// the first call to Write, Flush, or Close.
func NewWriter(w io.Writer) *Writer {
z, _ := NewWriterLevel(w, DefaultCompression)
return z
}
// NewWriterLevel is like NewWriter but specifies the compression level instead
// of assuming DefaultCompression.
//
// The compression level can be DefaultCompression, NoCompression, or any
// integer value between BestSpeed and BestCompression inclusive. The error
// returned will be nil if the level is valid.
func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
if level < StatelessCompression || level > BestCompression {
return nil, fmt.Errorf("gzip: invalid compression level: %d", level)
}
z := new(Writer)
z.init(w, level)
return z, nil
}
func (z *Writer) init(w io.Writer, level int) {
compressor := z.compressor
if level != StatelessCompression {
if compressor != nil {
compressor.Reset(w)
}
}
*z = Writer{
Header: Header{
OS: 255, // unknown
},
w: w,
level: level,
compressor: compressor,
}
}
// Reset discards the Writer z's state and makes it equivalent to the
// result of its original state from NewWriter or NewWriterLevel, but
// writing to w instead. This permits reusing a Writer rather than
// allocating a new one.
func (z *Writer) Reset(w io.Writer) {
z.init(w, z.level)
}
// writeBytes writes a length-prefixed byte slice to z.w.
func (z *Writer) writeBytes(b []byte) error {
if len(b) > 0xffff {
return errors.New("gzip.Write: Extra data is too large")
}
le.PutUint16(z.buf[:2], uint16(len(b)))
_, err := z.w.Write(z.buf[:2])
if err != nil {
return err
}
_, err = z.w.Write(b)
return err
}
// writeString writes a UTF-8 string s in GZIP's format to z.w.
// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
func (z *Writer) writeString(s string) (err error) {
// GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII.
needconv := false
for _, v := range s {
if v == 0 || v > 0xff {
return errors.New("gzip.Write: non-Latin-1 header string")
}
if v > 0x7f {
needconv = true
}
}
if needconv {
b := make([]byte, 0, len(s))
for _, v := range s {
b = append(b, byte(v))
}
_, err = z.w.Write(b)
} else {
_, err = io.WriteString(z.w, s)
}
if err != nil {
return err
}
// GZIP strings are NUL-terminated.
z.buf[0] = 0
_, err = z.w.Write(z.buf[:1])
return err
}
// Write writes a compressed form of p to the underlying io.Writer. The
// compressed bytes are not necessarily flushed until the Writer is closed.
func (z *Writer) Write(p []byte) (int, error) {
if z.err != nil {
return 0, z.err
}
var n int
// Write the GZIP header lazily.
if !z.wroteHeader {
z.wroteHeader = true
z.buf[0] = gzipID1
z.buf[1] = gzipID2
z.buf[2] = gzipDeflate
z.buf[3] = 0
if z.Extra != nil {
z.buf[3] |= 0x04
}
if z.Name != "" {
z.buf[3] |= 0x08
}
if z.Comment != "" {
z.buf[3] |= 0x10
}
le.PutUint32(z.buf[4:8], uint32(z.ModTime.Unix()))
if z.level == BestCompression {
z.buf[8] = 2
} else if z.level == BestSpeed {
z.buf[8] = 4
} else {
z.buf[8] = 0
}
z.buf[9] = z.OS
n, z.err = z.w.Write(z.buf[:10])
if z.err != nil {
return n, z.err
}
if z.Extra != nil {
z.err = z.writeBytes(z.Extra)
if z.err != nil {
return n, z.err
}
}
if z.Name != "" {
z.err = z.writeString(z.Name)
if z.err != nil {
return n, z.err
}
}
if z.Comment != "" {
z.err = z.writeString(z.Comment)
if z.err != nil {
return n, z.err
}
}
if z.compressor == nil && z.level != StatelessCompression {
z.compressor, _ = flate.NewWriter(z.w, z.level)
}
}
z.size += uint32(len(p))
z.digest = crc32.Update(z.digest, crc32.IEEETable, p)
if z.level == StatelessCompression {
return len(p), flate.StatelessDeflate(z.w, p, false, nil)
}
n, z.err = z.compressor.Write(p)
return n, z.err
}
// Flush flushes any pending compressed data to the underlying writer.
//
// It is useful mainly in compressed network protocols, to ensure that
// a remote reader has enough data to reconstruct a packet. Flush does
// not return until the data has been written. If the underlying
// writer returns an error, Flush returns that error.
//
// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
func (z *Writer) Flush() error {
if z.err != nil {
return z.err
}
if z.closed || z.level == StatelessCompression {
return nil
}
if !z.wroteHeader {
z.Write(nil)
if z.err != nil {
return z.err
}
}
z.err = z.compressor.Flush()
return z.err
}
// Close closes the Writer, flushing any unwritten data to the underlying
// io.Writer, but does not close the underlying io.Writer.
func (z *Writer) Close() error {
if z.err != nil {
return z.err
}
if z.closed {
return nil
}
z.closed = true
if !z.wroteHeader {
z.Write(nil)
if z.err != nil {
return z.err
}
}
if z.level == StatelessCompression {
z.err = flate.StatelessDeflate(z.w, nil, true, nil)
} else {
z.err = z.compressor.Close()
}
if z.err != nil {
return z.err
}
le.PutUint32(z.buf[:4], z.digest)
le.PutUint32(z.buf[4:8], z.size)
_, z.err = z.w.Write(z.buf[:8])
return z.err
}

34
vendor/github.com/pierrec/lz4/.gitignore generated vendored Normal file
View file

@ -0,0 +1,34 @@
# Created by https://www.gitignore.io/api/macos
### macOS ###
*.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# End of https://www.gitignore.io/api/macos
cmd/*/*exe
.idea

24
vendor/github.com/pierrec/lz4/.travis.yml generated vendored Normal file
View file

@ -0,0 +1,24 @@
language: go
env:
- GO111MODULE=off
go:
- 1.9.x
- 1.10.x
- 1.11.x
- 1.12.x
- master
matrix:
fast_finish: true
allow_failures:
- go: master
sudo: false
script:
- go test -v -cpu=2
- go test -v -cpu=2 -race
- go test -v -cpu=2 -tags noasm
- go test -v -cpu=2 -race -tags noasm

28
vendor/github.com/pierrec/lz4/LICENSE generated vendored Normal file
View file

@ -0,0 +1,28 @@
Copyright (c) 2015, Pierre Curto
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of xxHash nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

90
vendor/github.com/pierrec/lz4/README.md generated vendored Normal file
View file

@ -0,0 +1,90 @@
# lz4 : LZ4 compression in pure Go
[![GoDoc](https://godoc.org/github.com/pierrec/lz4?status.svg)](https://godoc.org/github.com/pierrec/lz4)
[![Build Status](https://travis-ci.org/pierrec/lz4.svg?branch=master)](https://travis-ci.org/pierrec/lz4)
[![Go Report Card](https://goreportcard.com/badge/github.com/pierrec/lz4)](https://goreportcard.com/report/github.com/pierrec/lz4)
[![GitHub tag (latest SemVer)](https://img.shields.io/github/tag/pierrec/lz4.svg?style=social)](https://github.com/pierrec/lz4/tags)
## Overview
This package provides a streaming interface to [LZ4 data streams](http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html) as well as low level compress and uncompress functions for LZ4 data blocks.
The implementation is based on the reference C [one](https://github.com/lz4/lz4).
## Install
Assuming you have the go toolchain installed:
```
go get github.com/pierrec/lz4
```
There is a command line interface tool to compress and decompress LZ4 files.
```
go install github.com/pierrec/lz4/cmd/lz4c
```
Usage
```
Usage of lz4c:
-version
print the program version
Subcommands:
Compress the given files or from stdin to stdout.
compress [arguments] [<file name> ...]
-bc
enable block checksum
-l int
compression level (0=fastest)
-sc
disable stream checksum
-size string
block max size [64K,256K,1M,4M] (default "4M")
Uncompress the given files or from stdin to stdout.
uncompress [arguments] [<file name> ...]
```
## Example
```
// Compress and uncompress an input string.
s := "hello world"
r := strings.NewReader(s)
// The pipe will uncompress the data from the writer.
pr, pw := io.Pipe()
zw := lz4.NewWriter(pw)
zr := lz4.NewReader(pr)
go func() {
// Compress the input string.
_, _ = io.Copy(zw, r)
_ = zw.Close() // Make sure the writer is closed
_ = pw.Close() // Terminate the pipe
}()
_, _ = io.Copy(os.Stdout, zr)
// Output:
// hello world
```
## Contributing
Contributions are very welcome for bug fixing, performance improvements...!
- Open an issue with a proper description
- Send a pull request with appropriate test case(s)
## Contributors
Thanks to all [contributors](https://github.com/pierrec/lz4/graphs/contributors) so far!
Special thanks to [@Zariel](https://github.com/Zariel) for his asm implementation of the decoder.
Special thanks to [@klauspost](https://github.com/klauspost) for his work on optimizing the code.

387
vendor/github.com/pierrec/lz4/block.go generated vendored Normal file
View file

@ -0,0 +1,387 @@
package lz4
import (
"encoding/binary"
"fmt"
"math/bits"
)
// blockHash hashes the lower 6 bytes into a value < htSize.
func blockHash(x uint64) uint32 {
const prime6bytes = 227718039650203
return uint32(((x << (64 - 48)) * prime6bytes) >> (64 - hashLog))
}
// CompressBlockBound returns the maximum size of a given buffer of size n, when not compressible.
func CompressBlockBound(n int) int {
return n + n/255 + 16
}
// UncompressBlock uncompresses the source buffer into the destination one,
// and returns the uncompressed size.
//
// The destination buffer must be sized appropriately.
//
// An error is returned if the source data is invalid or the destination buffer is too small.
func UncompressBlock(src, dst []byte) (int, error) {
if len(src) == 0 {
return 0, nil
}
if di := decodeBlock(dst, src); di >= 0 {
return di, nil
}
return 0, ErrInvalidSourceShortBuffer
}
// CompressBlock compresses the source buffer into the destination one.
// This is the fast version of LZ4 compression and also the default one.
// The size of hashTable must be at least 64Kb.
//
// The size of the compressed data is returned. If it is 0 and no error, then the data is incompressible.
//
// An error is returned if the destination buffer is too small.
func CompressBlock(src, dst []byte, hashTable []int) (_ int, err error) {
if len(hashTable) < htSize {
return 0, fmt.Errorf("hash table too small, should be at least %d in size", htSize)
}
defer recoverBlock(&err)
// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
// This significantly speeds up incompressible data and usually has very small impact on compresssion.
// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
const adaptSkipLog = 7
sn, dn := len(src)-mfLimit, len(dst)
if sn <= 0 || dn == 0 {
return 0, nil
}
// Prove to the compiler the table has at least htSize elements.
// The compiler can see that "uint32() >> hashShift" cannot be out of bounds.
hashTable = hashTable[:htSize]
// si: Current position of the search.
// anchor: Position of the current literals.
var si, di, anchor int
// Fast scan strategy: the hash table only stores the last 4 bytes sequences.
for si < sn {
// Hash the next 6 bytes (sequence)...
match := binary.LittleEndian.Uint64(src[si:])
h := blockHash(match)
h2 := blockHash(match >> 8)
// We check a match at s, s+1 and s+2 and pick the first one we get.
// Checking 3 only requires us to load the source one.
ref := hashTable[h]
ref2 := hashTable[h2]
hashTable[h] = si
hashTable[h2] = si + 1
offset := si - ref
// If offset <= 0 we got an old entry in the hash table.
if offset <= 0 || offset >= winSize || // Out of window.
uint32(match) != binary.LittleEndian.Uint32(src[ref:]) { // Hash collision on different matches.
// No match. Start calculating another hash.
// The processor can usually do this out-of-order.
h = blockHash(match >> 16)
ref = hashTable[h]
// Check the second match at si+1
si += 1
offset = si - ref2
if offset <= 0 || offset >= winSize ||
uint32(match>>8) != binary.LittleEndian.Uint32(src[ref2:]) {
// No match. Check the third match at si+2
si += 1
offset = si - ref
hashTable[h] = si
if offset <= 0 || offset >= winSize ||
uint32(match>>16) != binary.LittleEndian.Uint32(src[ref:]) {
// Skip one extra byte (at si+3) before we check 3 matches again.
si += 2 + (si-anchor)>>adaptSkipLog
continue
}
}
}
// Match found.
lLen := si - anchor // Literal length.
// We already matched 4 bytes.
mLen := 4
// Extend backwards if we can, reducing literals.
tOff := si - offset - 1
for lLen > 0 && tOff >= 0 && src[si-1] == src[tOff] {
si--
tOff--
lLen--
mLen++
}
// Add the match length, so we continue search at the end.
// Use mLen to store the offset base.
si, mLen = si+mLen, si+minMatch
// Find the longest match by looking by batches of 8 bytes.
for si+8 < sn {
x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:])
if x == 0 {
si += 8
} else {
// Stop is first non-zero byte.
si += bits.TrailingZeros64(x) >> 3
break
}
}
mLen = si - mLen
if mLen < 0xF {
dst[di] = byte(mLen)
} else {
dst[di] = 0xF
}
// Encode literals length.
if lLen < 0xF {
dst[di] |= byte(lLen << 4)
} else {
dst[di] |= 0xF0
di++
l := lLen - 0xF
for ; l >= 0xFF; l -= 0xFF {
dst[di] = 0xFF
di++
}
dst[di] = byte(l)
}
di++
// Literals.
copy(dst[di:di+lLen], src[anchor:anchor+lLen])
di += lLen + 2
anchor = si
// Encode offset.
_ = dst[di] // Bound check elimination.
dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
// Encode match length part 2.
if mLen >= 0xF {
for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
dst[di] = 0xFF
di++
}
dst[di] = byte(mLen)
di++
}
// Check if we can load next values.
if si >= sn {
break
}
// Hash match end-2
h = blockHash(binary.LittleEndian.Uint64(src[si-2:]))
hashTable[h] = si - 2
}
if anchor == 0 {
// Incompressible.
return 0, nil
}
// Last literals.
lLen := len(src) - anchor
if lLen < 0xF {
dst[di] = byte(lLen << 4)
} else {
dst[di] = 0xF0
di++
for lLen -= 0xF; lLen >= 0xFF; lLen -= 0xFF {
dst[di] = 0xFF
di++
}
dst[di] = byte(lLen)
}
di++
// Write the last literals.
if di >= anchor {
// Incompressible.
return 0, nil
}
di += copy(dst[di:di+len(src)-anchor], src[anchor:])
return di, nil
}
// blockHash hashes 4 bytes into a value < winSize.
func blockHashHC(x uint32) uint32 {
const hasher uint32 = 2654435761 // Knuth multiplicative hash.
return x * hasher >> (32 - winSizeLog)
}
// CompressBlockHC compresses the source buffer src into the destination dst
// with max search depth (use 0 or negative value for no max).
//
// CompressBlockHC compression ratio is better than CompressBlock but it is also slower.
//
// The size of the compressed data is returned. If it is 0 and no error, then the data is not compressible.
//
// An error is returned if the destination buffer is too small.
func CompressBlockHC(src, dst []byte, depth int) (_ int, err error) {
defer recoverBlock(&err)
// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
// This significantly speeds up incompressible data and usually has very small impact on compresssion.
// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
const adaptSkipLog = 7
sn, dn := len(src)-mfLimit, len(dst)
if sn <= 0 || dn == 0 {
return 0, nil
}
var si, di int
// hashTable: stores the last position found for a given hash
// chainTable: stores previous positions for a given hash
var hashTable, chainTable [winSize]int
if depth <= 0 {
depth = winSize
}
anchor := si
for si < sn {
// Hash the next 4 bytes (sequence).
match := binary.LittleEndian.Uint32(src[si:])
h := blockHashHC(match)
// Follow the chain until out of window and give the longest match.
mLen := 0
offset := 0
for next, try := hashTable[h], depth; try > 0 && next > 0 && si-next < winSize; next = chainTable[next&winMask] {
// The first (mLen==0) or next byte (mLen>=minMatch) at current match length
// must match to improve on the match length.
if src[next+mLen] != src[si+mLen] {
continue
}
ml := 0
// Compare the current position with a previous with the same hash.
for ml < sn-si {
x := binary.LittleEndian.Uint64(src[next+ml:]) ^ binary.LittleEndian.Uint64(src[si+ml:])
if x == 0 {
ml += 8
} else {
// Stop is first non-zero byte.
ml += bits.TrailingZeros64(x) >> 3
break
}
}
if ml < minMatch || ml <= mLen {
// Match too small (<minMath) or smaller than the current match.
continue
}
// Found a longer match, keep its position and length.
mLen = ml
offset = si - next
// Try another previous position with the same hash.
try--
}
chainTable[si&winMask] = hashTable[h]
hashTable[h] = si
// No match found.
if mLen == 0 {
si += 1 + (si-anchor)>>adaptSkipLog
continue
}
// Match found.
// Update hash/chain tables with overlapping bytes:
// si already hashed, add everything from si+1 up to the match length.
winStart := si + 1
if ws := si + mLen - winSize; ws > winStart {
winStart = ws
}
for si, ml := winStart, si+mLen; si < ml; {
match >>= 8
match |= uint32(src[si+3]) << 24
h := blockHashHC(match)
chainTable[si&winMask] = hashTable[h]
hashTable[h] = si
si++
}
lLen := si - anchor
si += mLen
mLen -= minMatch // Match length does not include minMatch.
if mLen < 0xF {
dst[di] = byte(mLen)
} else {
dst[di] = 0xF
}
// Encode literals length.
if lLen < 0xF {
dst[di] |= byte(lLen << 4)
} else {
dst[di] |= 0xF0
di++
l := lLen - 0xF
for ; l >= 0xFF; l -= 0xFF {
dst[di] = 0xFF
di++
}
dst[di] = byte(l)
}
di++
// Literals.
copy(dst[di:di+lLen], src[anchor:anchor+lLen])
di += lLen
anchor = si
// Encode offset.
di += 2
dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
// Encode match length part 2.
if mLen >= 0xF {
for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
dst[di] = 0xFF
di++
}
dst[di] = byte(mLen)
di++
}
}
if anchor == 0 {
// Incompressible.
return 0, nil
}
// Last literals.
lLen := len(src) - anchor
if lLen < 0xF {
dst[di] = byte(lLen << 4)
} else {
dst[di] = 0xF0
di++
lLen -= 0xF
for ; lLen >= 0xFF; lLen -= 0xFF {
dst[di] = 0xFF
di++
}
dst[di] = byte(lLen)
}
di++
// Write the last literals.
if di >= anchor {
// Incompressible.
return 0, nil
}
di += copy(dst[di:di+len(src)-anchor], src[anchor:])
return di, nil
}

23
vendor/github.com/pierrec/lz4/debug.go generated vendored Normal file
View file

@ -0,0 +1,23 @@
// +build lz4debug
package lz4
import (
"fmt"
"os"
"path/filepath"
"runtime"
)
const debugFlag = true
func debug(args ...interface{}) {
_, file, line, _ := runtime.Caller(1)
file = filepath.Base(file)
f := fmt.Sprintf("LZ4: %s:%d %s", file, line, args[0])
if f[len(f)-1] != '\n' {
f += "\n"
}
fmt.Fprintf(os.Stderr, f, args[1:]...)
}

7
vendor/github.com/pierrec/lz4/debug_stub.go generated vendored Normal file
View file

@ -0,0 +1,7 @@
// +build !lz4debug
package lz4
const debugFlag = false
func debug(args ...interface{}) {}

8
vendor/github.com/pierrec/lz4/decode_amd64.go generated vendored Normal file
View file

@ -0,0 +1,8 @@
// +build !appengine
// +build gc
// +build !noasm
package lz4
//go:noescape
func decodeBlock(dst, src []byte) int

375
vendor/github.com/pierrec/lz4/decode_amd64.s generated vendored Normal file
View file

@ -0,0 +1,375 @@
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
// AX scratch
// BX scratch
// CX scratch
// DX token
//
// DI &dst
// SI &src
// R8 &dst + len(dst)
// R9 &src + len(src)
// R11 &dst
// R12 short output end
// R13 short input end
// func decodeBlock(dst, src []byte) int
// using 50 bytes of stack currently
TEXT ·decodeBlock(SB), NOSPLIT, $64-56
MOVQ dst_base+0(FP), DI
MOVQ DI, R11
MOVQ dst_len+8(FP), R8
ADDQ DI, R8
MOVQ src_base+24(FP), SI
MOVQ src_len+32(FP), R9
ADDQ SI, R9
// shortcut ends
// short output end
MOVQ R8, R12
SUBQ $32, R12
// short input end
MOVQ R9, R13
SUBQ $16, R13
loop:
// for si < len(src)
CMPQ SI, R9
JGE end
// token := uint32(src[si])
MOVBQZX (SI), DX
INCQ SI
// lit_len = token >> 4
// if lit_len > 0
// CX = lit_len
MOVQ DX, CX
SHRQ $4, CX
// if lit_len != 0xF
CMPQ CX, $0xF
JEQ lit_len_loop_pre
CMPQ DI, R12
JGE lit_len_loop_pre
CMPQ SI, R13
JGE lit_len_loop_pre
// copy shortcut
// A two-stage shortcut for the most common case:
// 1) If the literal length is 0..14, and there is enough space,
// enter the shortcut and copy 16 bytes on behalf of the literals
// (in the fast mode, only 8 bytes can be safely copied this way).
// 2) Further if the match length is 4..18, copy 18 bytes in a similar
// manner; but we ensure that there's enough space in the output for
// those 18 bytes earlier, upon entering the shortcut (in other words,
// there is a combined check for both stages).
// copy literal
MOVOU (SI), X0
MOVOU X0, (DI)
ADDQ CX, DI
ADDQ CX, SI
MOVQ DX, CX
ANDQ $0xF, CX
// The second stage: prepare for match copying, decode full info.
// If it doesn't work out, the info won't be wasted.
// offset := uint16(data[:2])
MOVWQZX (SI), DX
ADDQ $2, SI
MOVQ DI, AX
SUBQ DX, AX
CMPQ AX, DI
JGT err_short_buf
// if we can't do the second stage then jump straight to read the
// match length, we already have the offset.
CMPQ CX, $0xF
JEQ match_len_loop_pre
CMPQ DX, $8
JLT match_len_loop_pre
CMPQ AX, R11
JLT err_short_buf
// memcpy(op + 0, match + 0, 8);
MOVQ (AX), BX
MOVQ BX, (DI)
// memcpy(op + 8, match + 8, 8);
MOVQ 8(AX), BX
MOVQ BX, 8(DI)
// memcpy(op +16, match +16, 2);
MOVW 16(AX), BX
MOVW BX, 16(DI)
ADDQ $4, DI // minmatch
ADDQ CX, DI
// shortcut complete, load next token
JMP loop
lit_len_loop_pre:
// if lit_len > 0
CMPQ CX, $0
JEQ offset
CMPQ CX, $0xF
JNE copy_literal
lit_len_loop:
// for src[si] == 0xFF
CMPB (SI), $0xFF
JNE lit_len_finalise
// bounds check src[si+1]
MOVQ SI, AX
ADDQ $1, AX
CMPQ AX, R9
JGT err_short_buf
// lit_len += 0xFF
ADDQ $0xFF, CX
INCQ SI
JMP lit_len_loop
lit_len_finalise:
// lit_len += int(src[si])
// si++
MOVBQZX (SI), AX
ADDQ AX, CX
INCQ SI
copy_literal:
// bounds check src and dst
MOVQ SI, AX
ADDQ CX, AX
CMPQ AX, R9
JGT err_short_buf
MOVQ DI, AX
ADDQ CX, AX
CMPQ AX, R8
JGT err_short_buf
// whats a good cut off to call memmove?
CMPQ CX, $16
JGT memmove_lit
// if len(dst[di:]) < 16
MOVQ R8, AX
SUBQ DI, AX
CMPQ AX, $16
JLT memmove_lit
// if len(src[si:]) < 16
MOVQ R9, AX
SUBQ SI, AX
CMPQ AX, $16
JLT memmove_lit
MOVOU (SI), X0
MOVOU X0, (DI)
JMP finish_lit_copy
memmove_lit:
// memmove(to, from, len)
MOVQ DI, 0(SP)
MOVQ SI, 8(SP)
MOVQ CX, 16(SP)
// spill
MOVQ DI, 24(SP)
MOVQ SI, 32(SP)
MOVQ CX, 40(SP) // need len to inc SI, DI after
MOVB DX, 48(SP)
CALL runtime·memmove(SB)
// restore registers
MOVQ 24(SP), DI
MOVQ 32(SP), SI
MOVQ 40(SP), CX
MOVB 48(SP), DX
// recalc initial values
MOVQ dst_base+0(FP), R8
MOVQ R8, R11
ADDQ dst_len+8(FP), R8
MOVQ src_base+24(FP), R9
ADDQ src_len+32(FP), R9
MOVQ R8, R12
SUBQ $32, R12
MOVQ R9, R13
SUBQ $16, R13
finish_lit_copy:
ADDQ CX, SI
ADDQ CX, DI
CMPQ SI, R9
JGE end
offset:
// CX := mLen
// free up DX to use for offset
MOVQ DX, CX
MOVQ SI, AX
ADDQ $2, AX
CMPQ AX, R9
JGT err_short_buf
// offset
// DX := int(src[si]) | int(src[si+1])<<8
MOVWQZX (SI), DX
ADDQ $2, SI
// 0 offset is invalid
CMPQ DX, $0
JEQ err_corrupt
ANDB $0xF, CX
match_len_loop_pre:
// if mlen != 0xF
CMPB CX, $0xF
JNE copy_match
match_len_loop:
// for src[si] == 0xFF
// lit_len += 0xFF
CMPB (SI), $0xFF
JNE match_len_finalise
// bounds check src[si+1]
MOVQ SI, AX
ADDQ $1, AX
CMPQ AX, R9
JGT err_short_buf
ADDQ $0xFF, CX
INCQ SI
JMP match_len_loop
match_len_finalise:
// lit_len += int(src[si])
// si++
MOVBQZX (SI), AX
ADDQ AX, CX
INCQ SI
copy_match:
// mLen += minMatch
ADDQ $4, CX
// check we have match_len bytes left in dst
// di+match_len < len(dst)
MOVQ DI, AX
ADDQ CX, AX
CMPQ AX, R8
JGT err_short_buf
// DX = offset
// CX = match_len
// BX = &dst + (di - offset)
MOVQ DI, BX
SUBQ DX, BX
// check BX is within dst
// if BX < &dst
CMPQ BX, R11
JLT err_short_buf
// if offset + match_len < di
MOVQ BX, AX
ADDQ CX, AX
CMPQ DI, AX
JGT copy_interior_match
// AX := len(dst[:di])
// MOVQ DI, AX
// SUBQ R11, AX
// copy 16 bytes at a time
// if di-offset < 16 copy 16-(di-offset) bytes to di
// then do the remaining
copy_match_loop:
// for match_len >= 0
// dst[di] = dst[i]
// di++
// i++
MOVB (BX), AX
MOVB AX, (DI)
INCQ DI
INCQ BX
DECQ CX
CMPQ CX, $0
JGT copy_match_loop
JMP loop
copy_interior_match:
CMPQ CX, $16
JGT memmove_match
// if len(dst[di:]) < 16
MOVQ R8, AX
SUBQ DI, AX
CMPQ AX, $16
JLT memmove_match
MOVOU (BX), X0
MOVOU X0, (DI)
ADDQ CX, DI
JMP loop
memmove_match:
// memmove(to, from, len)
MOVQ DI, 0(SP)
MOVQ BX, 8(SP)
MOVQ CX, 16(SP)
// spill
MOVQ DI, 24(SP)
MOVQ SI, 32(SP)
MOVQ CX, 40(SP) // need len to inc SI, DI after
CALL runtime·memmove(SB)
// restore registers
MOVQ 24(SP), DI
MOVQ 32(SP), SI
MOVQ 40(SP), CX
// recalc initial values
MOVQ dst_base+0(FP), R8
MOVQ R8, R11 // TODO: make these sensible numbers
ADDQ dst_len+8(FP), R8
MOVQ src_base+24(FP), R9
ADDQ src_len+32(FP), R9
MOVQ R8, R12
SUBQ $32, R12
MOVQ R9, R13
SUBQ $16, R13
ADDQ CX, DI
JMP loop
err_corrupt:
MOVQ $-1, ret+48(FP)
RET
err_short_buf:
MOVQ $-2, ret+48(FP)
RET
end:
SUBQ R11, DI
MOVQ DI, ret+48(FP)
RET

98
vendor/github.com/pierrec/lz4/decode_other.go generated vendored Normal file
View file

@ -0,0 +1,98 @@
// +build !amd64 appengine !gc noasm
package lz4
func decodeBlock(dst, src []byte) (ret int) {
const hasError = -2
defer func() {
if recover() != nil {
ret = hasError
}
}()
var si, di int
for {
// Literals and match lengths (token).
b := int(src[si])
si++
// Literals.
if lLen := b >> 4; lLen > 0 {
switch {
case lLen < 0xF && si+16 < len(src):
// Shortcut 1
// if we have enough room in src and dst, and the literals length
// is small enough (0..14) then copy all 16 bytes, even if not all
// are part of the literals.
copy(dst[di:], src[si:si+16])
si += lLen
di += lLen
if mLen := b & 0xF; mLen < 0xF {
// Shortcut 2
// if the match length (4..18) fits within the literals, then copy
// all 18 bytes, even if not all are part of the literals.
mLen += 4
if offset := int(src[si]) | int(src[si+1])<<8; mLen <= offset {
i := di - offset
end := i + 18
if end > len(dst) {
// The remaining buffer may not hold 18 bytes.
// See https://github.com/pierrec/lz4/issues/51.
end = len(dst)
}
copy(dst[di:], dst[i:end])
si += 2
di += mLen
continue
}
}
case lLen == 0xF:
for src[si] == 0xFF {
lLen += 0xFF
si++
}
lLen += int(src[si])
si++
fallthrough
default:
copy(dst[di:di+lLen], src[si:si+lLen])
si += lLen
di += lLen
}
}
if si >= len(src) {
return di
}
offset := int(src[si]) | int(src[si+1])<<8
if offset == 0 {
return hasError
}
si += 2
// Match.
mLen := b & 0xF
if mLen == 0xF {
for src[si] == 0xFF {
mLen += 0xFF
si++
}
mLen += int(src[si])
si++
}
mLen += minMatch
// Copy the match.
expanded := dst[di-offset:]
if mLen > offset {
// Efficiently copy the match dst[di-offset:di] into the dst slice.
bytesToCopy := offset * (mLen / offset)
for n := offset; n <= bytesToCopy+offset; n *= 2 {
copy(expanded[n:], expanded[:n])
}
di += bytesToCopy
mLen -= bytesToCopy
}
di += copy(dst[di:di+mLen], expanded[:mLen])
}
}

30
vendor/github.com/pierrec/lz4/errors.go generated vendored Normal file
View file

@ -0,0 +1,30 @@
package lz4
import (
"errors"
"fmt"
"os"
rdebug "runtime/debug"
)
var (
// ErrInvalidSourceShortBuffer is returned by UncompressBlock or CompressBLock when a compressed
// block is corrupted or the destination buffer is not large enough for the uncompressed data.
ErrInvalidSourceShortBuffer = errors.New("lz4: invalid source or destination buffer too short")
// ErrInvalid is returned when reading an invalid LZ4 archive.
ErrInvalid = errors.New("lz4: bad magic number")
// ErrBlockDependency is returned when attempting to decompress an archive created with block dependency.
ErrBlockDependency = errors.New("lz4: block dependency not supported")
// ErrUnsupportedSeek is returned when attempting to Seek any way but forward from the current position.
ErrUnsupportedSeek = errors.New("lz4: can only seek forward from io.SeekCurrent")
)
func recoverBlock(e *error) {
if r := recover(); r != nil && *e == nil {
if debugFlag {
fmt.Fprintln(os.Stderr, r)
rdebug.PrintStack()
}
*e = ErrInvalidSourceShortBuffer
}
}

View file

@ -0,0 +1,223 @@
// Package xxh32 implements the very fast XXH hashing algorithm (32 bits version).
// (https://github.com/Cyan4973/XXH/)
package xxh32
import (
"encoding/binary"
)
const (
prime1 uint32 = 2654435761
prime2 uint32 = 2246822519
prime3 uint32 = 3266489917
prime4 uint32 = 668265263
prime5 uint32 = 374761393
primeMask = 0xFFFFFFFF
prime1plus2 = uint32((uint64(prime1) + uint64(prime2)) & primeMask) // 606290984
prime1minus = uint32((-int64(prime1)) & primeMask) // 1640531535
)
// XXHZero represents an xxhash32 object with seed 0.
type XXHZero struct {
v1 uint32
v2 uint32
v3 uint32
v4 uint32
totalLen uint64
buf [16]byte
bufused int
}
// Sum appends the current hash to b and returns the resulting slice.
// It does not change the underlying hash state.
func (xxh XXHZero) Sum(b []byte) []byte {
h32 := xxh.Sum32()
return append(b, byte(h32), byte(h32>>8), byte(h32>>16), byte(h32>>24))
}
// Reset resets the Hash to its initial state.
func (xxh *XXHZero) Reset() {
xxh.v1 = prime1plus2
xxh.v2 = prime2
xxh.v3 = 0
xxh.v4 = prime1minus
xxh.totalLen = 0
xxh.bufused = 0
}
// Size returns the number of bytes returned by Sum().
func (xxh *XXHZero) Size() int {
return 4
}
// BlockSize gives the minimum number of bytes accepted by Write().
func (xxh *XXHZero) BlockSize() int {
return 1
}
// Write adds input bytes to the Hash.
// It never returns an error.
func (xxh *XXHZero) Write(input []byte) (int, error) {
if xxh.totalLen == 0 {
xxh.Reset()
}
n := len(input)
m := xxh.bufused
xxh.totalLen += uint64(n)
r := len(xxh.buf) - m
if n < r {
copy(xxh.buf[m:], input)
xxh.bufused += len(input)
return n, nil
}
p := 0
// Causes compiler to work directly from registers instead of stack:
v1, v2, v3, v4 := xxh.v1, xxh.v2, xxh.v3, xxh.v4
if m > 0 {
// some data left from previous update
copy(xxh.buf[xxh.bufused:], input[:r])
xxh.bufused += len(input) - r
// fast rotl(13)
buf := xxh.buf[:16] // BCE hint.
v1 = rol13(v1+binary.LittleEndian.Uint32(buf[:])*prime2) * prime1
v2 = rol13(v2+binary.LittleEndian.Uint32(buf[4:])*prime2) * prime1
v3 = rol13(v3+binary.LittleEndian.Uint32(buf[8:])*prime2) * prime1
v4 = rol13(v4+binary.LittleEndian.Uint32(buf[12:])*prime2) * prime1
p = r
xxh.bufused = 0
}
for n := n - 16; p <= n; p += 16 {
sub := input[p:][:16] //BCE hint for compiler
v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1
v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1
v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1
v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1
}
xxh.v1, xxh.v2, xxh.v3, xxh.v4 = v1, v2, v3, v4
copy(xxh.buf[xxh.bufused:], input[p:])
xxh.bufused += len(input) - p
return n, nil
}
// Sum32 returns the 32 bits Hash value.
func (xxh *XXHZero) Sum32() uint32 {
h32 := uint32(xxh.totalLen)
if h32 >= 16 {
h32 += rol1(xxh.v1) + rol7(xxh.v2) + rol12(xxh.v3) + rol18(xxh.v4)
} else {
h32 += prime5
}
p := 0
n := xxh.bufused
buf := xxh.buf
for n := n - 4; p <= n; p += 4 {
h32 += binary.LittleEndian.Uint32(buf[p:p+4]) * prime3
h32 = rol17(h32) * prime4
}
for ; p < n; p++ {
h32 += uint32(buf[p]) * prime5
h32 = rol11(h32) * prime1
}
h32 ^= h32 >> 15
h32 *= prime2
h32 ^= h32 >> 13
h32 *= prime3
h32 ^= h32 >> 16
return h32
}
// ChecksumZero returns the 32bits Hash value.
func ChecksumZero(input []byte) uint32 {
n := len(input)
h32 := uint32(n)
if n < 16 {
h32 += prime5
} else {
v1 := prime1plus2
v2 := prime2
v3 := uint32(0)
v4 := prime1minus
p := 0
for n := n - 16; p <= n; p += 16 {
sub := input[p:][:16] //BCE hint for compiler
v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1
v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1
v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1
v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1
}
input = input[p:]
n -= p
h32 += rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
}
p := 0
for n := n - 4; p <= n; p += 4 {
h32 += binary.LittleEndian.Uint32(input[p:p+4]) * prime3
h32 = rol17(h32) * prime4
}
for p < n {
h32 += uint32(input[p]) * prime5
h32 = rol11(h32) * prime1
p++
}
h32 ^= h32 >> 15
h32 *= prime2
h32 ^= h32 >> 13
h32 *= prime3
h32 ^= h32 >> 16
return h32
}
// Uint32Zero hashes x with seed 0.
func Uint32Zero(x uint32) uint32 {
h := prime5 + 4 + x*prime3
h = rol17(h) * prime4
h ^= h >> 15
h *= prime2
h ^= h >> 13
h *= prime3
h ^= h >> 16
return h
}
func rol1(u uint32) uint32 {
return u<<1 | u>>31
}
func rol7(u uint32) uint32 {
return u<<7 | u>>25
}
func rol11(u uint32) uint32 {
return u<<11 | u>>21
}
func rol12(u uint32) uint32 {
return u<<12 | u>>20
}
func rol13(u uint32) uint32 {
return u<<13 | u>>19
}
func rol17(u uint32) uint32 {
return u<<17 | u>>15
}
func rol18(u uint32) uint32 {
return u<<18 | u>>14
}

113
vendor/github.com/pierrec/lz4/lz4.go generated vendored Normal file
View file

@ -0,0 +1,113 @@
// Package lz4 implements reading and writing lz4 compressed data (a frame),
// as specified in http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html.
//
// Although the block level compression and decompression functions are exposed and are fully compatible
// with the lz4 block format definition, they are low level and should not be used directly.
// For a complete description of an lz4 compressed block, see:
// http://fastcompression.blogspot.fr/2011/05/lz4-explained.html
//
// See https://github.com/Cyan4973/lz4 for the reference C implementation.
//
package lz4
import "math/bits"
import "sync"
const (
// Extension is the LZ4 frame file name extension
Extension = ".lz4"
// Version is the LZ4 frame format version
Version = 1
frameMagic uint32 = 0x184D2204
frameSkipMagic uint32 = 0x184D2A50
// The following constants are used to setup the compression algorithm.
minMatch = 4 // the minimum size of the match sequence size (4 bytes)
winSizeLog = 16 // LZ4 64Kb window size limit
winSize = 1 << winSizeLog
winMask = winSize - 1 // 64Kb window of previous data for dependent blocks
compressedBlockFlag = 1 << 31
compressedBlockMask = compressedBlockFlag - 1
// hashLog determines the size of the hash table used to quickly find a previous match position.
// Its value influences the compression speed and memory usage, the lower the faster,
// but at the expense of the compression ratio.
// 16 seems to be the best compromise for fast compression.
hashLog = 16
htSize = 1 << hashLog
mfLimit = 10 + minMatch // The last match cannot start within the last 14 bytes.
)
// map the block max size id with its value in bytes: 64Kb, 256Kb, 1Mb and 4Mb.
const (
blockSize64K = 1 << (16 + 2*iota)
blockSize256K
blockSize1M
blockSize4M
)
var (
// Keep a pool of buffers for each valid block sizes.
bsMapValue = [...]*sync.Pool{
newBufferPool(2 * blockSize64K),
newBufferPool(2 * blockSize256K),
newBufferPool(2 * blockSize1M),
newBufferPool(2 * blockSize4M),
}
)
// newBufferPool returns a pool for buffers of the given size.
func newBufferPool(size int) *sync.Pool {
return &sync.Pool{
New: func() interface{} {
return make([]byte, size)
},
}
}
// getBuffer returns a buffer to its pool.
func getBuffer(size int) []byte {
idx := blockSizeValueToIndex(size) - 4
return bsMapValue[idx].Get().([]byte)
}
// putBuffer returns a buffer to its pool.
func putBuffer(size int, buf []byte) {
if cap(buf) > 0 {
idx := blockSizeValueToIndex(size) - 4
bsMapValue[idx].Put(buf[:cap(buf)])
}
}
func blockSizeIndexToValue(i byte) int {
return 1 << (16 + 2*uint(i))
}
func isValidBlockSize(size int) bool {
const blockSizeMask = blockSize64K | blockSize256K | blockSize1M | blockSize4M
return size&blockSizeMask > 0 && bits.OnesCount(uint(size)) == 1
}
func blockSizeValueToIndex(size int) byte {
return 4 + byte(bits.TrailingZeros(uint(size)>>16)/2)
}
// Header describes the various flags that can be set on a Writer or obtained from a Reader.
// The default values match those of the LZ4 frame format definition
// (http://fastcompression.blogspot.com/2013/04/lz4-streaming-format-final.html).
//
// NB. in a Reader, in case of concatenated frames, the Header values may change between Read() calls.
// It is the caller responsibility to check them if necessary.
type Header struct {
BlockChecksum bool // Compressed blocks checksum flag.
NoChecksum bool // Frame checksum flag.
BlockMaxSize int // Size of the uncompressed data block (one of [64KB, 256KB, 1MB, 4MB]). Default=4MB.
Size uint64 // Frame total size. It is _not_ computed by the Writer.
CompressionLevel int // Compression level (higher is better, use 0 for fastest compression).
done bool // Header processed flag (Read or Write and checked).
}
func (h *Header) Reset() {
h.done = false
}

29
vendor/github.com/pierrec/lz4/lz4_go1.10.go generated vendored Normal file
View file

@ -0,0 +1,29 @@
//+build go1.10
package lz4
import (
"fmt"
"strings"
)
func (h Header) String() string {
var s strings.Builder
s.WriteString(fmt.Sprintf("%T{", h))
if h.BlockChecksum {
s.WriteString("BlockChecksum: true ")
}
if h.NoChecksum {
s.WriteString("NoChecksum: true ")
}
if bs := h.BlockMaxSize; bs != 0 && bs != 4<<20 {
s.WriteString(fmt.Sprintf("BlockMaxSize: %d ", bs))
}
if l := h.CompressionLevel; l != 0 {
s.WriteString(fmt.Sprintf("CompressionLevel: %d ", l))
}
s.WriteByte('}')
return s.String()
}

29
vendor/github.com/pierrec/lz4/lz4_notgo1.10.go generated vendored Normal file
View file

@ -0,0 +1,29 @@
//+build !go1.10
package lz4
import (
"bytes"
"fmt"
)
func (h Header) String() string {
var s bytes.Buffer
s.WriteString(fmt.Sprintf("%T{", h))
if h.BlockChecksum {
s.WriteString("BlockChecksum: true ")
}
if h.NoChecksum {
s.WriteString("NoChecksum: true ")
}
if bs := h.BlockMaxSize; bs != 0 && bs != 4<<20 {
s.WriteString(fmt.Sprintf("BlockMaxSize: %d ", bs))
}
if l := h.CompressionLevel; l != 0 {
s.WriteString(fmt.Sprintf("CompressionLevel: %d ", l))
}
s.WriteByte('}')
return s.String()
}

335
vendor/github.com/pierrec/lz4/reader.go generated vendored Normal file
View file

@ -0,0 +1,335 @@
package lz4
import (
"encoding/binary"
"fmt"
"io"
"io/ioutil"
"github.com/pierrec/lz4/internal/xxh32"
)
// Reader implements the LZ4 frame decoder.
// The Header is set after the first call to Read().
// The Header may change between Read() calls in case of concatenated frames.
type Reader struct {
Header
// Handler called when a block has been successfully read.
// It provides the number of bytes read.
OnBlockDone func(size int)
buf [8]byte // Scrap buffer.
pos int64 // Current position in src.
src io.Reader // Source.
zdata []byte // Compressed data.
data []byte // Uncompressed data.
idx int // Index of unread bytes into data.
checksum xxh32.XXHZero // Frame hash.
skip int64 // Bytes to skip before next read.
dpos int64 // Position in dest
}
// NewReader returns a new LZ4 frame decoder.
// No access to the underlying io.Reader is performed.
func NewReader(src io.Reader) *Reader {
r := &Reader{src: src}
return r
}
// readHeader checks the frame magic number and parses the frame descriptoz.
// Skippable frames are supported even as a first frame although the LZ4
// specifications recommends skippable frames not to be used as first frames.
func (z *Reader) readHeader(first bool) error {
defer z.checksum.Reset()
buf := z.buf[:]
for {
magic, err := z.readUint32()
if err != nil {
z.pos += 4
if !first && err == io.ErrUnexpectedEOF {
return io.EOF
}
return err
}
if magic == frameMagic {
break
}
if magic>>8 != frameSkipMagic>>8 {
return ErrInvalid
}
skipSize, err := z.readUint32()
if err != nil {
return err
}
z.pos += 4
m, err := io.CopyN(ioutil.Discard, z.src, int64(skipSize))
if err != nil {
return err
}
z.pos += m
}
// Header.
if _, err := io.ReadFull(z.src, buf[:2]); err != nil {
return err
}
z.pos += 8
b := buf[0]
if v := b >> 6; v != Version {
return fmt.Errorf("lz4: invalid version: got %d; expected %d", v, Version)
}
if b>>5&1 == 0 {
return ErrBlockDependency
}
z.BlockChecksum = b>>4&1 > 0
frameSize := b>>3&1 > 0
z.NoChecksum = b>>2&1 == 0
bmsID := buf[1] >> 4 & 0x7
if bmsID < 4 || bmsID > 7 {
return fmt.Errorf("lz4: invalid block max size ID: %d", bmsID)
}
bSize := blockSizeIndexToValue(bmsID - 4)
z.BlockMaxSize = bSize
// Allocate the compressed/uncompressed buffers.
// The compressed buffer cannot exceed the uncompressed one.
if n := 2 * bSize; cap(z.zdata) < n {
z.zdata = make([]byte, n, n)
}
if debugFlag {
debug("header block max size id=%d size=%d", bmsID, bSize)
}
z.zdata = z.zdata[:bSize]
z.data = z.zdata[:cap(z.zdata)][bSize:]
z.idx = len(z.data)
_, _ = z.checksum.Write(buf[0:2])
if frameSize {
buf := buf[:8]
if _, err := io.ReadFull(z.src, buf); err != nil {
return err
}
z.Size = binary.LittleEndian.Uint64(buf)
z.pos += 8
_, _ = z.checksum.Write(buf)
}
// Header checksum.
if _, err := io.ReadFull(z.src, buf[:1]); err != nil {
return err
}
z.pos++
if h := byte(z.checksum.Sum32() >> 8 & 0xFF); h != buf[0] {
return fmt.Errorf("lz4: invalid header checksum: got %x; expected %x", buf[0], h)
}
z.Header.done = true
if debugFlag {
debug("header read: %v", z.Header)
}
return nil
}
// Read decompresses data from the underlying source into the supplied buffer.
//
// Since there can be multiple streams concatenated, Header values may
// change between calls to Read(). If that is the case, no data is actually read from
// the underlying io.Reader, to allow for potential input buffer resizing.
func (z *Reader) Read(buf []byte) (int, error) {
if debugFlag {
debug("Read buf len=%d", len(buf))
}
if !z.Header.done {
if err := z.readHeader(true); err != nil {
return 0, err
}
if debugFlag {
debug("header read OK compressed buffer %d / %d uncompressed buffer %d : %d index=%d",
len(z.zdata), cap(z.zdata), len(z.data), cap(z.data), z.idx)
}
}
if len(buf) == 0 {
return 0, nil
}
if z.idx == len(z.data) {
// No data ready for reading, process the next block.
if debugFlag {
debug("reading block from writer")
}
// Reset uncompressed buffer
z.data = z.zdata[:cap(z.zdata)][len(z.zdata):]
// Block length: 0 = end of frame, highest bit set: uncompressed.
bLen, err := z.readUint32()
if err != nil {
return 0, err
}
z.pos += 4
if bLen == 0 {
// End of frame reached.
if !z.NoChecksum {
// Validate the frame checksum.
checksum, err := z.readUint32()
if err != nil {
return 0, err
}
if debugFlag {
debug("frame checksum got=%x / want=%x", z.checksum.Sum32(), checksum)
}
z.pos += 4
if h := z.checksum.Sum32(); checksum != h {
return 0, fmt.Errorf("lz4: invalid frame checksum: got %x; expected %x", h, checksum)
}
}
// Get ready for the next concatenated frame and keep the position.
pos := z.pos
z.Reset(z.src)
z.pos = pos
// Since multiple frames can be concatenated, check for more.
return 0, z.readHeader(false)
}
if debugFlag {
debug("raw block size %d", bLen)
}
if bLen&compressedBlockFlag > 0 {
// Uncompressed block.
bLen &= compressedBlockMask
if debugFlag {
debug("uncompressed block size %d", bLen)
}
if int(bLen) > cap(z.data) {
return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
}
z.data = z.data[:bLen]
if _, err := io.ReadFull(z.src, z.data); err != nil {
return 0, err
}
z.pos += int64(bLen)
if z.OnBlockDone != nil {
z.OnBlockDone(int(bLen))
}
if z.BlockChecksum {
checksum, err := z.readUint32()
if err != nil {
return 0, err
}
z.pos += 4
if h := xxh32.ChecksumZero(z.data); h != checksum {
return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
}
}
} else {
// Compressed block.
if debugFlag {
debug("compressed block size %d", bLen)
}
if int(bLen) > cap(z.data) {
return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
}
zdata := z.zdata[:bLen]
if _, err := io.ReadFull(z.src, zdata); err != nil {
return 0, err
}
z.pos += int64(bLen)
if z.BlockChecksum {
checksum, err := z.readUint32()
if err != nil {
return 0, err
}
z.pos += 4
if h := xxh32.ChecksumZero(zdata); h != checksum {
return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
}
}
n, err := UncompressBlock(zdata, z.data)
if err != nil {
return 0, err
}
z.data = z.data[:n]
if z.OnBlockDone != nil {
z.OnBlockDone(n)
}
}
if !z.NoChecksum {
_, _ = z.checksum.Write(z.data)
if debugFlag {
debug("current frame checksum %x", z.checksum.Sum32())
}
}
z.idx = 0
}
if z.skip > int64(len(z.data[z.idx:])) {
z.skip -= int64(len(z.data[z.idx:]))
z.dpos += int64(len(z.data[z.idx:]))
z.idx = len(z.data)
return 0, nil
}
z.idx += int(z.skip)
z.dpos += z.skip
z.skip = 0
n := copy(buf, z.data[z.idx:])
z.idx += n
z.dpos += int64(n)
if debugFlag {
debug("copied %d bytes to input", n)
}
return n, nil
}
// Seek implements io.Seeker, but supports seeking forward from the current
// position only. Any other seek will return an error. Allows skipping output
// bytes which aren't needed, which in some scenarios is faster than reading
// and discarding them.
// Note this may cause future calls to Read() to read 0 bytes if all of the
// data they would have returned is skipped.
func (z *Reader) Seek(offset int64, whence int) (int64, error) {
if offset < 0 || whence != io.SeekCurrent {
return z.dpos + z.skip, ErrUnsupportedSeek
}
z.skip += offset
return z.dpos + z.skip, nil
}
// Reset discards the Reader's state and makes it equivalent to the
// result of its original state from NewReader, but reading from r instead.
// This permits reusing a Reader rather than allocating a new one.
func (z *Reader) Reset(r io.Reader) {
z.Header = Header{}
z.pos = 0
z.src = r
z.zdata = z.zdata[:0]
z.data = z.data[:0]
z.idx = 0
z.checksum.Reset()
}
// readUint32 reads an uint32 into the supplied buffer.
// The idea is to make use of the already allocated buffers avoiding additional allocations.
func (z *Reader) readUint32() (uint32, error) {
buf := z.buf[:4]
_, err := io.ReadFull(z.src, buf)
x := binary.LittleEndian.Uint32(buf)
return x, err
}

408
vendor/github.com/pierrec/lz4/writer.go generated vendored Normal file
View file

@ -0,0 +1,408 @@
package lz4
import (
"encoding/binary"
"fmt"
"github.com/pierrec/lz4/internal/xxh32"
"io"
"runtime"
)
// zResult contains the results of compressing a block.
type zResult struct {
size uint32 // Block header
data []byte // Compressed data
checksum uint32 // Data checksum
}
// Writer implements the LZ4 frame encoder.
type Writer struct {
Header
// Handler called when a block has been successfully written out.
// It provides the number of bytes written.
OnBlockDone func(size int)
buf [19]byte // magic number(4) + header(flags(2)+[Size(8)+DictID(4)]+checksum(1)) does not exceed 19 bytes
dst io.Writer // Destination.
checksum xxh32.XXHZero // Frame checksum.
data []byte // Data to be compressed + buffer for compressed data.
idx int // Index into data.
hashtable [winSize]int // Hash table used in CompressBlock().
// For concurrency.
c chan chan zResult // Channel for block compression goroutines and writer goroutine.
err error // Any error encountered while writing to the underlying destination.
}
// NewWriter returns a new LZ4 frame encoder.
// No access to the underlying io.Writer is performed.
// The supplied Header is checked at the first Write.
// It is ok to change it before the first Write but then not until a Reset() is performed.
func NewWriter(dst io.Writer) *Writer {
z := new(Writer)
z.Reset(dst)
return z
}
// WithConcurrency sets the number of concurrent go routines used for compression.
// A negative value sets the concurrency to GOMAXPROCS.
func (z *Writer) WithConcurrency(n int) *Writer {
switch {
case n == 0 || n == 1:
z.c = nil
return z
case n < 0:
n = runtime.GOMAXPROCS(0)
}
z.c = make(chan chan zResult, n)
// Writer goroutine managing concurrent block compression goroutines.
go func() {
// Process next block compression item.
for c := range z.c {
// Read the next compressed block result.
// Waiting here ensures that the blocks are output in the order they were sent.
// The incoming channel is always closed as it indicates to the caller that
// the block has been processed.
res := <-c
n := len(res.data)
if n == 0 {
// Notify the block compression routine that we are done with its result.
// This is used when a sentinel block is sent to terminate the compression.
close(c)
return
}
// Write the block.
if err := z.writeUint32(res.size); err != nil && z.err == nil {
z.err = err
}
if _, err := z.dst.Write(res.data); err != nil && z.err == nil {
z.err = err
}
if z.BlockChecksum {
if err := z.writeUint32(res.checksum); err != nil && z.err == nil {
z.err = err
}
}
if isCompressed := res.size&compressedBlockFlag == 0; isCompressed {
// It is now safe to release the buffer as no longer in use by any goroutine.
putBuffer(cap(res.data), res.data)
}
if h := z.OnBlockDone; h != nil {
h(n)
}
close(c)
}
}()
return z
}
// newBuffers instantiates new buffers which size matches the one in Header.
// The returned buffers are for decompression and compression respectively.
func (z *Writer) newBuffers() {
bSize := z.Header.BlockMaxSize
buf := getBuffer(bSize)
z.data = buf[:bSize] // Uncompressed buffer is the first half.
}
// freeBuffers puts the writer's buffers back to the pool.
func (z *Writer) freeBuffers() {
// Put the buffer back into the pool, if any.
putBuffer(z.Header.BlockMaxSize, z.data)
z.data = nil
}
// writeHeader builds and writes the header (magic+header) to the underlying io.Writer.
func (z *Writer) writeHeader() error {
// Default to 4Mb if BlockMaxSize is not set.
if z.Header.BlockMaxSize == 0 {
z.Header.BlockMaxSize = blockSize4M
}
// The only option that needs to be validated.
bSize := z.Header.BlockMaxSize
if !isValidBlockSize(z.Header.BlockMaxSize) {
return fmt.Errorf("lz4: invalid block max size: %d", bSize)
}
// Allocate the compressed/uncompressed buffers.
// The compressed buffer cannot exceed the uncompressed one.
z.newBuffers()
z.idx = 0
// Size is optional.
buf := z.buf[:]
// Set the fixed size data: magic number, block max size and flags.
binary.LittleEndian.PutUint32(buf[0:], frameMagic)
flg := byte(Version << 6)
flg |= 1 << 5 // No block dependency.
if z.Header.BlockChecksum {
flg |= 1 << 4
}
if z.Header.Size > 0 {
flg |= 1 << 3
}
if !z.Header.NoChecksum {
flg |= 1 << 2
}
buf[4] = flg
buf[5] = blockSizeValueToIndex(z.Header.BlockMaxSize) << 4
// Current buffer size: magic(4) + flags(1) + block max size (1).
n := 6
// Optional items.
if z.Header.Size > 0 {
binary.LittleEndian.PutUint64(buf[n:], z.Header.Size)
n += 8
}
// The header checksum includes the flags, block max size and optional Size.
buf[n] = byte(xxh32.ChecksumZero(buf[4:n]) >> 8 & 0xFF)
z.checksum.Reset()
// Header ready, write it out.
if _, err := z.dst.Write(buf[0 : n+1]); err != nil {
return err
}
z.Header.done = true
if debugFlag {
debug("wrote header %v", z.Header)
}
return nil
}
// Write compresses data from the supplied buffer into the underlying io.Writer.
// Write does not return until the data has been written.
func (z *Writer) Write(buf []byte) (int, error) {
if !z.Header.done {
if err := z.writeHeader(); err != nil {
return 0, err
}
}
if debugFlag {
debug("input buffer len=%d index=%d", len(buf), z.idx)
}
zn := len(z.data)
var n int
for len(buf) > 0 {
if z.idx == 0 && len(buf) >= zn {
// Avoid a copy as there is enough data for a block.
if err := z.compressBlock(buf[:zn]); err != nil {
return n, err
}
n += zn
buf = buf[zn:]
continue
}
// Accumulate the data to be compressed.
m := copy(z.data[z.idx:], buf)
n += m
z.idx += m
buf = buf[m:]
if debugFlag {
debug("%d bytes copied to buf, current index %d", n, z.idx)
}
if z.idx < len(z.data) {
// Buffer not filled.
if debugFlag {
debug("need more data for compression")
}
return n, nil
}
// Buffer full.
if err := z.compressBlock(z.data); err != nil {
return n, err
}
z.idx = 0
}
return n, nil
}
// compressBlock compresses a block.
func (z *Writer) compressBlock(data []byte) error {
if !z.NoChecksum {
_, _ = z.checksum.Write(data)
}
if z.c != nil {
c := make(chan zResult)
z.c <- c // Send now to guarantee order
go writerCompressBlock(c, z.Header, data)
return nil
}
zdata := z.data[z.Header.BlockMaxSize:cap(z.data)]
// The compressed block size cannot exceed the input's.
var zn int
if level := z.Header.CompressionLevel; level != 0 {
zn, _ = CompressBlockHC(data, zdata, level)
} else {
zn, _ = CompressBlock(data, zdata, z.hashtable[:])
}
var bLen uint32
if debugFlag {
debug("block compression %d => %d", len(data), zn)
}
if zn > 0 && zn < len(data) {
// Compressible and compressed size smaller than uncompressed: ok!
bLen = uint32(zn)
zdata = zdata[:zn]
} else {
// Uncompressed block.
bLen = uint32(len(data)) | compressedBlockFlag
zdata = data
}
if debugFlag {
debug("block compression to be written len=%d data len=%d", bLen, len(zdata))
}
// Write the block.
if err := z.writeUint32(bLen); err != nil {
return err
}
written, err := z.dst.Write(zdata)
if err != nil {
return err
}
if h := z.OnBlockDone; h != nil {
h(written)
}
if !z.BlockChecksum {
if debugFlag {
debug("current frame checksum %x", z.checksum.Sum32())
}
return nil
}
checksum := xxh32.ChecksumZero(zdata)
if debugFlag {
debug("block checksum %x", checksum)
defer func() { debug("current frame checksum %x", z.checksum.Sum32()) }()
}
return z.writeUint32(checksum)
}
// Flush flushes any pending compressed data to the underlying writer.
// Flush does not return until the data has been written.
// If the underlying writer returns an error, Flush returns that error.
func (z *Writer) Flush() error {
if debugFlag {
debug("flush with index %d", z.idx)
}
if z.idx == 0 {
return nil
}
data := z.data[:z.idx]
z.idx = 0
if z.c == nil {
return z.compressBlock(data)
}
if !z.NoChecksum {
_, _ = z.checksum.Write(data)
}
c := make(chan zResult)
z.c <- c
writerCompressBlock(c, z.Header, data)
return nil
}
func (z *Writer) close() error {
if z.c == nil {
return nil
}
// Send a sentinel block (no data to compress) to terminate the writer main goroutine.
c := make(chan zResult)
z.c <- c
c <- zResult{}
// Wait for the main goroutine to complete.
<-c
// At this point the main goroutine has shut down or is about to return.
z.c = nil
return z.err
}
// Close closes the Writer, flushing any unwritten data to the underlying io.Writer, but does not close the underlying io.Writer.
func (z *Writer) Close() error {
if !z.Header.done {
if err := z.writeHeader(); err != nil {
return err
}
}
if err := z.Flush(); err != nil {
return err
}
if err := z.close(); err != nil {
return err
}
z.freeBuffers()
if debugFlag {
debug("writing last empty block")
}
if err := z.writeUint32(0); err != nil {
return err
}
if z.NoChecksum {
return nil
}
checksum := z.checksum.Sum32()
if debugFlag {
debug("stream checksum %x", checksum)
}
return z.writeUint32(checksum)
}
// Reset clears the state of the Writer z such that it is equivalent to its
// initial state from NewWriter, but instead writing to w.
// No access to the underlying io.Writer is performed.
func (z *Writer) Reset(w io.Writer) {
n := cap(z.c)
_ = z.close()
z.freeBuffers()
z.Header.Reset()
z.dst = w
z.checksum.Reset()
z.idx = 0
z.err = nil
z.WithConcurrency(n)
}
// writeUint32 writes a uint32 to the underlying writer.
func (z *Writer) writeUint32(x uint32) error {
buf := z.buf[:4]
binary.LittleEndian.PutUint32(buf, x)
_, err := z.dst.Write(buf)
return err
}
// writerCompressBlock compresses data into a pooled buffer and writes its result
// out to the input channel.
func writerCompressBlock(c chan zResult, header Header, data []byte) {
zdata := getBuffer(header.BlockMaxSize)
// The compressed block size cannot exceed the input's.
var zn int
if level := header.CompressionLevel; level != 0 {
zn, _ = CompressBlockHC(data, zdata, level)
} else {
var hashTable [winSize]int
zn, _ = CompressBlock(data, zdata, hashTable[:])
}
var res zResult
if zn > 0 && zn < len(data) {
res.size = uint32(zn)
res.data = zdata[:zn]
} else {
res.size = uint32(len(data)) | compressedBlockFlag
res.data = data
}
if header.BlockChecksum {
res.checksum = xxh32.ChecksumZero(res.data)
}
c <- res
}

25
vendor/github.com/ulikunitz/xz/.gitignore generated vendored Normal file
View file

@ -0,0 +1,25 @@
# .gitignore
TODO.html
README.html
lzma/writer.txt
lzma/reader.txt
cmd/gxz/gxz
cmd/xb/xb
# test executables
*.test
# profile files
*.out
# vim swap file
.*.swp
# executables on windows
*.exe
# default compression test file
enwik8*

26
vendor/github.com/ulikunitz/xz/LICENSE generated vendored Normal file
View file

@ -0,0 +1,26 @@
Copyright (c) 2014-2020 Ulrich Kunitz
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* My name, Ulrich Kunitz, may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

73
vendor/github.com/ulikunitz/xz/README.md generated vendored Normal file
View file

@ -0,0 +1,73 @@
# Package xz
This Go language package supports the reading and writing of xz
compressed streams. It includes also a gxz command for compressing and
decompressing data. The package is completely written in Go and doesn't
have any dependency on any C code.
The package is currently under development. There might be bugs and APIs
are not considered stable. At this time the package cannot compete with
the xz tool regarding compression speed and size. The algorithms there
have been developed over a long time and are highly optimized. However
there are a number of improvements planned and I'm very optimistic about
parallel compression and decompression. Stay tuned!
## Using the API
The following example program shows how to use the API.
```go
package main
import (
"bytes"
"io"
"log"
"os"
"github.com/ulikunitz/xz"
)
func main() {
const text = "The quick brown fox jumps over the lazy dog.\n"
var buf bytes.Buffer
// compress text
w, err := xz.NewWriter(&buf)
if err != nil {
log.Fatalf("xz.NewWriter error %s", err)
}
if _, err := io.WriteString(w, text); err != nil {
log.Fatalf("WriteString error %s", err)
}
if err := w.Close(); err != nil {
log.Fatalf("w.Close error %s", err)
}
// decompress buffer and write output to stdout
r, err := xz.NewReader(&buf)
if err != nil {
log.Fatalf("NewReader error %s", err)
}
if _, err = io.Copy(os.Stdout, r); err != nil {
log.Fatalf("io.Copy error %s", err)
}
}
```
## Using the gxz compression tool
The package includes a gxz command line utility for compression and
decompression.
Use following command for installation:
$ go get github.com/ulikunitz/xz/cmd/gxz
To test it call the following command.
$ gxz bigfile
After some time a much smaller file bigfile.xz will replace bigfile.
To decompress it use the following command.
$ gxz -d bigfile.xz

332
vendor/github.com/ulikunitz/xz/TODO.md generated vendored Normal file
View file

@ -0,0 +1,332 @@
# TODO list
## Release v0.5.x
1. Support check flag in gxz command.
## Release v0.6
1. Review encoder and check for lzma improvements under xz.
2. Fix binary tree matcher.
3. Compare compression ratio with xz tool using comparable parameters
and optimize parameters
4. Do some optimizations
- rename operation action and make it a simple type of size 8
- make maxMatches, wordSize parameters
- stop searching after a certain length is found (parameter sweetLen)
## Release v0.7
1. Optimize code
2. Do statistical analysis to get linear presets.
3. Test sync.Pool compatability for xz and lzma Writer and Reader
3. Fuzz optimized code.
## Release v0.8
1. Support parallel go routines for writing and reading xz files.
2. Support a ReaderAt interface for xz files with small block sizes.
3. Improve compatibility between gxz and xz
4. Provide manual page for gxz
## Release v0.9
1. Improve documentation
2. Fuzz again
## Release v1.0
1. Full functioning gxz
2. Add godoc URL to README.md (godoc.org)
3. Resolve all issues.
4. Define release candidates.
5. Public announcement.
## Package lzma
### Release v0.6
- Rewrite Encoder into a simple greedy one-op-at-a-time encoder
including
+ simple scan at the dictionary head for the same byte
+ use the killer byte (requiring matches to get longer, the first
test should be the byte that would make the match longer)
## Optimizations
- There may be a lot of false sharing in lzma.State; check whether this
can be improved by reorganizing the internal structure of it.
- Check whether batching encoding and decoding improves speed.
### DAG optimizations
- Use full buffer to create minimal bit-length above range encoder.
- Might be too slow (see v0.4)
### Different match finders
- hashes with 2, 3 characters additional to 4 characters
- binary trees with 2-7 characters (uint64 as key, use uint32 as
pointers into a an array)
- rb-trees with 2-7 characters (uint64 as key, use uint32 as pointers
into an array with bit-steeling for the colors)
## Release Procedure
- execute goch -l for all packages; probably with lower param like 0.5.
- check orthography with gospell
- Write release notes in doc/relnotes.
- Update README.md
- xb copyright . in xz directory to ensure all new files have Copyright
header
- VERSION=<version> go generate github.com/ulikunitz/xz/... to update
version files
- Execute test for Linux/amd64, Linux/x86 and Windows/amd64.
- Update TODO.md - write short log entry
- git checkout master && git merge dev
- git tag -a <version>
- git push
## Log
### 2020-02-24
Release v0.5.7 supports the check-ID None and fixes
[issue #27](https://github.com/ulikunitz/xz/issues/27).
### 2019-02-20
Release v0.5.6 supports the go.mod file.
### 2018-10-28
Release v0.5.5 fixes issues #19 observing ErrLimit outputs.
### 2017-06-05
Release v0.5.4 fixes issues #15 of another problem with the padding size
check for the xz block header. I removed the check completely.
### 2017-02-15
Release v0.5.3 fixes issue #12 regarding the decompression of an empty
XZ stream. Many thanks to Tomasz Kłak, who reported the issue.
### 2016-12-02
Release v0.5.2 became necessary to allow the decoding of xz files with
4-byte padding in the block header. Many thanks to Greg, who reported
the issue.
### 2016-07-23
Release v0.5.1 became necessary to fix problems with 32-bit platforms.
Many thanks to Bruno Brigas, who reported the issue.
### 2016-07-04
Release v0.5 provides improvements to the compressor and provides support for
the decompression of xz files with multiple xz streams.
### 2016-01-31
Another compression rate increase by checking the byte at length of the
best match first, before checking the whole prefix. This makes the
compressor even faster. We have now a large time budget to beat the
compression ratio of the xz tool. For enwik8 we have now over 40 seconds
to reduce the compressed file size for another 7 MiB.
### 2016-01-30
I simplified the encoder. Speed and compression rate increased
dramatically. A high compression rate affects also the decompression
speed. The approach with the buffer and optimizing for operation
compression rate has not been successful. Going for the maximum length
appears to be the best approach.
### 2016-01-28
The release v0.4 is ready. It provides a working xz implementation,
which is rather slow, but works and is interoperable with the xz tool.
It is an important milestone.
### 2016-01-10
I have the first working implementation of an xz reader and writer. I'm
happy about reaching this milestone.
### 2015-12-02
I'm now ready to implement xz because, I have a working LZMA2
implementation. I decided today that v0.4 will use the slow encoder
using the operations buffer to be able to go back, if I intend to do so.
### 2015-10-21
I have restarted the work on the library. While trying to implement
LZMA2, I discovered that I need to resimplify the encoder and decoder
functions. The option approach is too complicated. Using a limited byte
writer and not caring for written bytes at all and not to try to handle
uncompressed data simplifies the LZMA encoder and decoder much.
Processing uncompressed data and handling limits is a feature of the
LZMA2 format not of LZMA.
I learned an interesting method from the LZO format. If the last copy is
too far away they are moving the head one 2 bytes and not 1 byte to
reduce processing times.
### 2015-08-26
I have now reimplemented the lzma package. The code is reasonably fast,
but can still be optimized. The next step is to implement LZMA2 and then
xz.
### 2015-07-05
Created release v0.3. The version is the foundation for a full xz
implementation that is the target of v0.4.
### 2015-06-11
The gflag package has been developed because I couldn't use flag and
pflag for a fully compatible support of gzip's and lzma's options. It
seems to work now quite nicely.
### 2015-06-05
The overflow issue was interesting to research, however Henry S. Warren
Jr. Hacker's Delight book was very helpful as usual and had the issue
explained perfectly. Fefe's information on his website was based on the
C FAQ and quite bad, because it didn't address the issue of -MININT ==
MININT.
### 2015-06-04
It has been a productive day. I improved the interface of lzma.Reader
and lzma.Writer and fixed the error handling.
### 2015-06-01
By computing the bit length of the LZMA operations I was able to
improve the greedy algorithm implementation. By using an 8 MByte buffer
the compression rate was not as good as for xz but already better then
gzip default.
Compression is currently slow, but this is something we will be able to
improve over time.
### 2015-05-26
Checked the license of ogier/pflag. The binary lzmago binary should
include the license terms for the pflag library.
I added the endorsement clause as used by Google for the Go sources the
LICENSE file.
### 2015-05-22
The package lzb contains now the basic implementation for creating or
reading LZMA byte streams. It allows the support for the implementation
of the DAG-shortest-path algorithm for the compression function.
### 2015-04-23
Completed yesterday the lzbase classes. I'm a little bit concerned that
using the components may require too much code, but on the other hand
there is a lot of flexibility.
### 2015-04-22
Implemented Reader and Writer during the Bayern game against Porto. The
second half gave me enough time.
### 2015-04-21
While showering today morning I discovered that the design for OpEncoder
and OpDecoder doesn't work, because encoding/decoding might depend on
the current status of the dictionary. This is not exactly the right way
to start the day.
Therefore we need to keep the Reader and Writer design. This time around
we simplify it by ignoring size limits. These can be added by wrappers
around the Reader and Writer interfaces. The Parameters type isn't
needed anymore.
However I will implement a ReaderState and WriterState type to use
static typing to ensure the right State object is combined with the
right lzbase.Reader and lzbase.Writer.
As a start I have implemented ReaderState and WriterState to ensure
that the state for reading is only used by readers and WriterState only
used by Writers.
### 2015-04-20
Today I implemented the OpDecoder and tested OpEncoder and OpDecoder.
### 2015-04-08
Came up with a new simplified design for lzbase. I implemented already
the type State that replaces OpCodec.
### 2015-04-06
The new lzma package is now fully usable and lzmago is using it now. The
old lzma package has been completely removed.
### 2015-04-05
Implemented lzma.Reader and tested it.
### 2015-04-04
Implemented baseReader by adapting code form lzma.Reader.
### 2015-04-03
The opCodec has been copied yesterday to lzma2. opCodec has a high
number of dependencies on other files in lzma2. Therefore I had to copy
almost all files from lzma.
### 2015-03-31
Removed only a TODO item.
However in Francesco Campoy's presentation "Go for Javaneros
(Javaïstes?)" is the the idea that using an embedded field E, all the
methods of E will be defined on T. If E is an interface T satisfies E.
https://talks.golang.org/2014/go4java.slide#51
I have never used this, but it seems to be a cool idea.
### 2015-03-30
Finished the type writerDict and wrote a simple test.
### 2015-03-25
I started to implement the writerDict.
### 2015-03-24
After thinking long about the LZMA2 code and several false starts, I
have now a plan to create a self-sufficient lzma2 package that supports
the classic LZMA format as well as LZMA2. The core idea is to support a
baseReader and baseWriter type that support the basic LZMA stream
without any headers. Both types must support the reuse of dictionaries
and the opCodec.
### 2015-01-10
1. Implemented simple lzmago tool
2. Tested tool against large 4.4G file
- compression worked correctly; tested decompression with lzma
- decompression hits a full buffer condition
3. Fixed a bug in the compressor and wrote a test for it
4. Executed full cycle for 4.4 GB file; performance can be improved ;-)
### 2015-01-11
- Release v0.2 because of the working LZMA encoder and decoder

74
vendor/github.com/ulikunitz/xz/bits.go generated vendored Normal file
View file

@ -0,0 +1,74 @@
// Copyright 2014-2019 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package xz
import (
"errors"
"io"
)
// putUint32LE puts the little-endian representation of x into the first
// four bytes of p.
func putUint32LE(p []byte, x uint32) {
p[0] = byte(x)
p[1] = byte(x >> 8)
p[2] = byte(x >> 16)
p[3] = byte(x >> 24)
}
// putUint64LE puts the little-endian representation of x into the first
// eight bytes of p.
func putUint64LE(p []byte, x uint64) {
p[0] = byte(x)
p[1] = byte(x >> 8)
p[2] = byte(x >> 16)
p[3] = byte(x >> 24)
p[4] = byte(x >> 32)
p[5] = byte(x >> 40)
p[6] = byte(x >> 48)
p[7] = byte(x >> 56)
}
// uint32LE converts a little endian representation to an uint32 value.
func uint32LE(p []byte) uint32 {
return uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 |
uint32(p[3])<<24
}
// putUvarint puts a uvarint representation of x into the byte slice.
func putUvarint(p []byte, x uint64) int {
i := 0
for x >= 0x80 {
p[i] = byte(x) | 0x80
x >>= 7
i++
}
p[i] = byte(x)
return i + 1
}
// errOverflow indicates an overflow of the 64-bit unsigned integer.
var errOverflowU64 = errors.New("xz: uvarint overflows 64-bit unsigned integer")
// readUvarint reads a uvarint from the given byte reader.
func readUvarint(r io.ByteReader) (x uint64, n int, err error) {
var s uint
i := 0
for {
b, err := r.ReadByte()
if err != nil {
return x, i, err
}
i++
if b < 0x80 {
if i > 10 || i == 10 && b > 1 {
return x, i, errOverflowU64
}
return x | uint64(b)<<s, i, nil
}
x |= uint64(b&0x7f) << s
s += 7
}
}

54
vendor/github.com/ulikunitz/xz/crc.go generated vendored Normal file
View file

@ -0,0 +1,54 @@
// Copyright 2014-2019 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package xz
import (
"hash"
"hash/crc32"
"hash/crc64"
)
// crc32Hash implements the hash.Hash32 interface with Sum returning the
// crc32 value in little-endian encoding.
type crc32Hash struct {
hash.Hash32
}
// Sum returns the crc32 value as little endian.
func (h crc32Hash) Sum(b []byte) []byte {
p := make([]byte, 4)
putUint32LE(p, h.Hash32.Sum32())
b = append(b, p...)
return b
}
// newCRC32 returns a CRC-32 hash that returns the 64-bit value in
// little-endian encoding using the IEEE polynomial.
func newCRC32() hash.Hash {
return crc32Hash{Hash32: crc32.NewIEEE()}
}
// crc64Hash implements the Hash64 interface with Sum returning the
// CRC-64 value in little-endian encoding.
type crc64Hash struct {
hash.Hash64
}
// Sum returns the CRC-64 value in little-endian encoding.
func (h crc64Hash) Sum(b []byte) []byte {
p := make([]byte, 8)
putUint64LE(p, h.Hash64.Sum64())
b = append(b, p...)
return b
}
// crc64Table is used to create a CRC-64 hash.
var crc64Table = crc64.MakeTable(crc64.ECMA)
// newCRC64 returns a CRC-64 hash that returns the 64-bit value in
// little-endian encoding using the ECMA polynomial.
func newCRC64() hash.Hash {
return crc64Hash{Hash64: crc64.New(crc64Table)}
}

732
vendor/github.com/ulikunitz/xz/format.go generated vendored Normal file
View file

@ -0,0 +1,732 @@
// Copyright 2014-2019 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package xz
import (
"bytes"
"crypto/sha256"
"errors"
"fmt"
"hash"
"hash/crc32"
"io"
"github.com/ulikunitz/xz/lzma"
)
// allZeros checks whether a given byte slice has only zeros.
func allZeros(p []byte) bool {
for _, c := range p {
if c != 0 {
return false
}
}
return true
}
// padLen returns the length of the padding required for the given
// argument.
func padLen(n int64) int {
k := int(n % 4)
if k > 0 {
k = 4 - k
}
return k
}
/*** Header ***/
// headerMagic stores the magic bytes for the header
var headerMagic = []byte{0xfd, '7', 'z', 'X', 'Z', 0x00}
// HeaderLen provides the length of the xz file header.
const HeaderLen = 12
// Constants for the checksum methods supported by xz.
const (
None byte = 0x0
CRC32 = 0x1
CRC64 = 0x4
SHA256 = 0xa
)
// errInvalidFlags indicates that flags are invalid.
var errInvalidFlags = errors.New("xz: invalid flags")
// verifyFlags returns the error errInvalidFlags if the value is
// invalid.
func verifyFlags(flags byte) error {
switch flags {
case None, CRC32, CRC64, SHA256:
return nil
default:
return errInvalidFlags
}
}
// flagstrings maps flag values to strings.
var flagstrings = map[byte]string{
None: "None",
CRC32: "CRC-32",
CRC64: "CRC-64",
SHA256: "SHA-256",
}
// flagString returns the string representation for the given flags.
func flagString(flags byte) string {
s, ok := flagstrings[flags]
if !ok {
return "invalid"
}
return s
}
// newHashFunc returns a function that creates hash instances for the
// hash method encoded in flags.
func newHashFunc(flags byte) (newHash func() hash.Hash, err error) {
switch flags {
case None:
newHash = newNoneHash
case CRC32:
newHash = newCRC32
case CRC64:
newHash = newCRC64
case SHA256:
newHash = sha256.New
default:
err = errInvalidFlags
}
return
}
// header provides the actual content of the xz file header: the flags.
type header struct {
flags byte
}
// Errors returned by readHeader.
var errHeaderMagic = errors.New("xz: invalid header magic bytes")
// ValidHeader checks whether data is a correct xz file header. The
// length of data must be HeaderLen.
func ValidHeader(data []byte) bool {
var h header
err := h.UnmarshalBinary(data)
return err == nil
}
// String returns a string representation of the flags.
func (h header) String() string {
return flagString(h.flags)
}
// UnmarshalBinary reads header from the provided data slice.
func (h *header) UnmarshalBinary(data []byte) error {
// header length
if len(data) != HeaderLen {
return errors.New("xz: wrong file header length")
}
// magic header
if !bytes.Equal(headerMagic, data[:6]) {
return errHeaderMagic
}
// checksum
crc := crc32.NewIEEE()
crc.Write(data[6:8])
if uint32LE(data[8:]) != crc.Sum32() {
return errors.New("xz: invalid checksum for file header")
}
// stream flags
if data[6] != 0 {
return errInvalidFlags
}
flags := data[7]
if err := verifyFlags(flags); err != nil {
return err
}
h.flags = flags
return nil
}
// MarshalBinary generates the xz file header.
func (h *header) MarshalBinary() (data []byte, err error) {
if err = verifyFlags(h.flags); err != nil {
return nil, err
}
data = make([]byte, 12)
copy(data, headerMagic)
data[7] = h.flags
crc := crc32.NewIEEE()
crc.Write(data[6:8])
putUint32LE(data[8:], crc.Sum32())
return data, nil
}
/*** Footer ***/
// footerLen defines the length of the footer.
const footerLen = 12
// footerMagic contains the footer magic bytes.
var footerMagic = []byte{'Y', 'Z'}
// footer represents the content of the xz file footer.
type footer struct {
indexSize int64
flags byte
}
// String prints a string representation of the footer structure.
func (f footer) String() string {
return fmt.Sprintf("%s index size %d", flagString(f.flags), f.indexSize)
}
// Minimum and maximum for the size of the index (backward size).
const (
minIndexSize = 4
maxIndexSize = (1 << 32) * 4
)
// MarshalBinary converts footer values into an xz file footer. Note
// that the footer value is checked for correctness.
func (f *footer) MarshalBinary() (data []byte, err error) {
if err = verifyFlags(f.flags); err != nil {
return nil, err
}
if !(minIndexSize <= f.indexSize && f.indexSize <= maxIndexSize) {
return nil, errors.New("xz: index size out of range")
}
if f.indexSize%4 != 0 {
return nil, errors.New(
"xz: index size not aligned to four bytes")
}
data = make([]byte, footerLen)
// backward size (index size)
s := (f.indexSize / 4) - 1
putUint32LE(data[4:], uint32(s))
// flags
data[9] = f.flags
// footer magic
copy(data[10:], footerMagic)
// CRC-32
crc := crc32.NewIEEE()
crc.Write(data[4:10])
putUint32LE(data, crc.Sum32())
return data, nil
}
// UnmarshalBinary sets the footer value by unmarshalling an xz file
// footer.
func (f *footer) UnmarshalBinary(data []byte) error {
if len(data) != footerLen {
return errors.New("xz: wrong footer length")
}
// magic bytes
if !bytes.Equal(data[10:], footerMagic) {
return errors.New("xz: footer magic invalid")
}
// CRC-32
crc := crc32.NewIEEE()
crc.Write(data[4:10])
if uint32LE(data) != crc.Sum32() {
return errors.New("xz: footer checksum error")
}
var g footer
// backward size (index size)
g.indexSize = (int64(uint32LE(data[4:])) + 1) * 4
// flags
if data[8] != 0 {
return errInvalidFlags
}
g.flags = data[9]
if err := verifyFlags(g.flags); err != nil {
return err
}
*f = g
return nil
}
/*** Block Header ***/
// blockHeader represents the content of an xz block header.
type blockHeader struct {
compressedSize int64
uncompressedSize int64
filters []filter
}
// String converts the block header into a string.
func (h blockHeader) String() string {
var buf bytes.Buffer
first := true
if h.compressedSize >= 0 {
fmt.Fprintf(&buf, "compressed size %d", h.compressedSize)
first = false
}
if h.uncompressedSize >= 0 {
if !first {
buf.WriteString(" ")
}
fmt.Fprintf(&buf, "uncompressed size %d", h.uncompressedSize)
first = false
}
for _, f := range h.filters {
if !first {
buf.WriteString(" ")
}
fmt.Fprintf(&buf, "filter %s", f)
first = false
}
return buf.String()
}
// Masks for the block flags.
const (
filterCountMask = 0x03
compressedSizePresent = 0x40
uncompressedSizePresent = 0x80
reservedBlockFlags = 0x3C
)
// errIndexIndicator signals that an index indicator (0x00) has been found
// instead of an expected block header indicator.
var errIndexIndicator = errors.New("xz: found index indicator")
// readBlockHeader reads the block header.
func readBlockHeader(r io.Reader) (h *blockHeader, n int, err error) {
var buf bytes.Buffer
buf.Grow(20)
// block header size
z, err := io.CopyN(&buf, r, 1)
n = int(z)
if err != nil {
return nil, n, err
}
s := buf.Bytes()[0]
if s == 0 {
return nil, n, errIndexIndicator
}
// read complete header
headerLen := (int(s) + 1) * 4
buf.Grow(headerLen - 1)
z, err = io.CopyN(&buf, r, int64(headerLen-1))
n += int(z)
if err != nil {
return nil, n, err
}
// unmarshal block header
h = new(blockHeader)
if err = h.UnmarshalBinary(buf.Bytes()); err != nil {
return nil, n, err
}
return h, n, nil
}
// readSizeInBlockHeader reads the uncompressed or compressed size
// fields in the block header. The present value informs the function
// whether the respective field is actually present in the header.
func readSizeInBlockHeader(r io.ByteReader, present bool) (n int64, err error) {
if !present {
return -1, nil
}
x, _, err := readUvarint(r)
if err != nil {
return 0, err
}
if x >= 1<<63 {
return 0, errors.New("xz: size overflow in block header")
}
return int64(x), nil
}
// UnmarshalBinary unmarshals the block header.
func (h *blockHeader) UnmarshalBinary(data []byte) error {
// Check header length
s := data[0]
if data[0] == 0 {
return errIndexIndicator
}
headerLen := (int(s) + 1) * 4
if len(data) != headerLen {
return fmt.Errorf("xz: data length %d; want %d", len(data),
headerLen)
}
n := headerLen - 4
// Check CRC-32
crc := crc32.NewIEEE()
crc.Write(data[:n])
if crc.Sum32() != uint32LE(data[n:]) {
return errors.New("xz: checksum error for block header")
}
// Block header flags
flags := data[1]
if flags&reservedBlockFlags != 0 {
return errors.New("xz: reserved block header flags set")
}
r := bytes.NewReader(data[2:n])
// Compressed size
var err error
h.compressedSize, err = readSizeInBlockHeader(
r, flags&compressedSizePresent != 0)
if err != nil {
return err
}
// Uncompressed size
h.uncompressedSize, err = readSizeInBlockHeader(
r, flags&uncompressedSizePresent != 0)
if err != nil {
return err
}
h.filters, err = readFilters(r, int(flags&filterCountMask)+1)
if err != nil {
return err
}
// Check padding
// Since headerLen is a multiple of 4 we don't need to check
// alignment.
k := r.Len()
// The standard spec says that the padding should have not more
// than 3 bytes. However we found paddings of 4 or 5 in the
// wild. See https://github.com/ulikunitz/xz/pull/11 and
// https://github.com/ulikunitz/xz/issues/15
//
// The only reasonable approach seems to be to ignore the
// padding size. We still check that all padding bytes are zero.
if !allZeros(data[n-k : n]) {
return errPadding
}
return nil
}
// MarshalBinary marshals the binary header.
func (h *blockHeader) MarshalBinary() (data []byte, err error) {
if !(minFilters <= len(h.filters) && len(h.filters) <= maxFilters) {
return nil, errors.New("xz: filter count wrong")
}
for i, f := range h.filters {
if i < len(h.filters)-1 {
if f.id() == lzmaFilterID {
return nil, errors.New(
"xz: LZMA2 filter is not the last")
}
} else {
// last filter
if f.id() != lzmaFilterID {
return nil, errors.New("xz: " +
"last filter must be the LZMA2 filter")
}
}
}
var buf bytes.Buffer
// header size must set at the end
buf.WriteByte(0)
// flags
flags := byte(len(h.filters) - 1)
if h.compressedSize >= 0 {
flags |= compressedSizePresent
}
if h.uncompressedSize >= 0 {
flags |= uncompressedSizePresent
}
buf.WriteByte(flags)
p := make([]byte, 10)
if h.compressedSize >= 0 {
k := putUvarint(p, uint64(h.compressedSize))
buf.Write(p[:k])
}
if h.uncompressedSize >= 0 {
k := putUvarint(p, uint64(h.uncompressedSize))
buf.Write(p[:k])
}
for _, f := range h.filters {
fp, err := f.MarshalBinary()
if err != nil {
return nil, err
}
buf.Write(fp)
}
// padding
for i := padLen(int64(buf.Len())); i > 0; i-- {
buf.WriteByte(0)
}
// crc place holder
buf.Write(p[:4])
data = buf.Bytes()
if len(data)%4 != 0 {
panic("data length not aligned")
}
s := len(data)/4 - 1
if !(1 < s && s <= 255) {
panic("wrong block header size")
}
data[0] = byte(s)
crc := crc32.NewIEEE()
crc.Write(data[:len(data)-4])
putUint32LE(data[len(data)-4:], crc.Sum32())
return data, nil
}
// Constants used for marshalling and unmarshalling filters in the xz
// block header.
const (
minFilters = 1
maxFilters = 4
minReservedID = 1 << 62
)
// filter represents a filter in the block header.
type filter interface {
id() uint64
UnmarshalBinary(data []byte) error
MarshalBinary() (data []byte, err error)
reader(r io.Reader, c *ReaderConfig) (fr io.Reader, err error)
writeCloser(w io.WriteCloser, c *WriterConfig) (fw io.WriteCloser, err error)
// filter must be last filter
last() bool
}
// readFilter reads a block filter from the block header. At this point
// in time only the LZMA2 filter is supported.
func readFilter(r io.Reader) (f filter, err error) {
br := lzma.ByteReader(r)
// index
id, _, err := readUvarint(br)
if err != nil {
return nil, err
}
var data []byte
switch id {
case lzmaFilterID:
data = make([]byte, lzmaFilterLen)
data[0] = lzmaFilterID
if _, err = io.ReadFull(r, data[1:]); err != nil {
return nil, err
}
f = new(lzmaFilter)
default:
if id >= minReservedID {
return nil, errors.New(
"xz: reserved filter id in block stream header")
}
return nil, errors.New("xz: invalid filter id")
}
if err = f.UnmarshalBinary(data); err != nil {
return nil, err
}
return f, err
}
// readFilters reads count filters. At this point in time only the count
// 1 is supported.
func readFilters(r io.Reader, count int) (filters []filter, err error) {
if count != 1 {
return nil, errors.New("xz: unsupported filter count")
}
f, err := readFilter(r)
if err != nil {
return nil, err
}
return []filter{f}, err
}
// writeFilters writes the filters.
func writeFilters(w io.Writer, filters []filter) (n int, err error) {
for _, f := range filters {
p, err := f.MarshalBinary()
if err != nil {
return n, err
}
k, err := w.Write(p)
n += k
if err != nil {
return n, err
}
}
return n, nil
}
/*** Index ***/
// record describes a block in the xz file index.
type record struct {
unpaddedSize int64
uncompressedSize int64
}
// readRecord reads an index record.
func readRecord(r io.ByteReader) (rec record, n int, err error) {
u, k, err := readUvarint(r)
n += k
if err != nil {
return rec, n, err
}
rec.unpaddedSize = int64(u)
if rec.unpaddedSize < 0 {
return rec, n, errors.New("xz: unpadded size negative")
}
u, k, err = readUvarint(r)
n += k
if err != nil {
return rec, n, err
}
rec.uncompressedSize = int64(u)
if rec.uncompressedSize < 0 {
return rec, n, errors.New("xz: uncompressed size negative")
}
return rec, n, nil
}
// MarshalBinary converts an index record in its binary encoding.
func (rec *record) MarshalBinary() (data []byte, err error) {
// maximum length of a uvarint is 10
p := make([]byte, 20)
n := putUvarint(p, uint64(rec.unpaddedSize))
n += putUvarint(p[n:], uint64(rec.uncompressedSize))
return p[:n], nil
}
// writeIndex writes the index, a sequence of records.
func writeIndex(w io.Writer, index []record) (n int64, err error) {
crc := crc32.NewIEEE()
mw := io.MultiWriter(w, crc)
// index indicator
k, err := mw.Write([]byte{0})
n += int64(k)
if err != nil {
return n, err
}
// number of records
p := make([]byte, 10)
k = putUvarint(p, uint64(len(index)))
k, err = mw.Write(p[:k])
n += int64(k)
if err != nil {
return n, err
}
// list of records
for _, rec := range index {
p, err := rec.MarshalBinary()
if err != nil {
return n, err
}
k, err = mw.Write(p)
n += int64(k)
if err != nil {
return n, err
}
}
// index padding
k, err = mw.Write(make([]byte, padLen(int64(n))))
n += int64(k)
if err != nil {
return n, err
}
// crc32 checksum
putUint32LE(p, crc.Sum32())
k, err = w.Write(p[:4])
n += int64(k)
return n, err
}
// readIndexBody reads the index from the reader. It assumes that the
// index indicator has already been read.
func readIndexBody(r io.Reader) (records []record, n int64, err error) {
crc := crc32.NewIEEE()
// index indicator
crc.Write([]byte{0})
br := lzma.ByteReader(io.TeeReader(r, crc))
// number of records
u, k, err := readUvarint(br)
n += int64(k)
if err != nil {
return nil, n, err
}
recLen := int(u)
if recLen < 0 || uint64(recLen) != u {
return nil, n, errors.New("xz: record number overflow")
}
// list of records
records = make([]record, recLen)
for i := range records {
records[i], k, err = readRecord(br)
n += int64(k)
if err != nil {
return nil, n, err
}
}
p := make([]byte, padLen(int64(n+1)), 4)
k, err = io.ReadFull(br.(io.Reader), p)
n += int64(k)
if err != nil {
return nil, n, err
}
if !allZeros(p) {
return nil, n, errors.New("xz: non-zero byte in index padding")
}
// crc32
s := crc.Sum32()
p = p[:4]
k, err = io.ReadFull(br.(io.Reader), p)
n += int64(k)
if err != nil {
return records, n, err
}
if uint32LE(p) != s {
return nil, n, errors.New("xz: wrong checksum for index")
}
return records, n, nil
}

BIN
vendor/github.com/ulikunitz/xz/fox-check-none.xz generated vendored Normal file

Binary file not shown.

BIN
vendor/github.com/ulikunitz/xz/fox.xz generated vendored Normal file

Binary file not shown.

3
vendor/github.com/ulikunitz/xz/go.mod generated vendored Normal file
View file

@ -0,0 +1,3 @@
module github.com/ulikunitz/xz
go 1.12

View file

@ -0,0 +1,181 @@
// Copyright 2014-2019 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package hash
// CyclicPoly provides a cyclic polynomial rolling hash.
type CyclicPoly struct {
h uint64
p []uint64
i int
}
// ror rotates the unsigned 64-bit integer to right. The argument s must be
// less than 64.
func ror(x uint64, s uint) uint64 {
return (x >> s) | (x << (64 - s))
}
// NewCyclicPoly creates a new instance of the CyclicPoly structure. The
// argument n gives the number of bytes for which a hash will be executed.
// This number must be positive; the method panics if this isn't the case.
func NewCyclicPoly(n int) *CyclicPoly {
if n < 1 {
panic("argument n must be positive")
}
return &CyclicPoly{p: make([]uint64, 0, n)}
}
// Len returns the length of the byte sequence for which a hash is generated.
func (r *CyclicPoly) Len() int {
return cap(r.p)
}
// RollByte hashes the next byte and returns a hash value. The complete becomes
// available after at least Len() bytes have been hashed.
func (r *CyclicPoly) RollByte(x byte) uint64 {
y := hash[x]
if len(r.p) < cap(r.p) {
r.h = ror(r.h, 1) ^ y
r.p = append(r.p, y)
} else {
r.h ^= ror(r.p[r.i], uint(cap(r.p)-1))
r.h = ror(r.h, 1) ^ y
r.p[r.i] = y
r.i = (r.i + 1) % cap(r.p)
}
return r.h
}
// Stores the hash for the individual bytes.
var hash = [256]uint64{
0x2e4fc3f904065142, 0xc790984cfbc99527,
0x879f95eb8c62f187, 0x3b61be86b5021ef2,
0x65a896a04196f0a5, 0xc5b307b80470b59e,
0xd3bff376a70df14b, 0xc332f04f0b3f1701,
0x753b5f0e9abf3e0d, 0xb41538fdfe66ef53,
0x1906a10c2c1c0208, 0xfb0c712a03421c0d,
0x38be311a65c9552b, 0xfee7ee4ca6445c7e,
0x71aadeded184f21e, 0xd73426fccda23b2d,
0x29773fb5fb9600b5, 0xce410261cd32981a,
0xfe2848b3c62dbc2d, 0x459eaaff6e43e11c,
0xc13e35fc9c73a887, 0xf30ed5c201e76dbc,
0xa5f10b3910482cea, 0x2945d59be02dfaad,
0x06ee334ff70571b5, 0xbabf9d8070f44380,
0xee3e2e9912ffd27c, 0x2a7118d1ea6b8ea7,
0x26183cb9f7b1664c, 0xea71dac7da068f21,
0xea92eca5bd1d0bb7, 0x415595862defcd75,
0x248a386023c60648, 0x9cf021ab284b3c8a,
0xfc9372df02870f6c, 0x2b92d693eeb3b3fc,
0x73e799d139dc6975, 0x7b15ae312486363c,
0xb70e5454a2239c80, 0x208e3fb31d3b2263,
0x01f563cabb930f44, 0x2ac4533d2a3240d8,
0x84231ed1064f6f7c, 0xa9f020977c2a6d19,
0x213c227271c20122, 0x09fe8a9a0a03d07a,
0x4236dc75bcaf910c, 0x460a8b2bead8f17e,
0xd9b27be1aa07055f, 0xd202d5dc4b11c33e,
0x70adb010543bea12, 0xcdae938f7ea6f579,
0x3f3d870208672f4d, 0x8e6ccbce9d349536,
0xe4c0871a389095ae, 0xf5f2a49152bca080,
0x9a43f9b97269934e, 0xc17b3753cb6f475c,
0xd56d941e8e206bd4, 0xac0a4f3e525eda00,
0xa06d5a011912a550, 0x5537ed19537ad1df,
0xa32fe713d611449d, 0x2a1d05b47c3b579f,
0x991d02dbd30a2a52, 0x39e91e7e28f93eb0,
0x40d06adb3e92c9ac, 0x9b9d3afde1c77c97,
0x9a3f3f41c02c616f, 0x22ecd4ba00f60c44,
0x0b63d5d801708420, 0x8f227ca8f37ffaec,
0x0256278670887c24, 0x107e14877dbf540b,
0x32c19f2786ac1c05, 0x1df5b12bb4bc9c61,
0xc0cac129d0d4c4e2, 0x9fdb52ee9800b001,
0x31f601d5d31c48c4, 0x72ff3c0928bcaec7,
0xd99264421147eb03, 0x535a2d6d38aefcfe,
0x6ba8b4454a916237, 0xfa39366eaae4719c,
0x10f00fd7bbb24b6f, 0x5bd23185c76c84d4,
0xb22c3d7e1b00d33f, 0x3efc20aa6bc830a8,
0xd61c2503fe639144, 0x30ce625441eb92d3,
0xe5d34cf359e93100, 0xa8e5aa13f2b9f7a5,
0x5c2b8d851ca254a6, 0x68fb6c5e8b0d5fdf,
0xc7ea4872c96b83ae, 0x6dd5d376f4392382,
0x1be88681aaa9792f, 0xfef465ee1b6c10d9,
0x1f98b65ed43fcb2e, 0x4d1ca11eb6e9a9c9,
0x7808e902b3857d0b, 0x171c9c4ea4607972,
0x58d66274850146df, 0x42b311c10d3981d1,
0x647fa8c621c41a4c, 0xf472771c66ddfedc,
0x338d27e3f847b46b, 0x6402ce3da97545ce,
0x5162db616fc38638, 0x9c83be97bc22a50e,
0x2d3d7478a78d5e72, 0xe621a9b938fd5397,
0x9454614eb0f81c45, 0x395fb6e742ed39b6,
0x77dd9179d06037bf, 0xc478d0fee4d2656d,
0x35d9d6cb772007af, 0x83a56e92c883f0f6,
0x27937453250c00a1, 0x27bd6ebc3a46a97d,
0x9f543bf784342d51, 0xd158f38c48b0ed52,
0x8dd8537c045f66b4, 0x846a57230226f6d5,
0x6b13939e0c4e7cdf, 0xfca25425d8176758,
0x92e5fc6cd52788e6, 0x9992e13d7a739170,
0x518246f7a199e8ea, 0xf104c2a71b9979c7,
0x86b3ffaabea4768f, 0x6388061cf3e351ad,
0x09d9b5295de5bbb5, 0x38bf1638c2599e92,
0x1d759846499e148d, 0x4c0ff015e5f96ef4,
0xa41a94cfa270f565, 0x42d76f9cb2326c0b,
0x0cf385dd3c9c23ba, 0x0508a6c7508d6e7a,
0x337523aabbe6cf8d, 0x646bb14001d42b12,
0xc178729d138adc74, 0xf900ef4491f24086,
0xee1a90d334bb5ac4, 0x9755c92247301a50,
0xb999bf7c4ff1b610, 0x6aeeb2f3b21e8fc9,
0x0fa8084cf91ac6ff, 0x10d226cf136e6189,
0xd302057a07d4fb21, 0x5f03800e20a0fcc3,
0x80118d4ae46bd210, 0x58ab61a522843733,
0x51edd575c5432a4b, 0x94ee6ff67f9197f7,
0x765669e0e5e8157b, 0xa5347830737132f0,
0x3ba485a69f01510c, 0x0b247d7b957a01c3,
0x1b3d63449fd807dc, 0x0fdc4721c30ad743,
0x8b535ed3829b2b14, 0xee41d0cad65d232c,
0xe6a99ed97a6a982f, 0x65ac6194c202003d,
0x692accf3a70573eb, 0xcc3c02c3e200d5af,
0x0d419e8b325914a3, 0x320f160f42c25e40,
0x00710d647a51fe7a, 0x3c947692330aed60,
0x9288aa280d355a7a, 0xa1806a9b791d1696,
0x5d60e38496763da1, 0x6c69e22e613fd0f4,
0x977fc2a5aadffb17, 0xfb7bd063fc5a94ba,
0x460c17992cbaece1, 0xf7822c5444d3297f,
0x344a9790c69b74aa, 0xb80a42e6cae09dce,
0x1b1361eaf2b1e757, 0xd84c1e758e236f01,
0x88e0b7be347627cc, 0x45246009b7a99490,
0x8011c6dd3fe50472, 0xc341d682bffb99d7,
0x2511be93808e2d15, 0xd5bc13d7fd739840,
0x2a3cd030679ae1ec, 0x8ad9898a4b9ee157,
0x3245fef0a8eaf521, 0x3d6d8dbbb427d2b0,
0x1ed146d8968b3981, 0x0c6a28bf7d45f3fc,
0x4a1fd3dbcee3c561, 0x4210ff6a476bf67e,
0xa559cce0d9199aac, 0xde39d47ef3723380,
0xe5b69d848ce42e35, 0xefa24296f8e79f52,
0x70190b59db9a5afc, 0x26f166cdb211e7bf,
0x4deaf2df3c6b8ef5, 0xf171dbdd670f1017,
0xb9059b05e9420d90, 0x2f0da855c9388754,
0x611d5e9ab77949cc, 0x2912038ac01163f4,
0x0231df50402b2fba, 0x45660fc4f3245f58,
0xb91cc97c7c8dac50, 0xb72d2aafe4953427,
0xfa6463f87e813d6b, 0x4515f7ee95d5c6a2,
0x1310e1c1a48d21c3, 0xad48a7810cdd8544,
0x4d5bdfefd5c9e631, 0xa43ed43f1fdcb7de,
0xe70cfc8fe1ee9626, 0xef4711b0d8dda442,
0xb80dd9bd4dab6c93, 0xa23be08d31ba4d93,
0x9b37db9d0335a39c, 0x494b6f870f5cfebc,
0x6d1b3c1149dda943, 0x372c943a518c1093,
0xad27af45e77c09c4, 0x3b6f92b646044604,
0xac2917909f5fcf4f, 0x2069a60e977e5557,
0x353a469e71014de5, 0x24be356281f55c15,
0x2b6d710ba8e9adea, 0x404ad1751c749c29,
0xed7311bf23d7f185, 0xba4f6976b4acc43e,
0x32d7198d2bc39000, 0xee667019014d6e01,
0x494ef3e128d14c83, 0x1f95a152baecd6be,
0x201648dff1f483a5, 0x68c28550c8384af6,
0x5fc834a6824a7f48, 0x7cd06cb7365eaf28,
0xd82bbd95e9b30909, 0x234f0d1694c53f6d,
0xd2fb7f4a96d83f4a, 0xff0d5da83acac05e,
0xf8f6b97f5585080a, 0x74236084be57b95b,
0xa25e40c03bbc36ad, 0x6b6e5c14ce88465b,
0x4378ffe93e1528c5, 0x94ca92a17118e2d2,
}

14
vendor/github.com/ulikunitz/xz/internal/hash/doc.go generated vendored Normal file
View file

@ -0,0 +1,14 @@
// Copyright 2014-2019 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package hash provides rolling hashes.
Rolling hashes have to be used for maintaining the positions of n-byte
sequences in the dictionary buffer.
The package provides currently the Rabin-Karp rolling hash and a Cyclic
Polynomial hash. Both support the Hashes method to be used with an interface.
*/
package hash

View file

@ -0,0 +1,66 @@
// Copyright 2014-2019 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package hash
// A is the default constant for Robin-Karp rolling hash. This is a random
// prime.
const A = 0x97b548add41d5da1
// RabinKarp supports the computation of a rolling hash.
type RabinKarp struct {
A uint64
// a^n
aOldest uint64
h uint64
p []byte
i int
}
// NewRabinKarp creates a new RabinKarp value. The argument n defines the
// length of the byte sequence to be hashed. The default constant will will be
// used.
func NewRabinKarp(n int) *RabinKarp {
return NewRabinKarpConst(n, A)
}
// NewRabinKarpConst creates a new RabinKarp value. The argument n defines the
// length of the byte sequence to be hashed. The argument a provides the
// constant used to compute the hash.
func NewRabinKarpConst(n int, a uint64) *RabinKarp {
if n <= 0 {
panic("number of bytes n must be positive")
}
aOldest := uint64(1)
// There are faster methods. For the small n required by the LZMA
// compressor O(n) is sufficient.
for i := 0; i < n; i++ {
aOldest *= a
}
return &RabinKarp{
A: a, aOldest: aOldest,
p: make([]byte, 0, n),
}
}
// Len returns the length of the byte sequence.
func (r *RabinKarp) Len() int {
return cap(r.p)
}
// RollByte computes the hash after x has been added.
func (r *RabinKarp) RollByte(x byte) uint64 {
if len(r.p) < cap(r.p) {
r.h += uint64(x)
r.h *= r.A
r.p = append(r.p, x)
} else {
r.h -= uint64(r.p[r.i]) * r.aOldest
r.h += uint64(x)
r.h *= r.A
r.p[r.i] = x
r.i = (r.i + 1) % cap(r.p)
}
return r.h
}

29
vendor/github.com/ulikunitz/xz/internal/hash/roller.go generated vendored Normal file
View file

@ -0,0 +1,29 @@
// Copyright 2014-2019 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package hash
// Roller provides an interface for rolling hashes. The hash value will become
// valid after hash has been called Len times.
type Roller interface {
Len() int
RollByte(x byte) uint64
}
// Hashes computes all hash values for the array p. Note that the state of the
// roller is changed.
func Hashes(r Roller, p []byte) []uint64 {
n := r.Len()
if len(p) < n {
return nil
}
h := make([]uint64, len(p)-n+1)
for i := 0; i < n-1; i++ {
r.RollByte(p[i])
}
for i := range h {
h[i] = r.RollByte(p[i+n-1])
}
return h
}

457
vendor/github.com/ulikunitz/xz/internal/xlog/xlog.go generated vendored Normal file
View file

@ -0,0 +1,457 @@
// Copyright 2014-2019 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package xlog provides a simple logging package that allows to disable
// certain message categories. It defines a type, Logger, with multiple
// methods for formatting output. The package has also a predefined
// 'standard' Logger accessible through helper function Print[f|ln],
// Fatal[f|ln], Panic[f|ln], Warn[f|ln], Print[f|ln] and Debug[f|ln]
// that are easier to use then creating a Logger manually. That logger
// writes to standard error and prints the date and time of each logged
// message, which can be configured using the function SetFlags.
//
// The Fatal functions call os.Exit(1) after the message is output
// unless not suppressed by the flags. The Panic functions call panic
// after the writing the log message unless suppressed.
package xlog
import (
"fmt"
"io"
"os"
"runtime"
"sync"
"time"
)
// The flags define what information is prefixed to each log entry
// generated by the Logger. The Lno* versions allow the suppression of
// specific output. The bits are or'ed together to control what will be
// printed. There is no control over the order of the items printed and
// the format. The full format is:
//
// 2009-01-23 01:23:23.123123 /a/b/c/d.go:23: message
//
const (
Ldate = 1 << iota // the date: 2009-01-23
Ltime // the time: 01:23:23
Lmicroseconds // microsecond resolution: 01:23:23.123123
Llongfile // full file name and line number: /a/b/c/d.go:23
Lshortfile // final file name element and line number: d.go:23
Lnopanic // suppresses output from Panic[f|ln] but not the panic call
Lnofatal // suppresses output from Fatal[f|ln] but not the exit
Lnowarn // suppresses output from Warn[f|ln]
Lnoprint // suppresses output from Print[f|ln]
Lnodebug // suppresses output from Debug[f|ln]
// initial values for the standard logger
Lstdflags = Ldate | Ltime | Lnodebug
)
// A Logger represents an active logging object that generates lines of
// output to an io.Writer. Each logging operation if not suppressed
// makes a single call to the Writer's Write method. A Logger can be
// used simultaneously from multiple goroutines; it guarantees to
// serialize access to the Writer.
type Logger struct {
mu sync.Mutex // ensures atomic writes; and protects the following
// fields
prefix string // prefix to write at beginning of each line
flag int // properties
out io.Writer // destination for output
buf []byte // for accumulating text to write
}
// New creates a new Logger. The out argument sets the destination to
// which the log output will be written. The prefix appears at the
// beginning of each log line. The flag argument defines the logging
// properties.
func New(out io.Writer, prefix string, flag int) *Logger {
return &Logger{out: out, prefix: prefix, flag: flag}
}
// std is the standard logger used by the package scope functions.
var std = New(os.Stderr, "", Lstdflags)
// itoa converts the integer to ASCII. A negative widths will avoid
// zero-padding. The function supports only non-negative integers.
func itoa(buf *[]byte, i int, wid int) {
var u = uint(i)
if u == 0 && wid <= 1 {
*buf = append(*buf, '0')
return
}
var b [32]byte
bp := len(b)
for ; u > 0 || wid > 0; u /= 10 {
bp--
wid--
b[bp] = byte(u%10) + '0'
}
*buf = append(*buf, b[bp:]...)
}
// formatHeader puts the header into the buf field of the buffer.
func (l *Logger) formatHeader(t time.Time, file string, line int) {
l.buf = append(l.buf, l.prefix...)
if l.flag&(Ldate|Ltime|Lmicroseconds) != 0 {
if l.flag&Ldate != 0 {
year, month, day := t.Date()
itoa(&l.buf, year, 4)
l.buf = append(l.buf, '-')
itoa(&l.buf, int(month), 2)
l.buf = append(l.buf, '-')
itoa(&l.buf, day, 2)
l.buf = append(l.buf, ' ')
}
if l.flag&(Ltime|Lmicroseconds) != 0 {
hour, min, sec := t.Clock()
itoa(&l.buf, hour, 2)
l.buf = append(l.buf, ':')
itoa(&l.buf, min, 2)
l.buf = append(l.buf, ':')
itoa(&l.buf, sec, 2)
if l.flag&Lmicroseconds != 0 {
l.buf = append(l.buf, '.')
itoa(&l.buf, t.Nanosecond()/1e3, 6)
}
l.buf = append(l.buf, ' ')
}
}
if l.flag&(Lshortfile|Llongfile) != 0 {
if l.flag&Lshortfile != 0 {
short := file
for i := len(file) - 1; i > 0; i-- {
if file[i] == '/' {
short = file[i+1:]
break
}
}
file = short
}
l.buf = append(l.buf, file...)
l.buf = append(l.buf, ':')
itoa(&l.buf, line, -1)
l.buf = append(l.buf, ": "...)
}
}
func (l *Logger) output(calldepth int, now time.Time, s string) error {
var file string
var line int
if l.flag&(Lshortfile|Llongfile) != 0 {
l.mu.Unlock()
var ok bool
_, file, line, ok = runtime.Caller(calldepth)
if !ok {
file = "???"
line = 0
}
l.mu.Lock()
}
l.buf = l.buf[:0]
l.formatHeader(now, file, line)
l.buf = append(l.buf, s...)
if len(s) == 0 || s[len(s)-1] != '\n' {
l.buf = append(l.buf, '\n')
}
_, err := l.out.Write(l.buf)
return err
}
// Output writes the string s with the header controlled by the flags to
// the l.out writer. A newline will be appended if s doesn't end in a
// newline. Calldepth is used to recover the PC, although all current
// calls of Output use the call depth 2. Access to the function is serialized.
func (l *Logger) Output(calldepth, noflag int, v ...interface{}) error {
now := time.Now()
l.mu.Lock()
defer l.mu.Unlock()
if l.flag&noflag != 0 {
return nil
}
s := fmt.Sprint(v...)
return l.output(calldepth+1, now, s)
}
// Outputf works like output but formats the output like Printf.
func (l *Logger) Outputf(calldepth int, noflag int, format string, v ...interface{}) error {
now := time.Now()
l.mu.Lock()
defer l.mu.Unlock()
if l.flag&noflag != 0 {
return nil
}
s := fmt.Sprintf(format, v...)
return l.output(calldepth+1, now, s)
}
// Outputln works like output but formats the output like Println.
func (l *Logger) Outputln(calldepth int, noflag int, v ...interface{}) error {
now := time.Now()
l.mu.Lock()
defer l.mu.Unlock()
if l.flag&noflag != 0 {
return nil
}
s := fmt.Sprintln(v...)
return l.output(calldepth+1, now, s)
}
// Panic prints the message like Print and calls panic. The printing
// might be suppressed by the flag Lnopanic.
func (l *Logger) Panic(v ...interface{}) {
l.Output(2, Lnopanic, v...)
s := fmt.Sprint(v...)
panic(s)
}
// Panic prints the message like Print and calls panic. The printing
// might be suppressed by the flag Lnopanic.
func Panic(v ...interface{}) {
std.Output(2, Lnopanic, v...)
s := fmt.Sprint(v...)
panic(s)
}
// Panicf prints the message like Printf and calls panic. The printing
// might be suppressed by the flag Lnopanic.
func (l *Logger) Panicf(format string, v ...interface{}) {
l.Outputf(2, Lnopanic, format, v...)
s := fmt.Sprintf(format, v...)
panic(s)
}
// Panicf prints the message like Printf and calls panic. The printing
// might be suppressed by the flag Lnopanic.
func Panicf(format string, v ...interface{}) {
std.Outputf(2, Lnopanic, format, v...)
s := fmt.Sprintf(format, v...)
panic(s)
}
// Panicln prints the message like Println and calls panic. The printing
// might be suppressed by the flag Lnopanic.
func (l *Logger) Panicln(v ...interface{}) {
l.Outputln(2, Lnopanic, v...)
s := fmt.Sprintln(v...)
panic(s)
}
// Panicln prints the message like Println and calls panic. The printing
// might be suppressed by the flag Lnopanic.
func Panicln(v ...interface{}) {
std.Outputln(2, Lnopanic, v...)
s := fmt.Sprintln(v...)
panic(s)
}
// Fatal prints the message like Print and calls os.Exit(1). The
// printing might be suppressed by the flag Lnofatal.
func (l *Logger) Fatal(v ...interface{}) {
l.Output(2, Lnofatal, v...)
os.Exit(1)
}
// Fatal prints the message like Print and calls os.Exit(1). The
// printing might be suppressed by the flag Lnofatal.
func Fatal(v ...interface{}) {
std.Output(2, Lnofatal, v...)
os.Exit(1)
}
// Fatalf prints the message like Printf and calls os.Exit(1). The
// printing might be suppressed by the flag Lnofatal.
func (l *Logger) Fatalf(format string, v ...interface{}) {
l.Outputf(2, Lnofatal, format, v...)
os.Exit(1)
}
// Fatalf prints the message like Printf and calls os.Exit(1). The
// printing might be suppressed by the flag Lnofatal.
func Fatalf(format string, v ...interface{}) {
std.Outputf(2, Lnofatal, format, v...)
os.Exit(1)
}
// Fatalln prints the message like Println and calls os.Exit(1). The
// printing might be suppressed by the flag Lnofatal.
func (l *Logger) Fatalln(format string, v ...interface{}) {
l.Outputln(2, Lnofatal, v...)
os.Exit(1)
}
// Fatalln prints the message like Println and calls os.Exit(1). The
// printing might be suppressed by the flag Lnofatal.
func Fatalln(format string, v ...interface{}) {
std.Outputln(2, Lnofatal, v...)
os.Exit(1)
}
// Warn prints the message like Print. The printing might be suppressed
// by the flag Lnowarn.
func (l *Logger) Warn(v ...interface{}) {
l.Output(2, Lnowarn, v...)
}
// Warn prints the message like Print. The printing might be suppressed
// by the flag Lnowarn.
func Warn(v ...interface{}) {
std.Output(2, Lnowarn, v...)
}
// Warnf prints the message like Printf. The printing might be suppressed
// by the flag Lnowarn.
func (l *Logger) Warnf(format string, v ...interface{}) {
l.Outputf(2, Lnowarn, format, v...)
}
// Warnf prints the message like Printf. The printing might be suppressed
// by the flag Lnowarn.
func Warnf(format string, v ...interface{}) {
std.Outputf(2, Lnowarn, format, v...)
}
// Warnln prints the message like Println. The printing might be suppressed
// by the flag Lnowarn.
func (l *Logger) Warnln(v ...interface{}) {
l.Outputln(2, Lnowarn, v...)
}
// Warnln prints the message like Println. The printing might be suppressed
// by the flag Lnowarn.
func Warnln(v ...interface{}) {
std.Outputln(2, Lnowarn, v...)
}
// Print prints the message like fmt.Print. The printing might be suppressed
// by the flag Lnoprint.
func (l *Logger) Print(v ...interface{}) {
l.Output(2, Lnoprint, v...)
}
// Print prints the message like fmt.Print. The printing might be suppressed
// by the flag Lnoprint.
func Print(v ...interface{}) {
std.Output(2, Lnoprint, v...)
}
// Printf prints the message like fmt.Printf. The printing might be suppressed
// by the flag Lnoprint.
func (l *Logger) Printf(format string, v ...interface{}) {
l.Outputf(2, Lnoprint, format, v...)
}
// Printf prints the message like fmt.Printf. The printing might be suppressed
// by the flag Lnoprint.
func Printf(format string, v ...interface{}) {
std.Outputf(2, Lnoprint, format, v...)
}
// Println prints the message like fmt.Println. The printing might be
// suppressed by the flag Lnoprint.
func (l *Logger) Println(v ...interface{}) {
l.Outputln(2, Lnoprint, v...)
}
// Println prints the message like fmt.Println. The printing might be
// suppressed by the flag Lnoprint.
func Println(v ...interface{}) {
std.Outputln(2, Lnoprint, v...)
}
// Debug prints the message like Print. The printing might be suppressed
// by the flag Lnodebug.
func (l *Logger) Debug(v ...interface{}) {
l.Output(2, Lnodebug, v...)
}
// Debug prints the message like Print. The printing might be suppressed
// by the flag Lnodebug.
func Debug(v ...interface{}) {
std.Output(2, Lnodebug, v...)
}
// Debugf prints the message like Printf. The printing might be suppressed
// by the flag Lnodebug.
func (l *Logger) Debugf(format string, v ...interface{}) {
l.Outputf(2, Lnodebug, format, v...)
}
// Debugf prints the message like Printf. The printing might be suppressed
// by the flag Lnodebug.
func Debugf(format string, v ...interface{}) {
std.Outputf(2, Lnodebug, format, v...)
}
// Debugln prints the message like Println. The printing might be suppressed
// by the flag Lnodebug.
func (l *Logger) Debugln(v ...interface{}) {
l.Outputln(2, Lnodebug, v...)
}
// Debugln prints the message like Println. The printing might be suppressed
// by the flag Lnodebug.
func Debugln(v ...interface{}) {
std.Outputln(2, Lnodebug, v...)
}
// Flags returns the current flags used by the logger.
func (l *Logger) Flags() int {
l.mu.Lock()
defer l.mu.Unlock()
return l.flag
}
// Flags returns the current flags used by the standard logger.
func Flags() int {
return std.Flags()
}
// SetFlags sets the flags of the logger.
func (l *Logger) SetFlags(flag int) {
l.mu.Lock()
defer l.mu.Unlock()
l.flag = flag
}
// SetFlags sets the flags for the standard logger.
func SetFlags(flag int) {
std.SetFlags(flag)
}
// Prefix returns the prefix used by the logger.
func (l *Logger) Prefix() string {
l.mu.Lock()
defer l.mu.Unlock()
return l.prefix
}
// Prefix returns the prefix used by the standard logger of the package.
func Prefix() string {
return std.Prefix()
}
// SetPrefix sets the prefix for the logger.
func (l *Logger) SetPrefix(prefix string) {
l.mu.Lock()
defer l.mu.Unlock()
l.prefix = prefix
}
// SetPrefix sets the prefix of the standard logger of the package.
func SetPrefix(prefix string) {
std.SetPrefix(prefix)
}
// SetOutput sets the output of the logger.
func (l *Logger) SetOutput(w io.Writer) {
l.mu.Lock()
defer l.mu.Unlock()
l.out = w
}
// SetOutput sets the output for the standard logger of the package.
func SetOutput(w io.Writer) {
std.SetOutput(w)
}

523
vendor/github.com/ulikunitz/xz/lzma/bintree.go generated vendored Normal file
View file

@ -0,0 +1,523 @@
// Copyright 2014-2019 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"bufio"
"errors"
"fmt"
"io"
"unicode"
)
// node represents a node in the binary tree.
type node struct {
// x is the search value
x uint32
// p parent node
p uint32
// l left child
l uint32
// r right child
r uint32
}
// wordLen is the number of bytes represented by the v field of a node.
const wordLen = 4
// binTree supports the identification of the next operation based on a
// binary tree.
//
// Nodes will be identified by their index into the ring buffer.
type binTree struct {
dict *encoderDict
// ring buffer of nodes
node []node
// absolute offset of the entry for the next node. Position 4
// byte larger.
hoff int64
// front position in the node ring buffer
front uint32
// index of the root node
root uint32
// current x value
x uint32
// preallocated array
data []byte
}
// null represents the nonexistent index. We can't use zero because it
// would always exist or we would need to decrease the index for each
// reference.
const null uint32 = 1<<32 - 1
// newBinTree initializes the binTree structure. The capacity defines
// the size of the buffer and defines the maximum distance for which
// matches will be found.
func newBinTree(capacity int) (t *binTree, err error) {
if capacity < 1 {
return nil, errors.New(
"newBinTree: capacity must be larger than zero")
}
if int64(capacity) >= int64(null) {
return nil, errors.New(
"newBinTree: capacity must less 2^{32}-1")
}
t = &binTree{
node: make([]node, capacity),
hoff: -int64(wordLen),
root: null,
data: make([]byte, maxMatchLen),
}
return t, nil
}
func (t *binTree) SetDict(d *encoderDict) { t.dict = d }
// WriteByte writes a single byte into the binary tree.
func (t *binTree) WriteByte(c byte) error {
t.x = (t.x << 8) | uint32(c)
t.hoff++
if t.hoff < 0 {
return nil
}
v := t.front
if int64(v) < t.hoff {
// We are overwriting old nodes stored in the tree.
t.remove(v)
}
t.node[v].x = t.x
t.add(v)
t.front++
if int64(t.front) >= int64(len(t.node)) {
t.front = 0
}
return nil
}
// Writes writes a sequence of bytes into the binTree structure.
func (t *binTree) Write(p []byte) (n int, err error) {
for _, c := range p {
t.WriteByte(c)
}
return len(p), nil
}
// add puts the node v into the tree. The node must not be part of the
// tree before.
func (t *binTree) add(v uint32) {
vn := &t.node[v]
// Set left and right to null indices.
vn.l, vn.r = null, null
// If the binary tree is empty make v the root.
if t.root == null {
t.root = v
vn.p = null
return
}
x := vn.x
p := t.root
// Search for the right leave link and add the new node.
for {
pn := &t.node[p]
if x <= pn.x {
if pn.l == null {
pn.l = v
vn.p = p
return
}
p = pn.l
} else {
if pn.r == null {
pn.r = v
vn.p = p
return
}
p = pn.r
}
}
}
// parent returns the parent node index of v and the pointer to v value
// in the parent.
func (t *binTree) parent(v uint32) (p uint32, ptr *uint32) {
if t.root == v {
return null, &t.root
}
p = t.node[v].p
if t.node[p].l == v {
ptr = &t.node[p].l
} else {
ptr = &t.node[p].r
}
return
}
// Remove node v.
func (t *binTree) remove(v uint32) {
vn := &t.node[v]
p, ptr := t.parent(v)
l, r := vn.l, vn.r
if l == null {
// Move the right child up.
*ptr = r
if r != null {
t.node[r].p = p
}
return
}
if r == null {
// Move the left child up.
*ptr = l
t.node[l].p = p
return
}
// Search the in-order predecessor u.
un := &t.node[l]
ur := un.r
if ur == null {
// In order predecessor is l. Move it up.
un.r = r
t.node[r].p = l
un.p = p
*ptr = l
return
}
var u uint32
for {
// Look for the max value in the tree where l is root.
u = ur
ur = t.node[u].r
if ur == null {
break
}
}
// replace u with ul
un = &t.node[u]
ul := un.l
up := un.p
t.node[up].r = ul
if ul != null {
t.node[ul].p = up
}
// replace v by u
un.l, un.r = l, r
t.node[l].p = u
t.node[r].p = u
*ptr = u
un.p = p
}
// search looks for the node that have the value x or for the nodes that
// brace it. The node highest in the tree with the value x will be
// returned. All other nodes with the same value live in left subtree of
// the returned node.
func (t *binTree) search(v uint32, x uint32) (a, b uint32) {
a, b = null, null
if v == null {
return
}
for {
vn := &t.node[v]
if x <= vn.x {
if x == vn.x {
return v, v
}
b = v
if vn.l == null {
return
}
v = vn.l
} else {
a = v
if vn.r == null {
return
}
v = vn.r
}
}
}
// max returns the node with maximum value in the subtree with v as
// root.
func (t *binTree) max(v uint32) uint32 {
if v == null {
return null
}
for {
r := t.node[v].r
if r == null {
return v
}
v = r
}
}
// min returns the node with the minimum value in the subtree with v as
// root.
func (t *binTree) min(v uint32) uint32 {
if v == null {
return null
}
for {
l := t.node[v].l
if l == null {
return v
}
v = l
}
}
// pred returns the in-order predecessor of node v.
func (t *binTree) pred(v uint32) uint32 {
if v == null {
return null
}
u := t.max(t.node[v].l)
if u != null {
return u
}
for {
p := t.node[v].p
if p == null {
return null
}
if t.node[p].r == v {
return p
}
v = p
}
}
// succ returns the in-order successor of node v.
func (t *binTree) succ(v uint32) uint32 {
if v == null {
return null
}
u := t.min(t.node[v].r)
if u != null {
return u
}
for {
p := t.node[v].p
if p == null {
return null
}
if t.node[p].l == v {
return p
}
v = p
}
}
// xval converts the first four bytes of a into an 32-bit unsigned
// integer in big-endian order.
func xval(a []byte) uint32 {
var x uint32
switch len(a) {
default:
x |= uint32(a[3])
fallthrough
case 3:
x |= uint32(a[2]) << 8
fallthrough
case 2:
x |= uint32(a[1]) << 16
fallthrough
case 1:
x |= uint32(a[0]) << 24
case 0:
}
return x
}
// dumpX converts value x into a four-letter string.
func dumpX(x uint32) string {
a := make([]byte, 4)
for i := 0; i < 4; i++ {
c := byte(x >> uint((3-i)*8))
if unicode.IsGraphic(rune(c)) {
a[i] = c
} else {
a[i] = '.'
}
}
return string(a)
}
// dumpNode writes a representation of the node v into the io.Writer.
func (t *binTree) dumpNode(w io.Writer, v uint32, indent int) {
if v == null {
return
}
vn := &t.node[v]
t.dumpNode(w, vn.r, indent+2)
for i := 0; i < indent; i++ {
fmt.Fprint(w, " ")
}
if vn.p == null {
fmt.Fprintf(w, "node %d %q parent null\n", v, dumpX(vn.x))
} else {
fmt.Fprintf(w, "node %d %q parent %d\n", v, dumpX(vn.x), vn.p)
}
t.dumpNode(w, vn.l, indent+2)
}
// dump prints a representation of the binary tree into the writer.
func (t *binTree) dump(w io.Writer) error {
bw := bufio.NewWriter(w)
t.dumpNode(bw, t.root, 0)
return bw.Flush()
}
func (t *binTree) distance(v uint32) int {
dist := int(t.front) - int(v)
if dist <= 0 {
dist += len(t.node)
}
return dist
}
type matchParams struct {
rep [4]uint32
// length when match will be accepted
nAccept int
// nodes to check
check int
// finish if length get shorter
stopShorter bool
}
func (t *binTree) match(m match, distIter func() (int, bool), p matchParams,
) (r match, checked int, accepted bool) {
buf := &t.dict.buf
for {
if checked >= p.check {
return m, checked, true
}
dist, ok := distIter()
if !ok {
return m, checked, false
}
checked++
if m.n > 0 {
i := buf.rear - dist + m.n - 1
if i < 0 {
i += len(buf.data)
} else if i >= len(buf.data) {
i -= len(buf.data)
}
if buf.data[i] != t.data[m.n-1] {
if p.stopShorter {
return m, checked, false
}
continue
}
}
n := buf.matchLen(dist, t.data)
switch n {
case 0:
if p.stopShorter {
return m, checked, false
}
continue
case 1:
if uint32(dist-minDistance) != p.rep[0] {
continue
}
}
if n < m.n || (n == m.n && int64(dist) >= m.distance) {
continue
}
m = match{int64(dist), n}
if n >= p.nAccept {
return m, checked, true
}
}
}
func (t *binTree) NextOp(rep [4]uint32) operation {
// retrieve maxMatchLen data
n, _ := t.dict.buf.Peek(t.data[:maxMatchLen])
if n == 0 {
panic("no data in buffer")
}
t.data = t.data[:n]
var (
m match
x, u, v uint32
iterPred, iterSucc func() (int, bool)
)
p := matchParams{
rep: rep,
nAccept: maxMatchLen,
check: 32,
}
i := 4
iterSmall := func() (dist int, ok bool) {
i--
if i <= 0 {
return 0, false
}
return i, true
}
m, checked, accepted := t.match(m, iterSmall, p)
if accepted {
goto end
}
p.check -= checked
x = xval(t.data)
u, v = t.search(t.root, x)
if u == v && len(t.data) == 4 {
iter := func() (dist int, ok bool) {
if u == null {
return 0, false
}
dist = t.distance(u)
u, v = t.search(t.node[u].l, x)
if u != v {
u = null
}
return dist, true
}
m, _, _ = t.match(m, iter, p)
goto end
}
p.stopShorter = true
iterSucc = func() (dist int, ok bool) {
if v == null {
return 0, false
}
dist = t.distance(v)
v = t.succ(v)
return dist, true
}
m, checked, accepted = t.match(m, iterSucc, p)
if accepted {
goto end
}
p.check -= checked
iterPred = func() (dist int, ok bool) {
if u == null {
return 0, false
}
dist = t.distance(u)
u = t.pred(u)
return dist, true
}
m, _, _ = t.match(m, iterPred, p)
end:
if m.n == 0 {
return lit{t.data[0]}
}
return m
}

Some files were not shown because too many files have changed in this diff Show more