forked from TrueCloudLab/restic
Update chunker
This commit is contained in:
parent
bb7b9ef3fc
commit
3db569c45a
7 changed files with 299 additions and 257 deletions
4
vendor/manifest
vendored
4
vendor/manifest
vendored
|
@ -40,8 +40,8 @@
|
|||
{
|
||||
"importpath": "github.com/restic/chunker",
|
||||
"repository": "https://github.com/restic/chunker",
|
||||
"revision": "fc45043175c38d59374024a38fb7123c40a64f20",
|
||||
"branch": "HEAD"
|
||||
"revision": "16c849a106e0a50d658e8f5e49a01f6728f4f92c",
|
||||
"branch": "master"
|
||||
},
|
||||
{
|
||||
"importpath": "golang.org/x/crypto/pbkdf2",
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
[![GoDoc](https://godoc.org/github.com/restic/chunker?status.svg)](http://godoc.org/github.com/restic/chunker)
|
||||
[![Build Status](https://travis-ci.org/restic/chunker.svg?branch=master)](https://travis-ci.org/restic/chunker)
|
||||
|
||||
Content Defined Chunking (CDC) based on a rolling Rabin Checksum.
|
||||
|
||||
Part of https://github.com/restic/restic.
|
||||
The package `chunker` implements content-defined-chunking (CDC) based on a
|
||||
rolling Rabin Hash. The library is part of the [restic backup
|
||||
program](https://github.com/restic/restic).
|
||||
|
||||
An introduction to Content Defined Chunking can be found in the restic blog
|
||||
post [Foundation - Introducing Content Defined Chunking (CDC)](https://restic.github.io/blog/2015-09-12/restic-foundation1-cdc/).
|
||||
post [Foundation - Introducing Content Defined Chunking (CDC)](https://restic.github.io/blog/2015-09-12/restic-foundation1-cdc).
|
||||
|
||||
You can find the API documentation at
|
||||
https://godoc.org/github.com/restic/chunker
|
||||
|
|
110
vendor/src/github.com/restic/chunker/chunker.go
vendored
110
vendor/src/github.com/restic/chunker/chunker.go
vendored
|
@ -2,14 +2,13 @@ package chunker
|
|||
|
||||
import (
|
||||
"errors"
|
||||
"hash"
|
||||
"io"
|
||||
"sync"
|
||||
)
|
||||
|
||||
const (
|
||||
KiB = 1024
|
||||
MiB = 1024 * KiB
|
||||
kiB = 1024
|
||||
miB = 1024 * kiB
|
||||
|
||||
// windowSize is the size of the sliding window.
|
||||
windowSize = 64
|
||||
|
@ -17,20 +16,16 @@ const (
|
|||
// aim to create chunks of about 1MiB (2^20 bytes) on average: a chunk ends when the lowest 20 bits of the fingerprint are zero.
|
||||
averageBits = 20
|
||||
|
||||
// MinSize is the minimal size of a chunk.
|
||||
MinSize = 512 * KiB
|
||||
// MaxSize is the maximal size of a chunk.
|
||||
MaxSize = 8 * MiB
|
||||
// MinSize is the default minimal size of a chunk.
|
||||
MinSize = 512 * kiB
|
||||
// MaxSize is the default maximal size of a chunk.
|
||||
MaxSize = 8 * miB
|
||||
|
||||
splitmask = (1 << averageBits) - 1
|
||||
|
||||
chunkerBufSize = 512 * KiB
|
||||
chunkerBufSize = 512 * kiB
|
||||
)
|
||||
|
||||
var bufPool = sync.Pool{
|
||||
New: func() interface{} { return make([]byte, chunkerBufSize) },
|
||||
}
|
||||
|
||||
type tables struct {
|
||||
out [256]Pol
|
||||
mod [256]Pol
|
||||
|
@ -52,15 +47,13 @@ type Chunk struct {
|
|||
Start uint
|
||||
Length uint
|
||||
Cut uint64
|
||||
Digest []byte
|
||||
}
|
||||
|
||||
func (c Chunk) Reader(r io.ReaderAt) io.Reader {
|
||||
return io.NewSectionReader(r, int64(c.Start), int64(c.Length))
|
||||
Data []byte
|
||||
}
|
||||
|
||||
// Chunker splits content with Rabin Fingerprints.
|
||||
type Chunker struct {
|
||||
MinSize, MaxSize uint
|
||||
|
||||
pol Pol
|
||||
polShift uint
|
||||
tables *tables
|
||||
|
@ -82,17 +75,17 @@ type Chunker struct {
|
|||
pre uint // wait for this many bytes before starting to calculate a new chunk
|
||||
|
||||
digest uint64
|
||||
h hash.Hash
|
||||
}
|
||||
|
||||
// New returns a new Chunker based on polynomial pol that reads from rd
|
||||
// using an internal buffer of chunkerBufSize bytes.
|
||||
func New(rd io.Reader, pol Pol, h hash.Hash) *Chunker {
|
||||
func New(rd io.Reader, pol Pol) *Chunker {
|
||||
c := &Chunker{
|
||||
buf: bufPool.Get().([]byte),
|
||||
h: h,
|
||||
pol: pol,
|
||||
rd: rd,
|
||||
buf: make([]byte, chunkerBufSize),
|
||||
pol: pol,
|
||||
rd: rd,
|
||||
MinSize: MinSize,
|
||||
MaxSize: MaxSize,
|
||||
}
|
||||
|
||||
c.reset()
|
||||
|
@ -100,6 +93,19 @@ func New(rd io.Reader, pol Pol, h hash.Hash) *Chunker {
|
|||
return c
|
||||
}
|
||||
|
||||
// Reset reinitializes the chunker with a new reader and polynomial.
|
||||
func (c *Chunker) Reset(rd io.Reader, pol Pol) {
|
||||
*c = Chunker{
|
||||
buf: c.buf,
|
||||
pol: pol,
|
||||
rd: rd,
|
||||
MinSize: c.MinSize,
|
||||
MaxSize: c.MaxSize,
|
||||
}
|
||||
|
||||
c.reset()
|
||||
}
|
||||
|
||||
func (c *Chunker) reset() {
|
||||
c.polShift = uint(c.pol.Deg() - 8)
|
||||
c.fillTables()
|
||||
|
@ -115,12 +121,8 @@ func (c *Chunker) reset() {
|
|||
c.slide(1)
|
||||
c.start = c.pos
|
||||
|
||||
if c.h != nil {
|
||||
c.h.Reset()
|
||||
}
|
||||
|
||||
// do not start a new chunk unless at least MinSize bytes have been read
|
||||
c.pre = MinSize - windowSize
|
||||
c.pre = c.MinSize - windowSize
|
||||
}
|
||||
|
||||
// Calculate out_table and mod_table for optimization. Must be called only
|
||||
|
@ -179,12 +181,13 @@ func (c *Chunker) fillTables() {
|
|||
}
|
||||
|
||||
// Next returns the position and length of the next chunk of data. If an error
|
||||
// occurs while reading, the error is returned with a nil chunk. The state of
|
||||
// the current chunk is undefined. When the last chunk has been returned, all
|
||||
// subsequent calls yield a nil chunk and an io.EOF error.
|
||||
func (c *Chunker) Next() (*Chunk, error) {
|
||||
// occurs while reading, the error is returned. Afterwards, the state of the
|
||||
// current chunk is undefined. When the last chunk has been returned, all
|
||||
// subsequent calls yield an io.EOF error.
|
||||
func (c *Chunker) Next(data []byte) (Chunk, error) {
|
||||
data = data[:0]
|
||||
if c.tables == nil {
|
||||
return nil, errors.New("polynomial is not set")
|
||||
return Chunk{}, errors.New("polynomial is not set")
|
||||
}
|
||||
|
||||
for {
|
||||
|
@ -203,22 +206,19 @@ func (c *Chunker) Next() (*Chunk, error) {
|
|||
if err == io.EOF && !c.closed {
|
||||
c.closed = true
|
||||
|
||||
// return the buffer to the pool
|
||||
bufPool.Put(c.buf)
|
||||
|
||||
// return current chunk, if any bytes have been processed
|
||||
if c.count > 0 {
|
||||
return &Chunk{
|
||||
return Chunk{
|
||||
Start: c.start,
|
||||
Length: c.count,
|
||||
Cut: c.digest,
|
||||
Digest: c.hashDigest(),
|
||||
Data: data,
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return Chunk{}, err
|
||||
}
|
||||
|
||||
c.bpos = 0
|
||||
|
@ -230,7 +230,7 @@ func (c *Chunker) Next() (*Chunk, error) {
|
|||
n := c.bmax - c.bpos
|
||||
if c.pre > uint(n) {
|
||||
c.pre -= uint(n)
|
||||
c.updateHash(c.buf[c.bpos:c.bmax])
|
||||
data = append(data, c.buf[c.bpos:c.bmax]...)
|
||||
|
||||
c.count += uint(n)
|
||||
c.pos += uint(n)
|
||||
|
@ -239,7 +239,7 @@ func (c *Chunker) Next() (*Chunk, error) {
|
|||
continue
|
||||
}
|
||||
|
||||
c.updateHash(c.buf[c.bpos : c.bpos+c.pre])
|
||||
data = append(data, c.buf[c.bpos:c.bpos+c.pre]...)
|
||||
|
||||
c.bpos += c.pre
|
||||
c.count += c.pre
|
||||
|
@ -264,22 +264,22 @@ func (c *Chunker) Next() (*Chunk, error) {
|
|||
// end inline
|
||||
|
||||
add++
|
||||
if add < MinSize {
|
||||
if add < c.MinSize {
|
||||
continue
|
||||
}
|
||||
|
||||
if (c.digest&splitmask) == 0 || add >= MaxSize {
|
||||
i := add - c.count - 1
|
||||
c.updateHash(c.buf[c.bpos : c.bpos+uint(i)+1])
|
||||
data = append(data, c.buf[c.bpos:c.bpos+uint(i)+1]...)
|
||||
c.count = add
|
||||
c.pos += uint(i) + 1
|
||||
c.bpos += uint(i) + 1
|
||||
|
||||
chunk := &Chunk{
|
||||
chunk := Chunk{
|
||||
Start: c.start,
|
||||
Length: c.count,
|
||||
Cut: c.digest,
|
||||
Digest: c.hashDigest(),
|
||||
Data: data,
|
||||
}
|
||||
|
||||
c.reset()
|
||||
|
@ -290,7 +290,7 @@ func (c *Chunker) Next() (*Chunk, error) {
|
|||
|
||||
steps := c.bmax - c.bpos
|
||||
if steps > 0 {
|
||||
c.updateHash(c.buf[c.bpos : c.bpos+steps])
|
||||
data = append(data, c.buf[c.bpos:c.bpos+steps]...)
|
||||
}
|
||||
c.count += steps
|
||||
c.pos += steps
|
||||
|
@ -298,24 +298,6 @@ func (c *Chunker) Next() (*Chunk, error) {
|
|||
}
|
||||
}
|
||||
|
||||
func (c *Chunker) updateHash(data []byte) {
|
||||
if c.h != nil {
|
||||
// the hashes from crypto/sha* do not return an error
|
||||
_, err := c.h.Write(data)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Chunker) hashDigest() []byte {
|
||||
if c.h == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return c.h.Sum(nil)
|
||||
}
|
||||
|
||||
func (c *Chunker) append(b byte) {
|
||||
index := c.digest >> c.polShift
|
||||
c.digest <<= 8
|
||||
|
|
226
vendor/src/github.com/restic/chunker/chunker_test.go
vendored
226
vendor/src/github.com/restic/chunker/chunker_test.go
vendored
|
@ -1,19 +1,13 @@
|
|||
package chunker_test
|
||||
package chunker
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/md5"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"hash"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"math/rand"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/restic/chunker"
|
||||
. "github.com/restic/restic/test"
|
||||
)
|
||||
|
||||
func parseDigest(s string) []byte {
|
||||
|
@ -32,7 +26,7 @@ type chunk struct {
|
|||
}
|
||||
|
||||
// polynomial used for all the tests below
|
||||
const testPol = chunker.Pol(0x3DA3358B4DC173)
|
||||
const testPol = Pol(0x3DA3358B4DC173)
|
||||
|
||||
// created for 32MB of random data out of math/rand's Uint32() seeded by
|
||||
// constant 23
|
||||
|
@ -68,59 +62,51 @@ var chunks1 = []chunk{
|
|||
|
||||
// test if nullbytes are correctly split, even if length is a multiple of MinSize.
|
||||
var chunks2 = []chunk{
|
||||
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
|
||||
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
|
||||
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
|
||||
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
|
||||
chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
|
||||
chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
|
||||
chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
|
||||
chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
|
||||
}
|
||||
|
||||
func testWithData(t *testing.T, chnker *chunker.Chunker, testChunks []chunk) []*chunker.Chunk {
|
||||
chunks := []*chunker.Chunk{}
|
||||
func testWithData(t *testing.T, chnker *Chunker, testChunks []chunk, checkDigest bool) []Chunk {
|
||||
chunks := []Chunk{}
|
||||
|
||||
pos := uint(0)
|
||||
for i, chunk := range testChunks {
|
||||
c, err := chnker.Next()
|
||||
c, err := chnker.Next(nil)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("Error returned with chunk %d: %v", i, err)
|
||||
}
|
||||
|
||||
if c == nil {
|
||||
t.Fatalf("Nil chunk returned")
|
||||
if c.Start != pos {
|
||||
t.Fatalf("Start for chunk %d does not match: expected %d, got %d",
|
||||
i, pos, c.Start)
|
||||
}
|
||||
|
||||
if c != nil {
|
||||
if c.Start != pos {
|
||||
t.Fatalf("Start for chunk %d does not match: expected %d, got %d",
|
||||
i, pos, c.Start)
|
||||
}
|
||||
if c.Length != chunk.Length {
|
||||
t.Fatalf("Length for chunk %d does not match: expected %d, got %d",
|
||||
i, chunk.Length, c.Length)
|
||||
}
|
||||
|
||||
if c.Length != chunk.Length {
|
||||
t.Fatalf("Length for chunk %d does not match: expected %d, got %d",
|
||||
i, chunk.Length, c.Length)
|
||||
}
|
||||
if c.Cut != chunk.CutFP {
|
||||
t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x",
|
||||
i, len(chunks)-1, chunk.CutFP, c.Cut)
|
||||
}
|
||||
|
||||
if c.Cut != chunk.CutFP {
|
||||
t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x",
|
||||
i, len(chunks)-1, chunk.CutFP, c.Cut)
|
||||
}
|
||||
|
||||
if c.Digest != nil && !bytes.Equal(c.Digest, chunk.Digest) {
|
||||
if checkDigest {
|
||||
digest := hashData(c.Data)
|
||||
if !bytes.Equal(chunk.Digest, digest) {
|
||||
t.Fatalf("Digest fingerprint for chunk %d/%d does not match: expected %02x, got %02x",
|
||||
i, len(chunks)-1, chunk.Digest, c.Digest)
|
||||
i, len(chunks)-1, chunk.Digest, digest)
|
||||
}
|
||||
|
||||
pos += c.Length
|
||||
chunks = append(chunks, c)
|
||||
}
|
||||
|
||||
pos += c.Length
|
||||
chunks = append(chunks, c)
|
||||
}
|
||||
|
||||
c, err := chnker.Next()
|
||||
|
||||
if c != nil {
|
||||
t.Fatal("additional non-nil chunk returned")
|
||||
}
|
||||
|
||||
_, err := chnker.Next(nil)
|
||||
if err != io.EOF {
|
||||
t.Fatal("wrong error returned after last chunk")
|
||||
}
|
||||
|
@ -143,39 +129,32 @@ func getRandom(seed, count int) []byte {
|
|||
return buf
|
||||
}
|
||||
|
||||
func hashData(d []byte) []byte {
|
||||
h := sha256.New()
|
||||
h.Write(d)
|
||||
return h.Sum(nil)
|
||||
}
|
||||
|
||||
func TestChunker(t *testing.T) {
|
||||
// setup data source
|
||||
buf := getRandom(23, 32*1024*1024)
|
||||
ch := chunker.New(bytes.NewReader(buf), testPol, sha256.New())
|
||||
chunks := testWithData(t, ch, chunks1)
|
||||
|
||||
// test reader
|
||||
for i, c := range chunks {
|
||||
rd := c.Reader(bytes.NewReader(buf))
|
||||
|
||||
h := sha256.New()
|
||||
n, err := io.Copy(h, rd)
|
||||
if err != nil {
|
||||
t.Fatalf("io.Copy(): %v", err)
|
||||
}
|
||||
|
||||
if uint(n) != chunks1[i].Length {
|
||||
t.Fatalf("reader returned wrong number of bytes: expected %d, got %d",
|
||||
chunks1[i].Length, n)
|
||||
}
|
||||
|
||||
d := h.Sum(nil)
|
||||
if !bytes.Equal(d, chunks1[i].Digest) {
|
||||
t.Fatalf("wrong hash returned: expected %02x, got %02x",
|
||||
chunks1[i].Digest, d)
|
||||
}
|
||||
}
|
||||
ch := New(bytes.NewReader(buf), testPol)
|
||||
testWithData(t, ch, chunks1, true)
|
||||
|
||||
// setup nullbyte data source
|
||||
buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize)
|
||||
ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New())
|
||||
buf = bytes.Repeat([]byte{0}, len(chunks2)*MinSize)
|
||||
ch = New(bytes.NewReader(buf), testPol)
|
||||
|
||||
testWithData(t, ch, chunks2)
|
||||
testWithData(t, ch, chunks2, true)
|
||||
}
|
||||
|
||||
func TestChunkerReset(t *testing.T) {
|
||||
buf := getRandom(23, 32*1024*1024)
|
||||
ch := New(bytes.NewReader(buf), testPol)
|
||||
testWithData(t, ch, chunks1, true)
|
||||
|
||||
ch.Reset(bytes.NewReader(buf), testPol)
|
||||
testWithData(t, ch, chunks1, true)
|
||||
}
|
||||
|
||||
func TestChunkerWithRandomPolynomial(t *testing.T) {
|
||||
|
@ -184,67 +163,64 @@ func TestChunkerWithRandomPolynomial(t *testing.T) {
|
|||
|
||||
// generate a new random polynomial
|
||||
start := time.Now()
|
||||
p, err := chunker.RandomPolynomial()
|
||||
OK(t, err)
|
||||
p, err := RandomPolynomial()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
t.Logf("generating random polynomial took %v", time.Since(start))
|
||||
|
||||
start = time.Now()
|
||||
ch := chunker.New(bytes.NewReader(buf), p, sha256.New())
|
||||
ch := New(bytes.NewReader(buf), p)
|
||||
t.Logf("creating chunker took %v", time.Since(start))
|
||||
|
||||
// make sure that first chunk is different
|
||||
c, err := ch.Next()
|
||||
c, err := ch.Next(nil)
|
||||
|
||||
Assert(t, c.Cut != chunks1[0].CutFP,
|
||||
"Cut point is the same")
|
||||
Assert(t, c.Length != chunks1[0].Length,
|
||||
"Length is the same")
|
||||
Assert(t, !bytes.Equal(c.Digest, chunks1[0].Digest),
|
||||
"Digest is the same")
|
||||
if c.Cut == chunks1[0].CutFP {
|
||||
t.Fatal("Cut point is the same")
|
||||
}
|
||||
|
||||
if c.Length == chunks1[0].Length {
|
||||
t.Fatal("Length is the same")
|
||||
}
|
||||
|
||||
if bytes.Equal(hashData(c.Data), chunks1[0].Digest) {
|
||||
t.Fatal("Digest is the same")
|
||||
}
|
||||
}
|
||||
|
||||
func TestChunkerWithoutHash(t *testing.T) {
|
||||
// setup data source
|
||||
buf := getRandom(23, 32*1024*1024)
|
||||
|
||||
ch := chunker.New(bytes.NewReader(buf), testPol, nil)
|
||||
chunks := testWithData(t, ch, chunks1)
|
||||
ch := New(bytes.NewReader(buf), testPol)
|
||||
chunks := testWithData(t, ch, chunks1, false)
|
||||
|
||||
// test reader
|
||||
for i, c := range chunks {
|
||||
rd := c.Reader(bytes.NewReader(buf))
|
||||
|
||||
buf2, err := ioutil.ReadAll(rd)
|
||||
if err != nil {
|
||||
t.Fatalf("io.Copy(): %v", err)
|
||||
}
|
||||
|
||||
if uint(len(buf2)) != chunks1[i].Length {
|
||||
if uint(len(c.Data)) != chunks1[i].Length {
|
||||
t.Fatalf("reader returned wrong number of bytes: expected %d, got %d",
|
||||
chunks1[i].Length, uint(len(buf2)))
|
||||
chunks1[i].Length, len(c.Data))
|
||||
}
|
||||
|
||||
if uint(len(buf2)) != chunks1[i].Length {
|
||||
t.Fatalf("wrong number of bytes returned: expected %02x, got %02x",
|
||||
chunks[i].Length, len(buf2))
|
||||
}
|
||||
|
||||
if !bytes.Equal(buf[c.Start:c.Start+c.Length], buf2) {
|
||||
if !bytes.Equal(buf[c.Start:c.Start+c.Length], c.Data) {
|
||||
t.Fatalf("invalid data for chunk returned: expected %02x, got %02x",
|
||||
buf[c.Start:c.Start+c.Length], buf2)
|
||||
buf[c.Start:c.Start+c.Length], c.Data)
|
||||
}
|
||||
}
|
||||
|
||||
// setup nullbyte data source
|
||||
buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize)
|
||||
ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New())
|
||||
buf = bytes.Repeat([]byte{0}, len(chunks2)*MinSize)
|
||||
ch = New(bytes.NewReader(buf), testPol)
|
||||
|
||||
testWithData(t, ch, chunks2)
|
||||
testWithData(t, ch, chunks2, false)
|
||||
}
|
||||
|
||||
func benchmarkChunker(b *testing.B, hash hash.Hash) {
|
||||
size := 10 * 1024 * 1024
|
||||
func benchmarkChunker(b *testing.B, checkDigest bool) {
|
||||
size := 32 * 1024 * 1024
|
||||
rd := bytes.NewReader(getRandom(23, size))
|
||||
ch := New(rd, testPol)
|
||||
buf := make([]byte, MaxSize)
|
||||
|
||||
b.ResetTimer()
|
||||
b.SetBytes(int64(size))
|
||||
|
@ -253,11 +229,16 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) {
|
|||
for i := 0; i < b.N; i++ {
|
||||
chunks = 0
|
||||
|
||||
rd.Seek(0, 0)
|
||||
ch := chunker.New(rd, testPol, hash)
|
||||
_, err := rd.Seek(0, 0)
|
||||
if err != nil {
|
||||
b.Fatalf("Seek() return error %v", err)
|
||||
}
|
||||
|
||||
ch.Reset(rd, testPol)
|
||||
|
||||
cur := 0
|
||||
for {
|
||||
_, err := ch.Next()
|
||||
chunk, err := ch.Next(buf)
|
||||
|
||||
if err == io.EOF {
|
||||
break
|
||||
|
@ -267,7 +248,26 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) {
|
|||
b.Fatalf("Unexpected error occurred: %v", err)
|
||||
}
|
||||
|
||||
if chunk.Length != chunks1[cur].Length {
|
||||
b.Errorf("wrong chunk length, want %d, got %d",
|
||||
chunks1[cur].Length, chunk.Length)
|
||||
}
|
||||
|
||||
if chunk.Cut != chunks1[cur].CutFP {
|
||||
b.Errorf("wrong cut fingerprint, want 0x%x, got 0x%x",
|
||||
chunks1[cur].CutFP, chunk.Cut)
|
||||
}
|
||||
|
||||
if checkDigest {
|
||||
h := hashData(chunk.Data)
|
||||
if !bytes.Equal(h, chunks1[cur].Digest) {
|
||||
b.Errorf("wrong digest, want %x, got %x",
|
||||
chunks1[cur].Digest, h)
|
||||
}
|
||||
}
|
||||
|
||||
chunks++
|
||||
cur++
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -275,24 +275,22 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) {
|
|||
}
|
||||
|
||||
func BenchmarkChunkerWithSHA256(b *testing.B) {
|
||||
benchmarkChunker(b, sha256.New())
|
||||
}
|
||||
|
||||
func BenchmarkChunkerWithMD5(b *testing.B) {
|
||||
benchmarkChunker(b, md5.New())
|
||||
benchmarkChunker(b, true)
|
||||
}
|
||||
|
||||
func BenchmarkChunker(b *testing.B) {
|
||||
benchmarkChunker(b, nil)
|
||||
benchmarkChunker(b, false)
|
||||
}
|
||||
|
||||
func BenchmarkNewChunker(b *testing.B) {
|
||||
p, err := chunker.RandomPolynomial()
|
||||
OK(b, err)
|
||||
p, err := RandomPolynomial()
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
chunker.New(bytes.NewBuffer(nil), p, nil)
|
||||
New(bytes.NewBuffer(nil), p)
|
||||
}
|
||||
}
|
||||
|
|
39
vendor/src/github.com/restic/chunker/example_test.go
vendored
Normal file
39
vendor/src/github.com/restic/chunker/example_test.go
vendored
Normal file
|
@ -0,0 +1,39 @@
|
|||
package chunker
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
func ExampleChunker() {
|
||||
// generate 32MiB of deterministic pseudo-random data
|
||||
data := getRandom(23, 32*1024*1024)
|
||||
|
||||
// create a chunker
|
||||
chunker := New(bytes.NewReader(data), Pol(0x3DA3358B4DC173))
|
||||
|
||||
// reuse this buffer
|
||||
buf := make([]byte, 8*1024*1024)
|
||||
|
||||
for i := 0; i < 5; i++ {
|
||||
chunk, err := chunker.Next(buf)
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
fmt.Printf("%d %02x\n", chunk.Length, sha256.Sum256(chunk.Data))
|
||||
}
|
||||
|
||||
// Output:
|
||||
// 2163460 4b94cb2cf293855ea43bf766731c74969b91aa6bf3c078719aabdd19860d590d
|
||||
// 643703 5727a63c0964f365ab8ed2ccf604912f2ea7be29759a2b53ede4d6841e397407
|
||||
// 1528956 a73759636a1e7a2758767791c69e81b69fb49236c6929e5d1b654e06e37674ba
|
||||
// 1955808 c955fb059409b25f07e5ae09defbbc2aadf117c97a3724e06ad4abd2787e6824
|
||||
// 2222372 6ba5e9f7e1b310722be3627716cf469be941f7f3e39a4c3bcefea492ec31ee56
|
||||
}
|
|
@ -258,13 +258,15 @@ func qp(p uint, g Pol) Pol {
|
|||
return res.Add(2).Mod(g)
|
||||
}
|
||||
|
||||
func (p Pol) MarshalJSON() ([]byte, error) {
|
||||
buf := strconv.AppendUint([]byte{'"'}, uint64(p), 16)
|
||||
// MarshalJSON returns the JSON representation of the Pol.
|
||||
func (x Pol) MarshalJSON() ([]byte, error) {
|
||||
buf := strconv.AppendUint([]byte{'"'}, uint64(x), 16)
|
||||
buf = append(buf, '"')
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
func (p *Pol) UnmarshalJSON(data []byte) error {
|
||||
// UnmarshalJSON parses a Pol from the JSON data.
|
||||
func (x *Pol) UnmarshalJSON(data []byte) error {
|
||||
if len(data) < 2 {
|
||||
return errors.New("invalid string for polynomial")
|
||||
}
|
||||
|
@ -272,7 +274,7 @@ func (p *Pol) UnmarshalJSON(data []byte) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
*p = Pol(n)
|
||||
*x = Pol(n)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -1,16 +1,13 @@
|
|||
package chunker_test
|
||||
package chunker
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/restic/chunker"
|
||||
. "github.com/restic/restic/test"
|
||||
)
|
||||
|
||||
var polAddTests = []struct {
|
||||
x, y chunker.Pol
|
||||
sum chunker.Pol
|
||||
x, y Pol
|
||||
sum Pol
|
||||
}{
|
||||
{23, 16, 23 ^ 16},
|
||||
{0x9a7e30d1e855e0a0, 0x670102a1f4bcd414, 0xfd7f32701ce934b4},
|
||||
|
@ -18,24 +15,29 @@ var polAddTests = []struct {
|
|||
}
|
||||
|
||||
func TestPolAdd(t *testing.T) {
|
||||
for _, test := range polAddTests {
|
||||
Equals(t, test.sum, test.x.Add(test.y))
|
||||
Equals(t, test.sum, test.y.Add(test.x))
|
||||
for i, test := range polAddTests {
|
||||
if test.sum != test.x.Add(test.y) {
|
||||
t.Errorf("test %d failed: sum != x+y", i)
|
||||
}
|
||||
|
||||
if test.sum != test.y.Add(test.x) {
|
||||
t.Errorf("test %d failed: sum != y+x", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func parseBin(s string) chunker.Pol {
|
||||
func parseBin(s string) Pol {
|
||||
i, err := strconv.ParseUint(s, 2, 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return chunker.Pol(i)
|
||||
return Pol(i)
|
||||
}
|
||||
|
||||
var polMulTests = []struct {
|
||||
x, y chunker.Pol
|
||||
res chunker.Pol
|
||||
x, y Pol
|
||||
res Pol
|
||||
}{
|
||||
{1, 2, 2},
|
||||
{
|
||||
|
@ -78,13 +80,15 @@ var polMulTests = []struct {
|
|||
func TestPolMul(t *testing.T) {
|
||||
for i, test := range polMulTests {
|
||||
m := test.x.Mul(test.y)
|
||||
Assert(t, test.res == m,
|
||||
"TestPolMul failed for test %d: %v * %v: want %v, got %v",
|
||||
i, test.x, test.y, test.res, m)
|
||||
if test.res != m {
|
||||
t.Errorf("TestPolMul failed for test %d: %v * %v: want %v, got %v",
|
||||
i, test.x, test.y, test.res, m)
|
||||
}
|
||||
m = test.y.Mul(test.x)
|
||||
Assert(t, test.res == test.y.Mul(test.x),
|
||||
"TestPolMul failed for %d: %v * %v: want %v, got %v",
|
||||
i, test.x, test.y, test.res, m)
|
||||
if test.res != test.y.Mul(test.x) {
|
||||
t.Errorf("TestPolMul failed for %d: %v * %v: want %v, got %v",
|
||||
i, test.x, test.y, test.res, m)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -95,21 +99,21 @@ func TestPolMulOverflow(t *testing.T) {
|
|||
|
||||
if e, ok := err.(string); ok && e == "multiplication would overflow uint64" {
|
||||
return
|
||||
} else {
|
||||
t.Logf("invalid error raised: %v", err)
|
||||
// re-raise error if not overflow
|
||||
panic(err)
|
||||
}
|
||||
|
||||
t.Logf("invalid error raised: %v", err)
|
||||
// re-raise error if not overflow
|
||||
panic(err)
|
||||
}()
|
||||
|
||||
x := chunker.Pol(1 << 63)
|
||||
x := Pol(1 << 63)
|
||||
x.Mul(2)
|
||||
t.Fatal("overflow test did not panic")
|
||||
}
|
||||
|
||||
var polDivTests = []struct {
|
||||
x, y chunker.Pol
|
||||
res chunker.Pol
|
||||
x, y Pol
|
||||
res Pol
|
||||
}{
|
||||
{10, 50, 0},
|
||||
{0, 1, 0},
|
||||
|
@ -139,15 +143,16 @@ var polDivTests = []struct {
|
|||
func TestPolDiv(t *testing.T) {
|
||||
for i, test := range polDivTests {
|
||||
m := test.x.Div(test.y)
|
||||
Assert(t, test.res == m,
|
||||
"TestPolDiv failed for test %d: %v * %v: want %v, got %v",
|
||||
i, test.x, test.y, test.res, m)
|
||||
if test.res != m {
|
||||
t.Errorf("TestPolDiv failed for test %d: %v * %v: want %v, got %v",
|
||||
i, test.x, test.y, test.res, m)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var polModTests = []struct {
|
||||
x, y chunker.Pol
|
||||
res chunker.Pol
|
||||
x, y Pol
|
||||
res Pol
|
||||
}{
|
||||
{10, 50, 10},
|
||||
{0, 1, 0},
|
||||
|
@ -175,14 +180,17 @@ var polModTests = []struct {
|
|||
}
|
||||
|
||||
func TestPolModt(t *testing.T) {
|
||||
for _, test := range polModTests {
|
||||
Equals(t, test.res, test.x.Mod(test.y))
|
||||
for i, test := range polModTests {
|
||||
res := test.x.Mod(test.y)
|
||||
if test.res != res {
|
||||
t.Errorf("test %d failed: want %v, got %v", i, test.res, res)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkPolDivMod(t *testing.B) {
|
||||
f := chunker.Pol(0x2482734cacca49)
|
||||
g := chunker.Pol(0x3af4b284899)
|
||||
f := Pol(0x2482734cacca49)
|
||||
g := Pol(0x3af4b284899)
|
||||
|
||||
for i := 0; i < t.N; i++ {
|
||||
g.DivMod(f)
|
||||
|
@ -190,8 +198,8 @@ func BenchmarkPolDivMod(t *testing.B) {
|
|||
}
|
||||
|
||||
func BenchmarkPolDiv(t *testing.B) {
|
||||
f := chunker.Pol(0x2482734cacca49)
|
||||
g := chunker.Pol(0x3af4b284899)
|
||||
f := Pol(0x2482734cacca49)
|
||||
g := Pol(0x3af4b284899)
|
||||
|
||||
for i := 0; i < t.N; i++ {
|
||||
g.Div(f)
|
||||
|
@ -199,8 +207,8 @@ func BenchmarkPolDiv(t *testing.B) {
|
|||
}
|
||||
|
||||
func BenchmarkPolMod(t *testing.B) {
|
||||
f := chunker.Pol(0x2482734cacca49)
|
||||
g := chunker.Pol(0x3af4b284899)
|
||||
f := Pol(0x2482734cacca49)
|
||||
g := Pol(0x3af4b284899)
|
||||
|
||||
for i := 0; i < t.N; i++ {
|
||||
g.Mod(f)
|
||||
|
@ -208,7 +216,7 @@ func BenchmarkPolMod(t *testing.B) {
|
|||
}
|
||||
|
||||
func BenchmarkPolDeg(t *testing.B) {
|
||||
f := chunker.Pol(0x3af4b284899)
|
||||
f := Pol(0x3af4b284899)
|
||||
d := f.Deg()
|
||||
if d != 41 {
|
||||
t.Fatalf("BenchmalPolDeg: Wrong degree %d returned, expected %d",
|
||||
|
@ -221,25 +229,31 @@ func BenchmarkPolDeg(t *testing.B) {
|
|||
}
|
||||
|
||||
func TestRandomPolynomial(t *testing.T) {
|
||||
_, err := chunker.RandomPolynomial()
|
||||
OK(t, err)
|
||||
_, err := RandomPolynomial()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkRandomPolynomial(t *testing.B) {
|
||||
for i := 0; i < t.N; i++ {
|
||||
_, err := chunker.RandomPolynomial()
|
||||
OK(t, err)
|
||||
_, err := RandomPolynomial()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestExpandPolynomial(t *testing.T) {
|
||||
pol := chunker.Pol(0x3DA3358B4DC173)
|
||||
pol := Pol(0x3DA3358B4DC173)
|
||||
s := pol.Expand()
|
||||
Equals(t, "x^53+x^52+x^51+x^50+x^48+x^47+x^45+x^41+x^40+x^37+x^36+x^34+x^32+x^31+x^27+x^25+x^24+x^22+x^19+x^18+x^16+x^15+x^14+x^8+x^6+x^5+x^4+x+1", s)
|
||||
if s != "x^53+x^52+x^51+x^50+x^48+x^47+x^45+x^41+x^40+x^37+x^36+x^34+x^32+x^31+x^27+x^25+x^24+x^22+x^19+x^18+x^16+x^15+x^14+x^8+x^6+x^5+x^4+x+1" {
|
||||
t.Fatal("wrong result")
|
||||
}
|
||||
}
|
||||
|
||||
var polIrredTests = []struct {
|
||||
f chunker.Pol
|
||||
f Pol
|
||||
irred bool
|
||||
}{
|
||||
{0x38f1e565e288df, false},
|
||||
|
@ -270,15 +284,16 @@ var polIrredTests = []struct {
|
|||
|
||||
func TestPolIrreducible(t *testing.T) {
|
||||
for _, test := range polIrredTests {
|
||||
Assert(t, test.f.Irreducible() == test.irred,
|
||||
"Irreducibility test for Polynomial %v failed: got %v, wanted %v",
|
||||
test.f, test.f.Irreducible(), test.irred)
|
||||
if test.f.Irreducible() != test.irred {
|
||||
t.Errorf("Irreducibility test for Polynomial %v failed: got %v, wanted %v",
|
||||
test.f, test.f.Irreducible(), test.irred)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkPolIrreducible(b *testing.B) {
|
||||
// find first irreducible polynomial
|
||||
var pol chunker.Pol
|
||||
var pol Pol
|
||||
for _, test := range polIrredTests {
|
||||
if test.irred {
|
||||
pol = test.f
|
||||
|
@ -287,15 +302,16 @@ func BenchmarkPolIrreducible(b *testing.B) {
|
|||
}
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
Assert(b, pol.Irreducible(),
|
||||
"Irreducibility test for Polynomial %v failed", pol)
|
||||
if !pol.Irreducible() {
|
||||
b.Errorf("Irreducibility test for Polynomial %v failed", pol)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var polGCDTests = []struct {
|
||||
f1 chunker.Pol
|
||||
f2 chunker.Pol
|
||||
gcd chunker.Pol
|
||||
f1 Pol
|
||||
f2 Pol
|
||||
gcd Pol
|
||||
}{
|
||||
{10, 50, 2},
|
||||
{0, 1, 1},
|
||||
|
@ -345,21 +361,24 @@ var polGCDTests = []struct {
|
|||
func TestPolGCD(t *testing.T) {
|
||||
for i, test := range polGCDTests {
|
||||
gcd := test.f1.GCD(test.f2)
|
||||
Assert(t, test.gcd == gcd,
|
||||
"GCD test %d (%+v) failed: got %v, wanted %v",
|
||||
i, test, gcd, test.gcd)
|
||||
if test.gcd != gcd {
|
||||
t.Errorf("GCD test %d (%+v) failed: got %v, wanted %v",
|
||||
i, test, gcd, test.gcd)
|
||||
}
|
||||
|
||||
gcd = test.f2.GCD(test.f1)
|
||||
Assert(t, test.gcd == gcd,
|
||||
"GCD test %d (%+v) failed: got %v, wanted %v",
|
||||
i, test, gcd, test.gcd)
|
||||
if test.gcd != gcd {
|
||||
t.Errorf("GCD test %d (%+v) failed: got %v, wanted %v",
|
||||
i, test, gcd, test.gcd)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var polMulModTests = []struct {
|
||||
f1 chunker.Pol
|
||||
f2 chunker.Pol
|
||||
g chunker.Pol
|
||||
mod chunker.Pol
|
||||
f1 Pol
|
||||
f2 Pol
|
||||
g Pol
|
||||
mod Pol
|
||||
}{
|
||||
{
|
||||
0x1230,
|
||||
|
@ -378,8 +397,9 @@ var polMulModTests = []struct {
|
|||
func TestPolMulMod(t *testing.T) {
|
||||
for i, test := range polMulModTests {
|
||||
mod := test.f1.MulMod(test.f2, test.g)
|
||||
Assert(t, mod == test.mod,
|
||||
"MulMod test %d (%+v) failed: got %v, wanted %v",
|
||||
i, test, mod, test.mod)
|
||||
if mod != test.mod {
|
||||
t.Errorf("MulMod test %d (%+v) failed: got %v, wanted %v",
|
||||
i, test, mod, test.mod)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue