Merge pull request #472 from restic/update-chunker

Update chunker
This commit is contained in:
Alexander Neumann 2016-02-24 21:25:15 +01:00
commit 77d85cee52
11 changed files with 352 additions and 303 deletions

View file

@ -1,7 +1,7 @@
package restic
import (
"crypto/sha256"
"bytes"
"encoding/json"
"fmt"
"io"
@ -11,13 +11,14 @@ import (
"sync"
"time"
"github.com/restic/chunker"
"restic/backend"
"restic/debug"
"restic/pack"
"restic/pipe"
"restic/repository"
"github.com/restic/chunker"
"github.com/juju/errors"
)
@ -154,12 +155,11 @@ type saveResult struct {
bytes uint64
}
func (arch *Archiver) saveChunk(chunk *chunker.Chunk, p *Progress, token struct{}, file *os.File, resultChannel chan<- saveResult) {
hash := chunk.Digest
id := backend.ID{}
copy(id[:], hash)
func (arch *Archiver) saveChunk(chunk chunker.Chunk, p *Progress, token struct{}, file *os.File, resultChannel chan<- saveResult) {
defer freeBuf(chunk.Data)
err := arch.Save(pack.Data, id, chunk.Length, chunk.Reader(file))
id := backend.Hash(chunk.Data)
err := arch.Save(pack.Data, id, chunk.Length, bytes.NewReader(chunk.Data))
// TODO handle error
if err != nil {
panic(err)
@ -220,11 +220,11 @@ func (arch *Archiver) SaveFile(p *Progress, node *Node) error {
return err
}
chnker := chunker.New(file, arch.repo.Config.ChunkerPolynomial, sha256.New())
chnker := chunker.New(file, arch.repo.Config.ChunkerPolynomial)
resultChannels := [](<-chan saveResult){}
for {
chunk, err := chnker.Next()
chunk, err := chnker.Next(getBuf())
if err == io.EOF {
break
}

View file

@ -2,12 +2,10 @@ package restic_test
import (
"bytes"
"crypto/sha256"
"io"
"testing"
"time"
"github.com/restic/chunker"
"restic"
"restic/backend"
"restic/checker"
@ -15,6 +13,8 @@ import (
"restic/pack"
"restic/repository"
. "restic/test"
"github.com/restic/chunker"
)
var testPol = chunker.Pol(0x3DA3358B4DC173)
@ -24,17 +24,12 @@ type Rdr interface {
io.ReaderAt
}
type chunkedData struct {
buf []byte
chunks []*chunker.Chunk
}
func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.Key) {
rd.Seek(0, 0)
ch := chunker.New(rd, testPol, sha256.New())
ch := chunker.New(rd, testPol)
for {
chunk, err := ch.Next()
chunk, err := ch.Next(buf)
if err == io.EOF {
break
@ -43,12 +38,10 @@ func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.K
OK(b, err)
// reduce length of buf
buf = buf[:chunk.Length]
n, err := io.ReadFull(chunk.Reader(rd), buf)
OK(b, err)
Assert(b, uint(n) == chunk.Length, "invalid length: got %d, expected %d", n, chunk.Length)
Assert(b, uint(len(chunk.Data)) == chunk.Length,
"invalid length: got %d, expected %d", len(chunk.Data), chunk.Length)
_, err = crypto.Encrypt(key, buf2, buf)
_, err = crypto.Encrypt(key, buf2, chunk.Data)
OK(b, err)
}
}
@ -72,18 +65,16 @@ func BenchmarkChunkEncrypt(b *testing.B) {
}
func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *crypto.Key) {
ch := chunker.New(rd, testPol, sha256.New())
ch := chunker.New(rd, testPol)
for {
chunk, err := ch.Next()
chunk, err := ch.Next(buf)
if err == io.EOF {
break
}
// reduce length of chunkBuf
buf = buf[:chunk.Length]
io.ReadFull(chunk.Reader(rd), buf)
crypto.Encrypt(key, buf, buf)
crypto.Encrypt(key, chunk.Data, chunk.Data)
}
}
@ -258,8 +249,7 @@ func testParallelSaveWithDuplication(t *testing.T, seed int) {
duplication := 7
arch := restic.NewArchiver(repo)
data, chunks := getRandomData(seed, dataSizeMb*1024*1024)
reader := bytes.NewReader(data)
chunks := getRandomData(seed, dataSizeMb*1024*1024)
errChannels := [](<-chan error){}
@ -272,18 +262,15 @@ func testParallelSaveWithDuplication(t *testing.T, seed int) {
errChan := make(chan error)
errChannels = append(errChannels, errChan)
go func(reader *bytes.Reader, c *chunker.Chunk, errChan chan<- error) {
go func(c chunker.Chunk, errChan chan<- error) {
barrier <- struct{}{}
hash := c.Digest
id := backend.ID{}
copy(id[:], hash)
time.Sleep(time.Duration(hash[0]))
err := arch.Save(pack.Data, id, c.Length, c.Reader(reader))
id := backend.Hash(c.Data)
time.Sleep(time.Duration(id[0]))
err := arch.Save(pack.Data, id, c.Length, bytes.NewReader(c.Data))
<-barrier
errChan <- err
}(reader, c, errChan)
}(c, errChan)
}
}
@ -298,20 +285,20 @@ func testParallelSaveWithDuplication(t *testing.T, seed int) {
assertNoUnreferencedPacks(t, chkr)
}
func getRandomData(seed int, size int) ([]byte, []*chunker.Chunk) {
func getRandomData(seed int, size int) []chunker.Chunk {
buf := Random(seed, size)
chunks := []*chunker.Chunk{}
chunker := chunker.New(bytes.NewReader(buf), testPol, sha256.New())
var chunks []chunker.Chunk
chunker := chunker.New(bytes.NewReader(buf), testPol)
for {
c, err := chunker.Next()
c, err := chunker.Next(nil)
if err == io.EOF {
break
}
chunks = append(chunks, c)
}
return buf, chunks
return chunks
}
func createAndInitChecker(t *testing.T, repo *repository.Repository) *checker.Checker {

21
src/restic/buffer_pool.go Normal file
View file

@ -0,0 +1,21 @@
package restic
import (
"sync"
"github.com/restic/chunker"
)
// bufPool recycles chunk data buffers between saveChunk calls to reduce
// allocation and GC pressure. Pointers to slices are stored rather than
// the slices themselves, so that Put does not allocate a fresh copy of
// the slice header on every call (staticcheck SA6002).
var bufPool = sync.Pool{
	New: func() interface{} {
		buf := make([]byte, chunker.MinSize)
		return &buf
	},
}

// getBuf returns a reusable buffer of at least chunker.MinSize bytes.
// Callers that need more room grow it via append; the grown buffer is
// recycled with its larger capacity when freed.
func getBuf() []byte {
	return *bufPool.Get().(*[]byte)
}

// freeBuf returns data to the pool for reuse by a later getBuf call.
// The caller must not use data after handing it back.
func freeBuf(data []byte) {
	bufPool.Put(&data)
}

View file

@ -3,7 +3,6 @@ package repository
import (
"sync"
"github.com/restic/chunker"
"restic/backend"
"restic/crypto"
"restic/debug"
@ -18,8 +17,8 @@ type packerManager struct {
packs []*pack.Packer
}
const minPackSize = 4 * chunker.MiB
const maxPackSize = 16 * chunker.MiB
const minPackSize = 4 * 1024 * 1024
const maxPackSize = 16 * 1024 * 1024
const maxPackers = 200
// findPacker returns a packer for a new blob of size bytes. Either a new one is

4
vendor/manifest vendored
View file

@ -40,8 +40,8 @@
{
"importpath": "github.com/restic/chunker",
"repository": "https://github.com/restic/chunker",
"revision": "fc45043175c38d59374024a38fb7123c40a64f20",
"branch": "HEAD"
"revision": "16c849a106e0a50d658e8f5e49a01f6728f4f92c",
"branch": "master"
},
{
"importpath": "golang.org/x/crypto/pbkdf2",

View file

@ -1,11 +1,12 @@
[![GoDoc](https://godoc.org/github.com/restic/chunker?status.svg)](http://godoc.org/github.com/restic/chunker)
[![Build Status](https://travis-ci.org/restic/chunker.svg?branch=master)](https://travis-ci.org/restic/chunker)
Content Defined Chunking (CDC) based on a rolling Rabin Checksum.
Part of https://github.com/restic/restic.
The package `chunker` implements content-defined chunking (CDC) based on a
rolling Rabin hash. The library is part of the [restic backup
program](https://github.com/restic/restic).
An introduction to Content Defined Chunking can be found in the restic blog
post [Foundation - Introducing Content Defined Chunking (CDC)](https://restic.github.io/blog/2015-09-12/restic-foundation1-cdc/).
post [Foundation - Introducing Content Defined Chunking (CDC)](https://restic.github.io/blog/2015-09-12/restic-foundation1-cdc).
You can find the API documentation at
https://godoc.org/github.com/restic/chunker

View file

@ -2,14 +2,13 @@ package chunker
import (
"errors"
"hash"
"io"
"sync"
)
const (
KiB = 1024
MiB = 1024 * KiB
kiB = 1024
miB = 1024 * kiB
// WindowSize is the size of the sliding window.
windowSize = 64
@ -17,20 +16,16 @@ const (
// aim to create chunks of 20 bits or about 1MiB on average.
averageBits = 20
// MinSize is the minimal size of a chunk.
MinSize = 512 * KiB
// MaxSize is the maximal size of a chunk.
MaxSize = 8 * MiB
// MinSize is the default minimal size of a chunk.
MinSize = 512 * kiB
// MaxSize is the default maximal size of a chunk.
MaxSize = 8 * miB
splitmask = (1 << averageBits) - 1
chunkerBufSize = 512 * KiB
chunkerBufSize = 512 * kiB
)
var bufPool = sync.Pool{
New: func() interface{} { return make([]byte, chunkerBufSize) },
}
type tables struct {
out [256]Pol
mod [256]Pol
@ -52,15 +47,13 @@ type Chunk struct {
Start uint
Length uint
Cut uint64
Digest []byte
}
func (c Chunk) Reader(r io.ReaderAt) io.Reader {
return io.NewSectionReader(r, int64(c.Start), int64(c.Length))
Data []byte
}
// Chunker splits content with Rabin Fingerprints.
type Chunker struct {
MinSize, MaxSize uint
pol Pol
polShift uint
tables *tables
@ -82,17 +75,17 @@ type Chunker struct {
pre uint // wait for this many bytes before start calculating an new chunk
digest uint64
h hash.Hash
}
// New returns a new Chunker based on polynomial p that reads from rd
// with bufsize and pass all data to hash along the way.
func New(rd io.Reader, pol Pol, h hash.Hash) *Chunker {
func New(rd io.Reader, pol Pol) *Chunker {
c := &Chunker{
buf: bufPool.Get().([]byte),
h: h,
pol: pol,
rd: rd,
buf: make([]byte, chunkerBufSize),
pol: pol,
rd: rd,
MinSize: MinSize,
MaxSize: MaxSize,
}
c.reset()
@ -100,6 +93,19 @@ func New(rd io.Reader, pol Pol, h hash.Hash) *Chunker {
return c
}
// Reset reinitializes the chunker to read from rd using polynomial pol.
// The internal read buffer is reused, so resetting an existing Chunker
// avoids the buffer allocation that New performs.
func (c *Chunker) Reset(rd io.Reader, pol Pol) {
	// Replace all state wholesale, keeping only buf (to reuse the
	// allocation) and the MinSize/MaxSize limits the caller may have
	// adjusted after construction.
	*c = Chunker{
		buf:     c.buf,
		pol:     pol,
		rd:      rd,
		MinSize: c.MinSize,
		MaxSize: c.MaxSize,
	}
	c.reset()
}
func (c *Chunker) reset() {
c.polShift = uint(c.pol.Deg() - 8)
c.fillTables()
@ -115,12 +121,8 @@ func (c *Chunker) reset() {
c.slide(1)
c.start = c.pos
if c.h != nil {
c.h.Reset()
}
// do not start a new chunk unless at least MinSize bytes have been read
c.pre = MinSize - windowSize
c.pre = c.MinSize - windowSize
}
// Calculate out_table and mod_table for optimization. Must be called only
@ -179,12 +181,13 @@ func (c *Chunker) fillTables() {
}
// Next returns the position and length of the next chunk of data. If an error
// occurs while reading, the error is returned with a nil chunk. The state of
// the current chunk is undefined. When the last chunk has been returned, all
// subsequent calls yield a nil chunk and an io.EOF error.
func (c *Chunker) Next() (*Chunk, error) {
// occurs while reading, the error is returned. Afterwards, the state of the
// current chunk is undefined. When the last chunk has been returned, all
// subsequent calls yield an io.EOF error.
func (c *Chunker) Next(data []byte) (Chunk, error) {
data = data[:0]
if c.tables == nil {
return nil, errors.New("polynomial is not set")
return Chunk{}, errors.New("polynomial is not set")
}
for {
@ -203,22 +206,19 @@ func (c *Chunker) Next() (*Chunk, error) {
if err == io.EOF && !c.closed {
c.closed = true
// return the buffer to the pool
bufPool.Put(c.buf)
// return current chunk, if any bytes have been processed
if c.count > 0 {
return &Chunk{
return Chunk{
Start: c.start,
Length: c.count,
Cut: c.digest,
Digest: c.hashDigest(),
Data: data,
}, nil
}
}
if err != nil {
return nil, err
return Chunk{}, err
}
c.bpos = 0
@ -230,7 +230,7 @@ func (c *Chunker) Next() (*Chunk, error) {
n := c.bmax - c.bpos
if c.pre > uint(n) {
c.pre -= uint(n)
c.updateHash(c.buf[c.bpos:c.bmax])
data = append(data, c.buf[c.bpos:c.bmax]...)
c.count += uint(n)
c.pos += uint(n)
@ -239,7 +239,7 @@ func (c *Chunker) Next() (*Chunk, error) {
continue
}
c.updateHash(c.buf[c.bpos : c.bpos+c.pre])
data = append(data, c.buf[c.bpos:c.bpos+c.pre]...)
c.bpos += c.pre
c.count += c.pre
@ -264,22 +264,22 @@ func (c *Chunker) Next() (*Chunk, error) {
// end inline
add++
if add < MinSize {
if add < c.MinSize {
continue
}
if (c.digest&splitmask) == 0 || add >= MaxSize {
i := add - c.count - 1
c.updateHash(c.buf[c.bpos : c.bpos+uint(i)+1])
data = append(data, c.buf[c.bpos:c.bpos+uint(i)+1]...)
c.count = add
c.pos += uint(i) + 1
c.bpos += uint(i) + 1
chunk := &Chunk{
chunk := Chunk{
Start: c.start,
Length: c.count,
Cut: c.digest,
Digest: c.hashDigest(),
Data: data,
}
c.reset()
@ -290,7 +290,7 @@ func (c *Chunker) Next() (*Chunk, error) {
steps := c.bmax - c.bpos
if steps > 0 {
c.updateHash(c.buf[c.bpos : c.bpos+steps])
data = append(data, c.buf[c.bpos:c.bpos+steps]...)
}
c.count += steps
c.pos += steps
@ -298,24 +298,6 @@ func (c *Chunker) Next() (*Chunk, error) {
}
}
func (c *Chunker) updateHash(data []byte) {
if c.h != nil {
// the hashes from crypto/sha* do not return an error
_, err := c.h.Write(data)
if err != nil {
panic(err)
}
}
}
func (c *Chunker) hashDigest() []byte {
if c.h == nil {
return nil
}
return c.h.Sum(nil)
}
func (c *Chunker) append(b byte) {
index := c.digest >> c.polShift
c.digest <<= 8

View file

@ -1,19 +1,13 @@
package chunker_test
package chunker
import (
"bytes"
"crypto/md5"
"crypto/sha256"
"encoding/hex"
"hash"
"io"
"io/ioutil"
"math/rand"
"testing"
"time"
"github.com/restic/chunker"
. "github.com/restic/restic/test"
)
func parseDigest(s string) []byte {
@ -32,7 +26,7 @@ type chunk struct {
}
// polynomial used for all the tests below
const testPol = chunker.Pol(0x3DA3358B4DC173)
const testPol = Pol(0x3DA3358B4DC173)
// created for 32MB of random data out of math/rand's Uint32() seeded by
// constant 23
@ -68,59 +62,51 @@ var chunks1 = []chunk{
// test if nullbytes are correctly split, even if length is a multiple of MinSize.
var chunks2 = []chunk{
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
}
func testWithData(t *testing.T, chnker *chunker.Chunker, testChunks []chunk) []*chunker.Chunk {
chunks := []*chunker.Chunk{}
func testWithData(t *testing.T, chnker *Chunker, testChunks []chunk, checkDigest bool) []Chunk {
chunks := []Chunk{}
pos := uint(0)
for i, chunk := range testChunks {
c, err := chnker.Next()
c, err := chnker.Next(nil)
if err != nil {
t.Fatalf("Error returned with chunk %d: %v", i, err)
}
if c == nil {
t.Fatalf("Nil chunk returned")
if c.Start != pos {
t.Fatalf("Start for chunk %d does not match: expected %d, got %d",
i, pos, c.Start)
}
if c != nil {
if c.Start != pos {
t.Fatalf("Start for chunk %d does not match: expected %d, got %d",
i, pos, c.Start)
}
if c.Length != chunk.Length {
t.Fatalf("Length for chunk %d does not match: expected %d, got %d",
i, chunk.Length, c.Length)
}
if c.Length != chunk.Length {
t.Fatalf("Length for chunk %d does not match: expected %d, got %d",
i, chunk.Length, c.Length)
}
if c.Cut != chunk.CutFP {
t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x",
i, len(chunks)-1, chunk.CutFP, c.Cut)
}
if c.Cut != chunk.CutFP {
t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x",
i, len(chunks)-1, chunk.CutFP, c.Cut)
}
if c.Digest != nil && !bytes.Equal(c.Digest, chunk.Digest) {
if checkDigest {
digest := hashData(c.Data)
if !bytes.Equal(chunk.Digest, digest) {
t.Fatalf("Digest fingerprint for chunk %d/%d does not match: expected %02x, got %02x",
i, len(chunks)-1, chunk.Digest, c.Digest)
i, len(chunks)-1, chunk.Digest, digest)
}
pos += c.Length
chunks = append(chunks, c)
}
pos += c.Length
chunks = append(chunks, c)
}
c, err := chnker.Next()
if c != nil {
t.Fatal("additional non-nil chunk returned")
}
_, err := chnker.Next(nil)
if err != io.EOF {
t.Fatal("wrong error returned after last chunk")
}
@ -143,39 +129,32 @@ func getRandom(seed, count int) []byte {
return buf
}
// hashData returns the SHA-256 digest of d as a byte slice.
func hashData(d []byte) []byte {
	sum := sha256.Sum256(d)
	return sum[:]
}
func TestChunker(t *testing.T) {
// setup data source
buf := getRandom(23, 32*1024*1024)
ch := chunker.New(bytes.NewReader(buf), testPol, sha256.New())
chunks := testWithData(t, ch, chunks1)
// test reader
for i, c := range chunks {
rd := c.Reader(bytes.NewReader(buf))
h := sha256.New()
n, err := io.Copy(h, rd)
if err != nil {
t.Fatalf("io.Copy(): %v", err)
}
if uint(n) != chunks1[i].Length {
t.Fatalf("reader returned wrong number of bytes: expected %d, got %d",
chunks1[i].Length, n)
}
d := h.Sum(nil)
if !bytes.Equal(d, chunks1[i].Digest) {
t.Fatalf("wrong hash returned: expected %02x, got %02x",
chunks1[i].Digest, d)
}
}
ch := New(bytes.NewReader(buf), testPol)
testWithData(t, ch, chunks1, true)
// setup nullbyte data source
buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize)
ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New())
buf = bytes.Repeat([]byte{0}, len(chunks2)*MinSize)
ch = New(bytes.NewReader(buf), testPol)
testWithData(t, ch, chunks2)
testWithData(t, ch, chunks2, true)
}
// TestChunkerReset verifies that a Chunker reused via Reset produces
// exactly the same chunk sequence as a freshly constructed one.
func TestChunkerReset(t *testing.T) {
	buf := getRandom(23, 32*1024*1024)
	ch := New(bytes.NewReader(buf), testPol)
	testWithData(t, ch, chunks1, true)

	// reset over the same input; the reference chunks must match again
	ch.Reset(bytes.NewReader(buf), testPol)
	testWithData(t, ch, chunks1, true)
}
func TestChunkerWithRandomPolynomial(t *testing.T) {
@ -184,67 +163,64 @@ func TestChunkerWithRandomPolynomial(t *testing.T) {
// generate a new random polynomial
start := time.Now()
p, err := chunker.RandomPolynomial()
OK(t, err)
p, err := RandomPolynomial()
if err != nil {
t.Fatal(err)
}
t.Logf("generating random polynomial took %v", time.Since(start))
start = time.Now()
ch := chunker.New(bytes.NewReader(buf), p, sha256.New())
ch := New(bytes.NewReader(buf), p)
t.Logf("creating chunker took %v", time.Since(start))
// make sure that first chunk is different
c, err := ch.Next()
c, err := ch.Next(nil)
Assert(t, c.Cut != chunks1[0].CutFP,
"Cut point is the same")
Assert(t, c.Length != chunks1[0].Length,
"Length is the same")
Assert(t, !bytes.Equal(c.Digest, chunks1[0].Digest),
"Digest is the same")
if c.Cut == chunks1[0].CutFP {
t.Fatal("Cut point is the same")
}
if c.Length == chunks1[0].Length {
t.Fatal("Length is the same")
}
if bytes.Equal(hashData(c.Data), chunks1[0].Digest) {
t.Fatal("Digest is the same")
}
}
func TestChunkerWithoutHash(t *testing.T) {
// setup data source
buf := getRandom(23, 32*1024*1024)
ch := chunker.New(bytes.NewReader(buf), testPol, nil)
chunks := testWithData(t, ch, chunks1)
ch := New(bytes.NewReader(buf), testPol)
chunks := testWithData(t, ch, chunks1, false)
// test reader
for i, c := range chunks {
rd := c.Reader(bytes.NewReader(buf))
buf2, err := ioutil.ReadAll(rd)
if err != nil {
t.Fatalf("io.Copy(): %v", err)
}
if uint(len(buf2)) != chunks1[i].Length {
if uint(len(c.Data)) != chunks1[i].Length {
t.Fatalf("reader returned wrong number of bytes: expected %d, got %d",
chunks1[i].Length, uint(len(buf2)))
chunks1[i].Length, len(c.Data))
}
if uint(len(buf2)) != chunks1[i].Length {
t.Fatalf("wrong number of bytes returned: expected %02x, got %02x",
chunks[i].Length, len(buf2))
}
if !bytes.Equal(buf[c.Start:c.Start+c.Length], buf2) {
if !bytes.Equal(buf[c.Start:c.Start+c.Length], c.Data) {
t.Fatalf("invalid data for chunk returned: expected %02x, got %02x",
buf[c.Start:c.Start+c.Length], buf2)
buf[c.Start:c.Start+c.Length], c.Data)
}
}
// setup nullbyte data source
buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize)
ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New())
buf = bytes.Repeat([]byte{0}, len(chunks2)*MinSize)
ch = New(bytes.NewReader(buf), testPol)
testWithData(t, ch, chunks2)
testWithData(t, ch, chunks2, false)
}
func benchmarkChunker(b *testing.B, hash hash.Hash) {
size := 10 * 1024 * 1024
func benchmarkChunker(b *testing.B, checkDigest bool) {
size := 32 * 1024 * 1024
rd := bytes.NewReader(getRandom(23, size))
ch := New(rd, testPol)
buf := make([]byte, MaxSize)
b.ResetTimer()
b.SetBytes(int64(size))
@ -253,11 +229,16 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) {
for i := 0; i < b.N; i++ {
chunks = 0
rd.Seek(0, 0)
ch := chunker.New(rd, testPol, hash)
_, err := rd.Seek(0, 0)
if err != nil {
b.Fatalf("Seek() return error %v", err)
}
ch.Reset(rd, testPol)
cur := 0
for {
_, err := ch.Next()
chunk, err := ch.Next(buf)
if err == io.EOF {
break
@ -267,7 +248,26 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) {
b.Fatalf("Unexpected error occurred: %v", err)
}
if chunk.Length != chunks1[cur].Length {
b.Errorf("wrong chunk length, want %d, got %d",
chunks1[cur].Length, chunk.Length)
}
if chunk.Cut != chunks1[cur].CutFP {
b.Errorf("wrong cut fingerprint, want 0x%x, got 0x%x",
chunks1[cur].CutFP, chunk.Cut)
}
if checkDigest {
h := hashData(chunk.Data)
if !bytes.Equal(h, chunks1[cur].Digest) {
b.Errorf("wrong digest, want %x, got %x",
chunks1[cur].Digest, h)
}
}
chunks++
cur++
}
}
@ -275,24 +275,22 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) {
}
func BenchmarkChunkerWithSHA256(b *testing.B) {
benchmarkChunker(b, sha256.New())
}
func BenchmarkChunkerWithMD5(b *testing.B) {
benchmarkChunker(b, md5.New())
benchmarkChunker(b, true)
}
func BenchmarkChunker(b *testing.B) {
benchmarkChunker(b, nil)
benchmarkChunker(b, false)
}
func BenchmarkNewChunker(b *testing.B) {
p, err := chunker.RandomPolynomial()
OK(b, err)
p, err := RandomPolynomial()
if err != nil {
b.Fatal(err)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
chunker.New(bytes.NewBuffer(nil), p, nil)
New(bytes.NewBuffer(nil), p)
}
}

View file

@ -0,0 +1,39 @@
package chunker
import (
"bytes"
"crypto/sha256"
"fmt"
"io"
)
// ExampleChunker demonstrates splitting a data stream into
// content-defined chunks; the Output comment below is checked by
// "go test".
func ExampleChunker() {
	// generate 32MiB of deterministic pseudo-random data
	data := getRandom(23, 32*1024*1024)

	// create a chunker
	chunker := New(bytes.NewReader(data), Pol(0x3DA3358B4DC173))

	// reuse this buffer for every chunk; Next grows it if a chunk is larger
	buf := make([]byte, 8*1024*1024)

	// print length and SHA-256 digest of the first five chunks
	for i := 0; i < 5; i++ {
		chunk, err := chunker.Next(buf)

		if err == io.EOF {
			break
		}

		if err != nil {
			panic(err)
		}

		fmt.Printf("%d %02x\n", chunk.Length, sha256.Sum256(chunk.Data))
	}

	// Output:
	// 2163460 4b94cb2cf293855ea43bf766731c74969b91aa6bf3c078719aabdd19860d590d
	// 643703 5727a63c0964f365ab8ed2ccf604912f2ea7be29759a2b53ede4d6841e397407
	// 1528956 a73759636a1e7a2758767791c69e81b69fb49236c6929e5d1b654e06e37674ba
	// 1955808 c955fb059409b25f07e5ae09defbbc2aadf117c97a3724e06ad4abd2787e6824
	// 2222372 6ba5e9f7e1b310722be3627716cf469be941f7f3e39a4c3bcefea492ec31ee56
}

View file

@ -258,13 +258,15 @@ func qp(p uint, g Pol) Pol {
return res.Add(2).Mod(g)
}
func (p Pol) MarshalJSON() ([]byte, error) {
buf := strconv.AppendUint([]byte{'"'}, uint64(p), 16)
// MarshalJSON returns the JSON representation of the Pol: its value as
// a double-quoted, lower-case hexadecimal string.
func (x Pol) MarshalJSON() ([]byte, error) {
	return []byte(`"` + strconv.FormatUint(uint64(x), 16) + `"`), nil
}
func (p *Pol) UnmarshalJSON(data []byte) error {
// UnmarshalJSON parses a Pol from the JSON data.
func (x *Pol) UnmarshalJSON(data []byte) error {
if len(data) < 2 {
return errors.New("invalid string for polynomial")
}
@ -272,7 +274,7 @@ func (p *Pol) UnmarshalJSON(data []byte) error {
if err != nil {
return err
}
*p = Pol(n)
*x = Pol(n)
return nil
}

View file

@ -1,16 +1,13 @@
package chunker_test
package chunker
import (
"strconv"
"testing"
"github.com/restic/chunker"
. "github.com/restic/restic/test"
)
var polAddTests = []struct {
x, y chunker.Pol
sum chunker.Pol
x, y Pol
sum Pol
}{
{23, 16, 23 ^ 16},
{0x9a7e30d1e855e0a0, 0x670102a1f4bcd414, 0xfd7f32701ce934b4},
@ -18,24 +15,29 @@ var polAddTests = []struct {
}
func TestPolAdd(t *testing.T) {
for _, test := range polAddTests {
Equals(t, test.sum, test.x.Add(test.y))
Equals(t, test.sum, test.y.Add(test.x))
for i, test := range polAddTests {
if test.sum != test.x.Add(test.y) {
t.Errorf("test %d failed: sum != x+y", i)
}
if test.sum != test.y.Add(test.x) {
t.Errorf("test %d failed: sum != y+x", i)
}
}
}
func parseBin(s string) chunker.Pol {
func parseBin(s string) Pol {
i, err := strconv.ParseUint(s, 2, 64)
if err != nil {
panic(err)
}
return chunker.Pol(i)
return Pol(i)
}
var polMulTests = []struct {
x, y chunker.Pol
res chunker.Pol
x, y Pol
res Pol
}{
{1, 2, 2},
{
@ -78,13 +80,15 @@ var polMulTests = []struct {
func TestPolMul(t *testing.T) {
for i, test := range polMulTests {
m := test.x.Mul(test.y)
Assert(t, test.res == m,
"TestPolMul failed for test %d: %v * %v: want %v, got %v",
i, test.x, test.y, test.res, m)
if test.res != m {
t.Errorf("TestPolMul failed for test %d: %v * %v: want %v, got %v",
i, test.x, test.y, test.res, m)
}
m = test.y.Mul(test.x)
Assert(t, test.res == test.y.Mul(test.x),
"TestPolMul failed for %d: %v * %v: want %v, got %v",
i, test.x, test.y, test.res, m)
if test.res != test.y.Mul(test.x) {
t.Errorf("TestPolMul failed for %d: %v * %v: want %v, got %v",
i, test.x, test.y, test.res, m)
}
}
}
@ -95,21 +99,21 @@ func TestPolMulOverflow(t *testing.T) {
if e, ok := err.(string); ok && e == "multiplication would overflow uint64" {
return
} else {
t.Logf("invalid error raised: %v", err)
// re-raise error if not overflow
panic(err)
}
t.Logf("invalid error raised: %v", err)
// re-raise error if not overflow
panic(err)
}()
x := chunker.Pol(1 << 63)
x := Pol(1 << 63)
x.Mul(2)
t.Fatal("overflow test did not panic")
}
var polDivTests = []struct {
x, y chunker.Pol
res chunker.Pol
x, y Pol
res Pol
}{
{10, 50, 0},
{0, 1, 0},
@ -139,15 +143,16 @@ var polDivTests = []struct {
func TestPolDiv(t *testing.T) {
for i, test := range polDivTests {
m := test.x.Div(test.y)
Assert(t, test.res == m,
"TestPolDiv failed for test %d: %v * %v: want %v, got %v",
i, test.x, test.y, test.res, m)
if test.res != m {
t.Errorf("TestPolDiv failed for test %d: %v * %v: want %v, got %v",
i, test.x, test.y, test.res, m)
}
}
}
var polModTests = []struct {
x, y chunker.Pol
res chunker.Pol
x, y Pol
res Pol
}{
{10, 50, 10},
{0, 1, 0},
@ -175,14 +180,17 @@ var polModTests = []struct {
}
func TestPolModt(t *testing.T) {
for _, test := range polModTests {
Equals(t, test.res, test.x.Mod(test.y))
for i, test := range polModTests {
res := test.x.Mod(test.y)
if test.res != res {
t.Errorf("test %d failed: want %v, got %v", i, test.res, res)
}
}
}
func BenchmarkPolDivMod(t *testing.B) {
f := chunker.Pol(0x2482734cacca49)
g := chunker.Pol(0x3af4b284899)
f := Pol(0x2482734cacca49)
g := Pol(0x3af4b284899)
for i := 0; i < t.N; i++ {
g.DivMod(f)
@ -190,8 +198,8 @@ func BenchmarkPolDivMod(t *testing.B) {
}
func BenchmarkPolDiv(t *testing.B) {
f := chunker.Pol(0x2482734cacca49)
g := chunker.Pol(0x3af4b284899)
f := Pol(0x2482734cacca49)
g := Pol(0x3af4b284899)
for i := 0; i < t.N; i++ {
g.Div(f)
@ -199,8 +207,8 @@ func BenchmarkPolDiv(t *testing.B) {
}
func BenchmarkPolMod(t *testing.B) {
f := chunker.Pol(0x2482734cacca49)
g := chunker.Pol(0x3af4b284899)
f := Pol(0x2482734cacca49)
g := Pol(0x3af4b284899)
for i := 0; i < t.N; i++ {
g.Mod(f)
@ -208,7 +216,7 @@ func BenchmarkPolMod(t *testing.B) {
}
func BenchmarkPolDeg(t *testing.B) {
f := chunker.Pol(0x3af4b284899)
f := Pol(0x3af4b284899)
d := f.Deg()
if d != 41 {
t.Fatalf("BenchmalPolDeg: Wrong degree %d returned, expected %d",
@ -221,25 +229,31 @@ func BenchmarkPolDeg(t *testing.B) {
}
func TestRandomPolynomial(t *testing.T) {
_, err := chunker.RandomPolynomial()
OK(t, err)
_, err := RandomPolynomial()
if err != nil {
t.Fatal(err)
}
}
func BenchmarkRandomPolynomial(t *testing.B) {
for i := 0; i < t.N; i++ {
_, err := chunker.RandomPolynomial()
OK(t, err)
_, err := RandomPolynomial()
if err != nil {
t.Fatal(err)
}
}
}
func TestExpandPolynomial(t *testing.T) {
pol := chunker.Pol(0x3DA3358B4DC173)
pol := Pol(0x3DA3358B4DC173)
s := pol.Expand()
Equals(t, "x^53+x^52+x^51+x^50+x^48+x^47+x^45+x^41+x^40+x^37+x^36+x^34+x^32+x^31+x^27+x^25+x^24+x^22+x^19+x^18+x^16+x^15+x^14+x^8+x^6+x^5+x^4+x+1", s)
if s != "x^53+x^52+x^51+x^50+x^48+x^47+x^45+x^41+x^40+x^37+x^36+x^34+x^32+x^31+x^27+x^25+x^24+x^22+x^19+x^18+x^16+x^15+x^14+x^8+x^6+x^5+x^4+x+1" {
t.Fatal("wrong result")
}
}
var polIrredTests = []struct {
f chunker.Pol
f Pol
irred bool
}{
{0x38f1e565e288df, false},
@ -270,15 +284,16 @@ var polIrredTests = []struct {
func TestPolIrreducible(t *testing.T) {
for _, test := range polIrredTests {
Assert(t, test.f.Irreducible() == test.irred,
"Irreducibility test for Polynomial %v failed: got %v, wanted %v",
test.f, test.f.Irreducible(), test.irred)
if test.f.Irreducible() != test.irred {
t.Errorf("Irreducibility test for Polynomial %v failed: got %v, wanted %v",
test.f, test.f.Irreducible(), test.irred)
}
}
}
func BenchmarkPolIrreducible(b *testing.B) {
// find first irreducible polynomial
var pol chunker.Pol
var pol Pol
for _, test := range polIrredTests {
if test.irred {
pol = test.f
@ -287,15 +302,16 @@ func BenchmarkPolIrreducible(b *testing.B) {
}
for i := 0; i < b.N; i++ {
Assert(b, pol.Irreducible(),
"Irreducibility test for Polynomial %v failed", pol)
if !pol.Irreducible() {
b.Errorf("Irreducibility test for Polynomial %v failed", pol)
}
}
}
var polGCDTests = []struct {
f1 chunker.Pol
f2 chunker.Pol
gcd chunker.Pol
f1 Pol
f2 Pol
gcd Pol
}{
{10, 50, 2},
{0, 1, 1},
@ -345,21 +361,24 @@ var polGCDTests = []struct {
func TestPolGCD(t *testing.T) {
for i, test := range polGCDTests {
gcd := test.f1.GCD(test.f2)
Assert(t, test.gcd == gcd,
"GCD test %d (%+v) failed: got %v, wanted %v",
i, test, gcd, test.gcd)
if test.gcd != gcd {
t.Errorf("GCD test %d (%+v) failed: got %v, wanted %v",
i, test, gcd, test.gcd)
}
gcd = test.f2.GCD(test.f1)
Assert(t, test.gcd == gcd,
"GCD test %d (%+v) failed: got %v, wanted %v",
i, test, gcd, test.gcd)
if test.gcd != gcd {
t.Errorf("GCD test %d (%+v) failed: got %v, wanted %v",
i, test, gcd, test.gcd)
}
}
}
var polMulModTests = []struct {
f1 chunker.Pol
f2 chunker.Pol
g chunker.Pol
mod chunker.Pol
f1 Pol
f2 Pol
g Pol
mod Pol
}{
{
0x1230,
@ -378,8 +397,9 @@ var polMulModTests = []struct {
func TestPolMulMod(t *testing.T) {
for i, test := range polMulModTests {
mod := test.f1.MulMod(test.f2, test.g)
Assert(t, mod == test.mod,
"MulMod test %d (%+v) failed: got %v, wanted %v",
i, test, mod, test.mod)
if mod != test.mod {
t.Errorf("MulMod test %d (%+v) failed: got %v, wanted %v",
i, test, mod, test.mod)
}
}
}