diff --git a/archiver.go b/archiver.go index f77efb4ad..cc5d079b1 100644 --- a/archiver.go +++ b/archiver.go @@ -1,6 +1,7 @@ package restic import ( + "crypto/sha256" "encoding/json" "fmt" "io" @@ -183,10 +184,8 @@ func (arch *Archiver) SaveFile(p *Progress, node *Node) error { return err } - chnker := GetChunker("archiver.SaveFile") - chnker.Reset(file, arch.s.Config.ChunkerPolynomial) + chnker := chunker.New(file, arch.s.Config.ChunkerPolynomial, sha256.New()) resultChannels := [](<-chan saveResult){} - defer FreeChunker("archiver.SaveFile", chnker) for { chunk, err := chnker.Next() diff --git a/archiver_test.go b/archiver_test.go index df9b1fcfc..519f34839 100644 --- a/archiver_test.go +++ b/archiver_test.go @@ -2,6 +2,7 @@ package restic_test import ( "bytes" + "crypto/sha256" "flag" "io" "testing" @@ -25,9 +26,8 @@ type Rdr interface { } func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.Key) { - ch := restic.GetChunker("BenchmarkChunkEncrypt") rd.Seek(0, 0) - ch.Reset(rd, testPol) + ch := chunker.New(rd, testPol, sha256.New()) for { chunk, err := ch.Next() @@ -47,8 +47,6 @@ func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.K _, err = crypto.Encrypt(key, buf2, buf) OK(b, err) } - - restic.FreeChunker("BenchmarkChunkEncrypt", ch) } func BenchmarkChunkEncrypt(b *testing.B) { @@ -73,9 +71,7 @@ func BenchmarkChunkEncrypt(b *testing.B) { } func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *crypto.Key) { - ch := restic.GetChunker("BenchmarkChunkEncryptP") - rd.Seek(0, 0) - ch.Reset(rd, testPol) + ch := chunker.New(rd, testPol, sha256.New()) for { chunk, err := ch.Next() @@ -88,8 +84,6 @@ func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *crypto.Key) io.ReadFull(chunk.Reader(rd), buf) crypto.Encrypt(key, buf, buf) } - - restic.FreeChunker("BenchmarkChunkEncryptP", ch) } func BenchmarkChunkEncryptParallel(b *testing.B) { diff --git a/chunker/chunker.go b/chunker/chunker.go index 9e016e4dd..944321f75 100644 --- a/chunker/chunker.go +++ b/chunker/chunker.go @@ -17,6 +17,9 @@ const ( // aim to create chunks of 20 bits or about 1MiB on average. averageBits = 20 + // default buffer size + bufSize = 512 * KiB + // MinSize is the minimal size of a chunk. MinSize = 512 * KiB // MaxSize is the maximal size of a chunk. @@ -81,36 +84,33 @@ type Chunker struct { // New returns a new Chunker based on polynomial p that reads from data from rd // with bufsize and pass all data to hash along the way. -func New(rd io.Reader, p Pol, bufsize int, hash hash.Hash) *Chunker { +func New(rd io.Reader, pol Pol, h hash.Hash) *Chunker { c := &Chunker{ - buf: make([]byte, bufsize), - h: hash, + buf: make([]byte, bufSize), + h: h, + pol: pol, + rd: rd, } - c.Reset(rd, p) + + c.reset() + return c } -// Reset restarts a chunker so that it can be reused with a different -// polynomial and reader. -func (c *Chunker) Reset(rd io.Reader, p Pol) { - c.pol = p - c.polShift = uint(p.Deg() - 8) +func (c *Chunker) reset() { + c.polShift = uint(c.pol.Deg() - 8) c.fillTables() - c.rd = rd for i := 0; i < windowSize; i++ { c.window[i] = 0 } + c.closed = false c.digest = 0 c.wpos = 0 - c.pos = 0 - c.start = 0 c.count = 0 - - if p != 0 { - c.slide(1) - } + c.slide(1) + c.start = c.pos if c.h != nil { c.h.Reset() @@ -276,16 +276,7 @@ func (c *Chunker) Next() (*Chunk, error) { Digest: c.hashDigest(), } - if c.h != nil { - c.h.Reset() - } - - // reset chunker, but keep position - pos := c.pos - c.Reset(c.rd, c.pol) - c.pos = pos - c.start = pos - c.pre = MinSize - windowSize + c.reset() return chunk, nil } diff --git a/chunker/chunker_test.go b/chunker/chunker_test.go index 9df5b49ae..fbd692d7d 100644 --- a/chunker/chunker_test.go +++ b/chunker/chunker_test.go @@ -19,7 +19,6 @@ import ( ) var benchmarkFile = flag.String("bench.file", "", "read from this file for benchmark") -var testBufSize = flag.Int("test.bufsize", 256*1024, "use this buffer size for benchmark") func parseDigest(s string) []byte { d, err := hex.DecodeString(s) @@ -151,7 +150,7 @@ func getRandom(seed, count int) []byte { func TestChunker(t *testing.T) { // setup data source buf := getRandom(23, 32*1024*1024) - ch := chunker.New(bytes.NewReader(buf), testPol, *testBufSize, sha256.New()) + ch := chunker.New(bytes.NewReader(buf), testPol, sha256.New()) chunks := testWithData(t, ch, chunks1) // test reader @@ -178,7 +177,7 @@ func TestChunker(t *testing.T) { // setup nullbyte data source buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) - ch = chunker.New(bytes.NewReader(buf), testPol, *testBufSize, sha256.New()) + ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New()) testWithData(t, ch, chunks2) } @@ -194,7 +193,7 @@ func TestChunkerWithRandomPolynomial(t *testing.T) { t.Logf("generating random polynomial took %v", time.Since(start)) start = time.Now() - ch := chunker.New(bytes.NewReader(buf), p, *testBufSize, sha256.New()) + ch := chunker.New(bytes.NewReader(buf), p, sha256.New()) t.Logf("creating chunker took %v", time.Since(start)) // make sure that first chunk is different @@ -211,7 +210,7 @@ func TestChunkerWithRandomPolynomial(t *testing.T) { func TestChunkerWithoutHash(t *testing.T) { // setup data source buf := getRandom(23, 32*1024*1024) - ch := chunker.New(bytes.NewReader(buf), testPol, *testBufSize, nil) + ch := chunker.New(bytes.NewReader(buf), testPol, nil) chunks := testWithData(t, ch, chunks1) // test reader @@ -241,30 +240,17 @@ func TestChunkerWithoutHash(t *testing.T) { // setup nullbyte data source buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) - ch = chunker.New(bytes.NewReader(buf), testPol, *testBufSize, sha256.New()) + ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New()) testWithData(t, ch, chunks2) } -func TestChunkerReuse(t *testing.T) { - // test multiple uses of the same chunker - ch := chunker.New(nil, testPol, *testBufSize, sha256.New()) - buf := getRandom(23, 32*1024*1024) - - for i := 0; i < 4; i++ { - ch.Reset(bytes.NewReader(buf), testPol) - testWithData(t, ch, chunks1) - } -} - func benchmarkChunker(b *testing.B, hash hash.Hash) { var ( rd io.ReadSeeker size int ) - b.Logf("using bufsize %v", *testBufSize) - if *benchmarkFile != "" { b.Logf("using file %q for benchmark", *benchmarkFile) f, err := os.Open(*benchmarkFile) @@ -284,8 +270,6 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) { rd = bytes.NewReader(getRandom(23, size)) } - ch := chunker.New(rd, testPol, *testBufSize, hash) - b.ResetTimer() b.SetBytes(int64(size)) @@ -294,7 +278,7 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) { chunks = 0 rd.Seek(0, 0) - ch.Reset(rd, testPol) + ch := chunker.New(rd, testPol, hash) for { _, err := ch.Next() @@ -333,6 +317,6 @@ func BenchmarkNewChunker(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - chunker.New(bytes.NewBuffer(nil), p, *testBufSize, nil) + chunker.New(bytes.NewBuffer(nil), p, nil) } } diff --git a/pools.go b/pools.go index 02a56d9eb..151752062 100644 --- a/pools.go +++ b/pools.go @@ -1,7 +1,6 @@ package restic import ( - "crypto/sha256" "sync" "github.com/restic/restic/chunker" @@ -23,7 +22,6 @@ type poolStats struct { const ( maxCiphertextSize = crypto.Extension + chunker.MaxSize - chunkerBufSize = 512 * chunker.KiB ) func (s *poolStats) Get(k string) { @@ -72,12 +70,10 @@ func newPoolStats() *poolStats { } var ( - chunkPool = sync.Pool{New: newChunkBuf} - chunkerPool = sync.Pool{New: newChunker} + chunkPool = sync.Pool{New: newChunkBuf} - chunkStats = newPoolStats() - nodeStats = newPoolStats() - chunkerStats = newPoolStats() + chunkStats = newPoolStats() + nodeStats = newPoolStats() ) func newChunkBuf() interface{} { @@ -89,15 +85,6 @@ func newChunkBuf() interface{} { return make([]byte, maxCiphertextSize) } -func newChunker() interface{} { - chunkStats.m.Lock() - defer chunkStats.m.Unlock() - chunkStats.new++ - - // create a new chunker with a nil reader and null polynomial - return chunker.New(nil, 0, chunkerBufSize, sha256.New()) -} - func GetChunkBuf(s string) []byte { chunkStats.Get(s) return chunkPool.Get().([]byte) @@ -108,16 +95,6 @@ func FreeChunkBuf(s string, buf []byte) { chunkPool.Put(buf) } -func GetChunker(s string) *chunker.Chunker { - chunkerStats.Get(s) - return chunkerPool.Get().(*chunker.Chunker) -} - -func FreeChunker(s string, ch *chunker.Chunker) { - chunkerStats.Put(s) - chunkerPool.Put(ch) -} - func PoolAlloc() { debug.Log("pools.PoolAlloc", "pool stats for chunk: new %d, get %d, put %d, diff %d, max %d\n", chunkStats.new, chunkStats.get, chunkStats.put, chunkStats.get-chunkStats.put, chunkStats.max) @@ -131,10 +108,4 @@ func PoolAlloc() { for k, v := range nodeStats.mget { debug.Log("pools.PoolAlloc", "pool stats for node[%s]: get %d, put %d, diff %d, max %d\n", k, v, nodeStats.mput[k], v-nodeStats.mput[k], nodeStats.mmax[k]) } - - debug.Log("pools.PoolAlloc", "pool stats for chunker: new %d, get %d, put %d, diff %d, max %d\n", - chunkerStats.new, chunkerStats.get, chunkerStats.put, chunkerStats.get-chunkerStats.put, chunkerStats.max) - for k, v := range chunkerStats.mget { - debug.Log("pools.PoolAlloc", "pool stats for chunker[%s]: get %d, put %d, diff %d, max %d\n", k, v, chunkerStats.mput[k], v-chunkerStats.mput[k], chunkerStats.mmax[k]) - } }