forked from TrueCloudLab/restic
Chunker: remove pool, buf and make bufsize an option
This commit is contained in:
parent
8dc5c2296a
commit
a5c33d80d8
2 changed files with 54 additions and 56 deletions
|
@ -9,10 +9,11 @@ const (
|
|||
KiB = 1024
|
||||
MiB = 1024 * KiB
|
||||
|
||||
// randomly generated irreducible polynomial of degree 53 in Z_2[X]
|
||||
// Polynomial is a randomly generated irreducible polynomial of degree 53
|
||||
// in Z_2[X]. All rabin fingerprints are calculated with this polynomial.
|
||||
Polynomial = 0x3DA3358B4DC173
|
||||
|
||||
// use a sliding window of 64 bytes.
|
||||
// WindowSize is the size of the sliding window.
|
||||
WindowSize = 64
|
||||
|
||||
// aim to create chunks of 20 bits or about 1MiB on average.
|
||||
|
@ -30,15 +31,6 @@ var (
|
|||
once sync.Once
|
||||
mod_table [256]uint64
|
||||
out_table [256]uint64
|
||||
|
||||
chunkerPool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
return &Chunker{
|
||||
window: make([]byte, WindowSize),
|
||||
buf: make([]byte, MaxSize),
|
||||
}
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
// A chunk is one content-dependent chunk of bytes whose end was cut when the
|
||||
|
@ -72,22 +64,19 @@ type Chunker struct {
|
|||
}
|
||||
|
||||
// New returns a new Chunker that reads data from rd.
|
||||
func New(rd io.Reader) *Chunker {
|
||||
c := chunkerPool.Get().(*Chunker)
|
||||
c.rd = rd
|
||||
func New(rd io.Reader, bufsize int) *Chunker {
|
||||
once.Do(fill_tables)
|
||||
|
||||
once.Do(c.fill_tables)
|
||||
c := &Chunker{
|
||||
window: make([]byte, WindowSize),
|
||||
buf: make([]byte, bufsize),
|
||||
rd: rd,
|
||||
}
|
||||
c.reset()
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
// Free returns this chunker to the allocation pool
|
||||
func (c *Chunker) Free() {
|
||||
c.rd = nil
|
||||
chunkerPool.Put(c)
|
||||
}
|
||||
|
||||
func (c *Chunker) reset() {
|
||||
for i := 0; i < WindowSize; i++ {
|
||||
c.window[i] = 0
|
||||
|
@ -103,7 +92,7 @@ func (c *Chunker) reset() {
|
|||
}
|
||||
|
||||
// Calculate out_table and mod_table for optimization. Must be called only once.
|
||||
func (c *Chunker) fill_tables() {
|
||||
func fill_tables() {
|
||||
// calculate table for sliding out bytes. The byte to slide out is used as
|
||||
// the index for the table, the value contains the following:
|
||||
// out_table[b] = Hash(b || 0 || ... || 0)
|
||||
|
@ -139,13 +128,11 @@ func (c *Chunker) fill_tables() {
|
|||
}
|
||||
}
|
||||
|
||||
// Next returns the next chunk of data. If an error occurs while reading,
|
||||
// the error is returned with a nil chunk. The state of the current chunk
|
||||
// is undefined. When the last chunk has been returned, all subsequent
|
||||
// calls yield a nil chunk and an io.EOF error.
|
||||
func (c *Chunker) Next(dst []byte) (*Chunk, error) {
|
||||
dst = dst[:0]
|
||||
|
||||
// Next returns the position and length of the next chunk of data. If an error
|
||||
// occurs while reading, the error is returned with a nil chunk. The state of
|
||||
// the current chunk is undefined. When the last chunk has been returned, all
|
||||
// subsequent calls yield a nil chunk and an io.EOF error.
|
||||
func (c *Chunker) Next() (*Chunk, error) {
|
||||
for {
|
||||
if c.bpos >= c.bmax {
|
||||
n, err := io.ReadFull(c.rd, c.buf)
|
||||
|
@ -168,7 +155,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
|
|||
Start: c.start,
|
||||
Length: c.count,
|
||||
Cut: c.digest,
|
||||
Data: dst,
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
|
@ -186,7 +172,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
|
|||
n := c.bmax - c.bpos
|
||||
if c.pre > n {
|
||||
c.pre -= n
|
||||
dst = append(dst, c.buf[c.bpos:c.bmax]...)
|
||||
|
||||
c.count += n
|
||||
c.pos += n
|
||||
|
@ -194,7 +179,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
|
|||
continue
|
||||
}
|
||||
|
||||
dst = append(dst, c.buf[c.bpos:c.bpos+c.pre]...)
|
||||
c.bpos += c.pre
|
||||
c.count += c.pre
|
||||
c.pos += c.pre
|
||||
|
@ -216,7 +200,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
|
|||
c.digest ^= mod_table[index]
|
||||
|
||||
if (c.count+i+1 >= MinSize && (c.digest&splitmask) == 0) || c.count+i+1 >= MaxSize {
|
||||
dst = append(dst, c.buf[c.bpos:c.bpos+i+1]...)
|
||||
c.count += i + 1
|
||||
c.pos += i + 1
|
||||
c.bpos += i + 1
|
||||
|
@ -225,7 +208,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
|
|||
Start: c.start,
|
||||
Length: c.count,
|
||||
Cut: c.digest,
|
||||
Data: dst,
|
||||
}
|
||||
|
||||
// keep position
|
||||
|
@ -240,9 +222,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
|
|||
}
|
||||
|
||||
steps := c.bmax - c.bpos
|
||||
if steps > 0 {
|
||||
dst = append(dst, c.buf[c.bpos:c.bpos+steps]...)
|
||||
}
|
||||
c.count += steps
|
||||
c.pos += steps
|
||||
c.bpos = c.bmax
|
||||
|
|
|
@ -2,13 +2,18 @@ package chunker_test
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"io"
|
||||
"math/rand"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/restic/restic/chunker"
|
||||
)
|
||||
|
||||
var benchmarkFile = flag.String("bench.file", "", "read from this file for benchmark")
|
||||
var testBufSize = flag.Int("test.bufsize", 256*1024, "use this buffer size for benchmark")
|
||||
|
||||
type chunk struct {
|
||||
Length int
|
||||
CutFP uint64
|
||||
|
@ -55,9 +60,8 @@ var chunks2 = []chunk{
|
|||
}
|
||||
|
||||
func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) {
|
||||
buf := make([]byte, chunker.MaxSize)
|
||||
for i, chunk := range chunks {
|
||||
c, err := chnker.Next(buf)
|
||||
c, err := chnker.Next()
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("Error returned with chunk %d: %v", i, err)
|
||||
|
@ -73,11 +77,6 @@ func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) {
|
|||
i, chunk.Length, c.Length)
|
||||
}
|
||||
|
||||
if len(c.Data) != chunk.Length {
|
||||
t.Fatalf("Data length for chunk %d does not match: expected %d, got %d",
|
||||
i, chunk.Length, len(c.Data))
|
||||
}
|
||||
|
||||
if c.Cut != chunk.CutFP {
|
||||
t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x",
|
||||
i, len(chunks)-1, chunk.CutFP, c.Cut)
|
||||
|
@ -85,7 +84,7 @@ func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) {
|
|||
}
|
||||
}
|
||||
|
||||
c, err := chnker.Next(buf)
|
||||
c, err := chnker.Next()
|
||||
|
||||
if c != nil {
|
||||
t.Fatal("additional non-nil chunk returned")
|
||||
|
@ -114,32 +113,51 @@ func get_random(seed, count int) []byte {
|
|||
func TestChunker(t *testing.T) {
|
||||
// setup data source
|
||||
buf := get_random(23, 32*1024*1024)
|
||||
ch := chunker.New(bytes.NewReader(buf))
|
||||
ch := chunker.New(bytes.NewReader(buf), *testBufSize)
|
||||
test_with_data(t, ch, chunks1)
|
||||
ch.Free()
|
||||
|
||||
// setup nullbyte data source
|
||||
buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize)
|
||||
ch = chunker.New(bytes.NewReader(buf))
|
||||
ch = chunker.New(bytes.NewReader(buf), *testBufSize)
|
||||
|
||||
test_with_data(t, ch, chunks2)
|
||||
ch.Free()
|
||||
}
|
||||
|
||||
func TestChunkerReuse(t *testing.T) {
|
||||
// test multiple uses of the same chunker
|
||||
for i := 0; i < 4; i++ {
|
||||
buf := get_random(23, 32*1024*1024)
|
||||
ch := chunker.New(bytes.NewReader(buf))
|
||||
ch := chunker.New(bytes.NewReader(buf), *testBufSize)
|
||||
test_with_data(t, ch, chunks1)
|
||||
ch.Free()
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkChunker(b *testing.B) {
|
||||
size := 10 * 1024 * 1024
|
||||
buf := get_random(23, size)
|
||||
dst := make([]byte, chunker.MaxSize)
|
||||
var (
|
||||
rd io.ReadSeeker
|
||||
size int
|
||||
)
|
||||
|
||||
b.Logf("using bufsize %v", *testBufSize)
|
||||
|
||||
if *benchmarkFile != "" {
|
||||
b.Logf("using file %q for benchmark", *benchmarkFile)
|
||||
f, err := os.Open(*benchmarkFile)
|
||||
if err != nil {
|
||||
b.Fatalf("open(%q): %v", *benchmarkFile, err)
|
||||
}
|
||||
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
b.Fatalf("lstat(%q): %v", *benchmarkFile, err)
|
||||
}
|
||||
|
||||
size = int(fi.Size())
|
||||
rd = f
|
||||
} else {
|
||||
size = 10 * 1024 * 1024
|
||||
rd = bytes.NewReader(get_random(23, size))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
b.SetBytes(int64(size))
|
||||
|
@ -148,10 +166,11 @@ func BenchmarkChunker(b *testing.B) {
|
|||
for i := 0; i < b.N; i++ {
|
||||
chunks = 0
|
||||
|
||||
ch := chunker.New(bytes.NewReader(buf))
|
||||
rd.Seek(0, 0)
|
||||
ch := chunker.New(rd, *testBufSize)
|
||||
|
||||
for {
|
||||
_, err := ch.Next(dst)
|
||||
_, err := ch.Next()
|
||||
|
||||
if err == io.EOF {
|
||||
break
|
||||
|
|
Loading…
Reference in a new issue