forked from TrueCloudLab/restic
Chunker: remove pool, buf and make bufsize an option
This commit is contained in:
parent
8dc5c2296a
commit
a5c33d80d8
2 changed files with 54 additions and 56 deletions
|
@ -9,10 +9,11 @@ const (
|
||||||
KiB = 1024
|
KiB = 1024
|
||||||
MiB = 1024 * KiB
|
MiB = 1024 * KiB
|
||||||
|
|
||||||
// randomly generated irreducible polynomial of degree 53 in Z_2[X]
|
// Polynomial is a randomly generated irreducible polynomial of degree 53
|
||||||
|
// in Z_2[X]. All rabin fingerprints are calculated with this polynomial.
|
||||||
Polynomial = 0x3DA3358B4DC173
|
Polynomial = 0x3DA3358B4DC173
|
||||||
|
|
||||||
// use a sliding window of 64 byte.
|
// WindowSize is the size of the sliding window.
|
||||||
WindowSize = 64
|
WindowSize = 64
|
||||||
|
|
||||||
// aim to create chunks of about 1MiB (2^20 bytes, i.e. 20 bits) on average.
|
// aim to create chunks of about 1MiB (2^20 bytes, i.e. 20 bits) on average.
|
||||||
|
@ -30,15 +31,6 @@ var (
|
||||||
once sync.Once
|
once sync.Once
|
||||||
mod_table [256]uint64
|
mod_table [256]uint64
|
||||||
out_table [256]uint64
|
out_table [256]uint64
|
||||||
|
|
||||||
chunkerPool = sync.Pool{
|
|
||||||
New: func() interface{} {
|
|
||||||
return &Chunker{
|
|
||||||
window: make([]byte, WindowSize),
|
|
||||||
buf: make([]byte, MaxSize),
|
|
||||||
}
|
|
||||||
},
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// A chunk is one content-dependent chunk of bytes whose end was cut when the
|
// A chunk is one content-dependent chunk of bytes whose end was cut when the
|
||||||
|
@ -72,22 +64,19 @@ type Chunker struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
// New returns a new Chunker that reads data from rd.
|
// New returns a new Chunker that reads data from rd.
|
||||||
func New(rd io.Reader) *Chunker {
|
func New(rd io.Reader, bufsize int) *Chunker {
|
||||||
c := chunkerPool.Get().(*Chunker)
|
once.Do(fill_tables)
|
||||||
c.rd = rd
|
|
||||||
|
|
||||||
once.Do(c.fill_tables)
|
c := &Chunker{
|
||||||
|
window: make([]byte, WindowSize),
|
||||||
|
buf: make([]byte, bufsize),
|
||||||
|
rd: rd,
|
||||||
|
}
|
||||||
c.reset()
|
c.reset()
|
||||||
|
|
||||||
return c
|
return c
|
||||||
}
|
}
|
||||||
|
|
||||||
// Free returns this chunker to the allocation pool
|
|
||||||
func (c *Chunker) Free() {
|
|
||||||
c.rd = nil
|
|
||||||
chunkerPool.Put(c)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *Chunker) reset() {
|
func (c *Chunker) reset() {
|
||||||
for i := 0; i < WindowSize; i++ {
|
for i := 0; i < WindowSize; i++ {
|
||||||
c.window[i] = 0
|
c.window[i] = 0
|
||||||
|
@ -103,7 +92,7 @@ func (c *Chunker) reset() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate out_table and mod_table for optimization. Must be called only once.
|
// Calculate out_table and mod_table for optimization. Must be called only once.
|
||||||
func (c *Chunker) fill_tables() {
|
func fill_tables() {
|
||||||
// calculate table for sliding out bytes. The byte to slide out is used as
|
// calculate table for sliding out bytes. The byte to slide out is used as
|
||||||
// the index for the table, the value contains the following:
|
// the index for the table, the value contains the following:
|
||||||
// out_table[b] = Hash(b || 0 || ... || 0)
|
// out_table[b] = Hash(b || 0 || ... || 0)
|
||||||
|
@ -139,13 +128,11 @@ func (c *Chunker) fill_tables() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Next returns the next chunk of data. If an error occurs while reading,
|
// Next returns the position and length of the next chunk of data. If an error
|
||||||
// the error is returned with a nil chunk. The state of the current chunk
|
// occurs while reading, the error is returned with a nil chunk. The state of
|
||||||
// is undefined. When the last chunk has been returned, all subsequent
|
// the current chunk is undefined. When the last chunk has been returned, all
|
||||||
// calls yield a nil chunk and an io.EOF error.
|
// subsequent calls yield a nil chunk and an io.EOF error.
|
||||||
func (c *Chunker) Next(dst []byte) (*Chunk, error) {
|
func (c *Chunker) Next() (*Chunk, error) {
|
||||||
dst = dst[:0]
|
|
||||||
|
|
||||||
for {
|
for {
|
||||||
if c.bpos >= c.bmax {
|
if c.bpos >= c.bmax {
|
||||||
n, err := io.ReadFull(c.rd, c.buf)
|
n, err := io.ReadFull(c.rd, c.buf)
|
||||||
|
@ -168,7 +155,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
|
||||||
Start: c.start,
|
Start: c.start,
|
||||||
Length: c.count,
|
Length: c.count,
|
||||||
Cut: c.digest,
|
Cut: c.digest,
|
||||||
Data: dst,
|
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -186,7 +172,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
|
||||||
n := c.bmax - c.bpos
|
n := c.bmax - c.bpos
|
||||||
if c.pre > n {
|
if c.pre > n {
|
||||||
c.pre -= n
|
c.pre -= n
|
||||||
dst = append(dst, c.buf[c.bpos:c.bmax]...)
|
|
||||||
|
|
||||||
c.count += n
|
c.count += n
|
||||||
c.pos += n
|
c.pos += n
|
||||||
|
@ -194,7 +179,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
dst = append(dst, c.buf[c.bpos:c.bpos+c.pre]...)
|
|
||||||
c.bpos += c.pre
|
c.bpos += c.pre
|
||||||
c.count += c.pre
|
c.count += c.pre
|
||||||
c.pos += c.pre
|
c.pos += c.pre
|
||||||
|
@ -216,7 +200,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
|
||||||
c.digest ^= mod_table[index]
|
c.digest ^= mod_table[index]
|
||||||
|
|
||||||
if (c.count+i+1 >= MinSize && (c.digest&splitmask) == 0) || c.count+i+1 >= MaxSize {
|
if (c.count+i+1 >= MinSize && (c.digest&splitmask) == 0) || c.count+i+1 >= MaxSize {
|
||||||
dst = append(dst, c.buf[c.bpos:c.bpos+i+1]...)
|
|
||||||
c.count += i + 1
|
c.count += i + 1
|
||||||
c.pos += i + 1
|
c.pos += i + 1
|
||||||
c.bpos += i + 1
|
c.bpos += i + 1
|
||||||
|
@ -225,7 +208,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
|
||||||
Start: c.start,
|
Start: c.start,
|
||||||
Length: c.count,
|
Length: c.count,
|
||||||
Cut: c.digest,
|
Cut: c.digest,
|
||||||
Data: dst,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// keep position
|
// keep position
|
||||||
|
@ -240,9 +222,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
steps := c.bmax - c.bpos
|
steps := c.bmax - c.bpos
|
||||||
if steps > 0 {
|
|
||||||
dst = append(dst, c.buf[c.bpos:c.bpos+steps]...)
|
|
||||||
}
|
|
||||||
c.count += steps
|
c.count += steps
|
||||||
c.pos += steps
|
c.pos += steps
|
||||||
c.bpos = c.bmax
|
c.bpos = c.bmax
|
||||||
|
|
|
@ -2,13 +2,18 @@ package chunker_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"flag"
|
||||||
"io"
|
"io"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/restic/restic/chunker"
|
"github.com/restic/restic/chunker"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var benchmarkFile = flag.String("bench.file", "", "read from this file for benchmark")
|
||||||
|
var testBufSize = flag.Int("test.bufsize", 256*1024, "use this buffer size for benchmark")
|
||||||
|
|
||||||
type chunk struct {
|
type chunk struct {
|
||||||
Length int
|
Length int
|
||||||
CutFP uint64
|
CutFP uint64
|
||||||
|
@ -55,9 +60,8 @@ var chunks2 = []chunk{
|
||||||
}
|
}
|
||||||
|
|
||||||
func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) {
|
func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) {
|
||||||
buf := make([]byte, chunker.MaxSize)
|
|
||||||
for i, chunk := range chunks {
|
for i, chunk := range chunks {
|
||||||
c, err := chnker.Next(buf)
|
c, err := chnker.Next()
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Error returned with chunk %d: %v", i, err)
|
t.Fatalf("Error returned with chunk %d: %v", i, err)
|
||||||
|
@ -73,11 +77,6 @@ func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) {
|
||||||
i, chunk.Length, c.Length)
|
i, chunk.Length, c.Length)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(c.Data) != chunk.Length {
|
|
||||||
t.Fatalf("Data length for chunk %d does not match: expected %d, got %d",
|
|
||||||
i, chunk.Length, len(c.Data))
|
|
||||||
}
|
|
||||||
|
|
||||||
if c.Cut != chunk.CutFP {
|
if c.Cut != chunk.CutFP {
|
||||||
t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x",
|
t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x",
|
||||||
i, len(chunks)-1, chunk.CutFP, c.Cut)
|
i, len(chunks)-1, chunk.CutFP, c.Cut)
|
||||||
|
@ -85,7 +84,7 @@ func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
c, err := chnker.Next(buf)
|
c, err := chnker.Next()
|
||||||
|
|
||||||
if c != nil {
|
if c != nil {
|
||||||
t.Fatal("additional non-nil chunk returned")
|
t.Fatal("additional non-nil chunk returned")
|
||||||
|
@ -114,32 +113,51 @@ func get_random(seed, count int) []byte {
|
||||||
func TestChunker(t *testing.T) {
|
func TestChunker(t *testing.T) {
|
||||||
// setup data source
|
// setup data source
|
||||||
buf := get_random(23, 32*1024*1024)
|
buf := get_random(23, 32*1024*1024)
|
||||||
ch := chunker.New(bytes.NewReader(buf))
|
ch := chunker.New(bytes.NewReader(buf), *testBufSize)
|
||||||
test_with_data(t, ch, chunks1)
|
test_with_data(t, ch, chunks1)
|
||||||
ch.Free()
|
|
||||||
|
|
||||||
// setup nullbyte data source
|
// setup nullbyte data source
|
||||||
buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize)
|
buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize)
|
||||||
ch = chunker.New(bytes.NewReader(buf))
|
ch = chunker.New(bytes.NewReader(buf), *testBufSize)
|
||||||
|
|
||||||
test_with_data(t, ch, chunks2)
|
test_with_data(t, ch, chunks2)
|
||||||
ch.Free()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestChunkerReuse(t *testing.T) {
|
func TestChunkerReuse(t *testing.T) {
|
||||||
// test multiple uses of the same chunker
|
// test multiple uses of the same chunker
|
||||||
for i := 0; i < 4; i++ {
|
for i := 0; i < 4; i++ {
|
||||||
buf := get_random(23, 32*1024*1024)
|
buf := get_random(23, 32*1024*1024)
|
||||||
ch := chunker.New(bytes.NewReader(buf))
|
ch := chunker.New(bytes.NewReader(buf), *testBufSize)
|
||||||
test_with_data(t, ch, chunks1)
|
test_with_data(t, ch, chunks1)
|
||||||
ch.Free()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkChunker(b *testing.B) {
|
func BenchmarkChunker(b *testing.B) {
|
||||||
size := 10 * 1024 * 1024
|
var (
|
||||||
buf := get_random(23, size)
|
rd io.ReadSeeker
|
||||||
dst := make([]byte, chunker.MaxSize)
|
size int
|
||||||
|
)
|
||||||
|
|
||||||
|
b.Logf("using bufsize %v", *testBufSize)
|
||||||
|
|
||||||
|
if *benchmarkFile != "" {
|
||||||
|
b.Logf("using file %q for benchmark", *benchmarkFile)
|
||||||
|
f, err := os.Open(*benchmarkFile)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatalf("open(%q): %v", *benchmarkFile, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fi, err := f.Stat()
|
||||||
|
if err != nil {
|
||||||
|
b.Fatalf("lstat(%q): %v", *benchmarkFile, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
size = int(fi.Size())
|
||||||
|
rd = f
|
||||||
|
} else {
|
||||||
|
size = 10 * 1024 * 1024
|
||||||
|
rd = bytes.NewReader(get_random(23, size))
|
||||||
|
}
|
||||||
|
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
b.SetBytes(int64(size))
|
b.SetBytes(int64(size))
|
||||||
|
@ -148,10 +166,11 @@ func BenchmarkChunker(b *testing.B) {
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
chunks = 0
|
chunks = 0
|
||||||
|
|
||||||
ch := chunker.New(bytes.NewReader(buf))
|
rd.Seek(0, 0)
|
||||||
|
ch := chunker.New(rd, *testBufSize)
|
||||||
|
|
||||||
for {
|
for {
|
||||||
_, err := ch.Next(dst)
|
_, err := ch.Next()
|
||||||
|
|
||||||
if err == io.EOF {
|
if err == io.EOF {
|
||||||
break
|
break
|
||||||
|
|
Loading…
Add table
Reference in a new issue