Optimize chunker
Skip MinSize bytes at the beginning of each chunk. This increases throughput about 30%. Before: $ go test -v -bench . ./chunker === RUN TestChunker --- PASS: TestChunker (0.60s) === RUN TestChunkerReuse --- PASS: TestChunkerReuse (2.18s) PASS BenchmarkChunker 10 111229642 ns/op 94.27 MB/s --- BENCH: BenchmarkChunker chunker_test.go:168: 6 chunks, average chunk size: 1747626 bytes chunker_test.go:168: 6 chunks, average chunk size: 1747626 bytes ok github.com/restic/restic/chunker 4.120s After: $ go test -v -bench . ./chunker === RUN TestChunker --- PASS: TestChunker (0.48s) === RUN TestChunkerReuse --- PASS: TestChunkerReuse (1.75s) PASS BenchmarkChunker 20 81468596 ns/op 128.71 MB/s --- BENCH: BenchmarkChunker chunker_test.go:168: 6 chunks, average chunk size: 1747626 bytes chunker_test.go:168: 6 chunks, average chunk size: 1747626 bytes ok github.com/restic/restic/chunker 4.061s
This commit is contained in:
parent
cdf3336e7a
commit
bdcdcdea7d
2 changed files with 29 additions and 3 deletions
|
@ -66,6 +66,8 @@ type Chunker struct {
|
|||
count int
|
||||
pos int
|
||||
|
||||
pre int // wait for this many bytes before start calculating an new chunk
|
||||
|
||||
digest uint64
|
||||
}
|
||||
|
||||
|
@ -96,6 +98,8 @@ func (c *Chunker) reset() {
|
|||
c.pos = 0
|
||||
c.count = 0
|
||||
c.slide(1)
|
||||
// do not start a new chunk unless at least MinSize bytes have been read
|
||||
c.pre = MinSize - WindowSize
|
||||
}
|
||||
|
||||
// Calculate out_table and mod_table for optimization. Must be called only once.
|
||||
|
@ -141,6 +145,7 @@ func (c *Chunker) fill_tables() {
|
|||
// calls yield a nil chunk and an io.EOF error.
|
||||
func (c *Chunker) Next(dst []byte) (*Chunk, error) {
|
||||
dst = dst[:0]
|
||||
|
||||
for {
|
||||
if c.bpos >= c.bmax {
|
||||
n, err := io.ReadFull(c.rd, c.buf)
|
||||
|
@ -176,6 +181,26 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
|
|||
c.bmax = n
|
||||
}
|
||||
|
||||
// check if bytes have to be dismissed before starting a new chunk
|
||||
if c.pre > 0 {
|
||||
n := c.bmax - c.bpos
|
||||
if c.pre > n {
|
||||
c.pre -= n
|
||||
dst = append(dst, c.buf[c.bpos:c.bmax]...)
|
||||
|
||||
c.count += n
|
||||
c.pos += n
|
||||
c.bpos = c.bmax
|
||||
continue
|
||||
}
|
||||
|
||||
dst = append(dst, c.buf[c.bpos:c.bpos+c.pre]...)
|
||||
c.bpos += c.pre
|
||||
c.count += c.pre
|
||||
c.pos += c.pre
|
||||
c.pre = 0
|
||||
}
|
||||
|
||||
for i, b := range c.buf[c.bpos:c.bmax] {
|
||||
// inline c.slide(b) and append(b) to increase performance
|
||||
out := c.window[c.wpos]
|
||||
|
@ -208,6 +233,7 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
|
|||
c.reset()
|
||||
c.pos = pos
|
||||
c.start = pos
|
||||
c.pre = MinSize - WindowSize
|
||||
|
||||
return chunk, nil
|
||||
}
|
||||
|
|
|
@ -43,7 +43,7 @@ var chunks1 = []chunk{
|
|||
chunk{800374, 0x000968473f900000},
|
||||
chunk{2453512, 0x001e197c92600000},
|
||||
chunk{2651975, 0x000ae6c868000000},
|
||||
chunk{237392, 0x00184c5825e18636},
|
||||
chunk{237392, 0x0000000000000001},
|
||||
}
|
||||
|
||||
// test if nullbytes are correctly split, even if length is a multiple of MinSize.
|
||||
|
@ -79,8 +79,8 @@ func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) {
|
|||
}
|
||||
|
||||
if c.Cut != chunk.CutFP {
|
||||
t.Fatalf("Cut fingerprint for chunk %d does not match: expected %016x, got %016x",
|
||||
i, chunk.CutFP, c.Cut)
|
||||
t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x",
|
||||
i, len(chunks)-1, chunk.CutFP, c.Cut)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue