forked from TrueCloudLab/restic
Update chunker
This commit is contained in:
parent
4dac6d45fd
commit
9e24238cdd
2 changed files with 88 additions and 57 deletions
2
vendor/manifest
vendored
2
vendor/manifest
vendored
|
@ -40,7 +40,7 @@
|
||||||
{
|
{
|
||||||
"importpath": "github.com/restic/chunker",
|
"importpath": "github.com/restic/chunker",
|
||||||
"repository": "https://github.com/restic/chunker",
|
"repository": "https://github.com/restic/chunker",
|
||||||
"revision": "16c849a106e0a50d658e8f5e49a01f6728f4f92c",
|
"revision": "d1a97fa6e55ab338a8ceb769d72f856a56e9379a",
|
||||||
"branch": "master"
|
"branch": "master"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
143
vendor/src/github.com/restic/chunker/chunker.go
vendored
143
vendor/src/github.com/restic/chunker/chunker.go
vendored
|
@ -33,12 +33,12 @@ type tables struct {
|
||||||
|
|
||||||
// cache precomputed tables, these are read-only anyway
|
// cache precomputed tables, these are read-only anyway
|
||||||
var cache struct {
|
var cache struct {
|
||||||
entries map[Pol]*tables
|
entries map[Pol]tables
|
||||||
sync.Mutex
|
sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
cache.entries = make(map[Pol]*tables)
|
cache.entries = make(map[Pol]tables)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Chunk is one content-dependent chunk of bytes whose end was cut when the
|
// Chunk is one content-dependent chunk of bytes whose end was cut when the
|
||||||
|
@ -50,17 +50,7 @@ type Chunk struct {
|
||||||
Data []byte
|
Data []byte
|
||||||
}
|
}
|
||||||
|
|
||||||
// Chunker splits content with Rabin Fingerprints.
|
type chunkerState struct {
|
||||||
type Chunker struct {
|
|
||||||
MinSize, MaxSize uint
|
|
||||||
|
|
||||||
pol Pol
|
|
||||||
polShift uint
|
|
||||||
tables *tables
|
|
||||||
|
|
||||||
rd io.Reader
|
|
||||||
closed bool
|
|
||||||
|
|
||||||
window [windowSize]byte
|
window [windowSize]byte
|
||||||
wpos int
|
wpos int
|
||||||
|
|
||||||
|
@ -77,15 +67,37 @@ type Chunker struct {
|
||||||
digest uint64
|
digest uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type chunkerConfig struct {
|
||||||
|
MinSize, MaxSize uint
|
||||||
|
|
||||||
|
pol Pol
|
||||||
|
polShift uint
|
||||||
|
tables tables
|
||||||
|
tablesInitialized bool
|
||||||
|
|
||||||
|
rd io.Reader
|
||||||
|
closed bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// Chunker splits content with Rabin Fingerprints.
|
||||||
|
type Chunker struct {
|
||||||
|
chunkerConfig
|
||||||
|
chunkerState
|
||||||
|
}
|
||||||
|
|
||||||
// New returns a new Chunker based on polynomial p that reads from rd
|
// New returns a new Chunker based on polynomial p that reads from rd
|
||||||
// with bufsize and pass all data to hash along the way.
|
// with bufsize and pass all data to hash along the way.
|
||||||
func New(rd io.Reader, pol Pol) *Chunker {
|
func New(rd io.Reader, pol Pol) *Chunker {
|
||||||
c := &Chunker{
|
c := &Chunker{
|
||||||
buf: make([]byte, chunkerBufSize),
|
chunkerState: chunkerState{
|
||||||
pol: pol,
|
buf: make([]byte, chunkerBufSize),
|
||||||
rd: rd,
|
},
|
||||||
MinSize: MinSize,
|
chunkerConfig: chunkerConfig{
|
||||||
MaxSize: MaxSize,
|
pol: pol,
|
||||||
|
rd: rd,
|
||||||
|
MinSize: MinSize,
|
||||||
|
MaxSize: MaxSize,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
c.reset()
|
c.reset()
|
||||||
|
@ -96,11 +108,15 @@ func New(rd io.Reader, pol Pol) *Chunker {
|
||||||
// Reset reinitializes the chunker with a new reader and polynomial.
|
// Reset reinitializes the chunker with a new reader and polynomial.
|
||||||
func (c *Chunker) Reset(rd io.Reader, pol Pol) {
|
func (c *Chunker) Reset(rd io.Reader, pol Pol) {
|
||||||
*c = Chunker{
|
*c = Chunker{
|
||||||
buf: c.buf,
|
chunkerState: chunkerState{
|
||||||
pol: pol,
|
buf: c.buf,
|
||||||
rd: rd,
|
},
|
||||||
MinSize: c.MinSize,
|
chunkerConfig: chunkerConfig{
|
||||||
MaxSize: c.MaxSize,
|
pol: pol,
|
||||||
|
rd: rd,
|
||||||
|
MinSize: MinSize,
|
||||||
|
MaxSize: MaxSize,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
c.reset()
|
c.reset()
|
||||||
|
@ -118,7 +134,7 @@ func (c *Chunker) reset() {
|
||||||
c.digest = 0
|
c.digest = 0
|
||||||
c.wpos = 0
|
c.wpos = 0
|
||||||
c.count = 0
|
c.count = 0
|
||||||
c.slide(1)
|
c.digest = c.slide(c.digest, 1)
|
||||||
c.start = c.pos
|
c.start = c.pos
|
||||||
|
|
||||||
// do not start a new chunk unless at least MinSize bytes have been read
|
// do not start a new chunk unless at least MinSize bytes have been read
|
||||||
|
@ -133,6 +149,8 @@ func (c *Chunker) fillTables() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
c.tablesInitialized = true
|
||||||
|
|
||||||
// test if the tables are cached for this polynomial
|
// test if the tables are cached for this polynomial
|
||||||
cache.Lock()
|
cache.Lock()
|
||||||
defer cache.Unlock()
|
defer cache.Unlock()
|
||||||
|
@ -141,10 +159,6 @@ func (c *Chunker) fillTables() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// else create a new entry
|
|
||||||
c.tables = &tables{}
|
|
||||||
cache.entries[c.pol] = c.tables
|
|
||||||
|
|
||||||
// calculate table for sliding out bytes. The byte to slide out is used as
|
// calculate table for sliding out bytes. The byte to slide out is used as
|
||||||
// the index for the table, the value contains the following:
|
// the index for the table, the value contains the following:
|
||||||
// out_table[b] = Hash(b || 0 || ... || 0)
|
// out_table[b] = Hash(b || 0 || ... || 0)
|
||||||
|
@ -178,6 +192,8 @@ func (c *Chunker) fillTables() {
|
||||||
// enough to reduce modulo Polynomial
|
// enough to reduce modulo Polynomial
|
||||||
c.tables.mod[b] = Pol(uint64(b)<<uint(k)).Mod(c.pol) | (Pol(b) << uint(k))
|
c.tables.mod[b] = Pol(uint64(b)<<uint(k)).Mod(c.pol) | (Pol(b) << uint(k))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cache.entries[c.pol] = c.tables
|
||||||
}
|
}
|
||||||
|
|
||||||
// Next returns the position and length of the next chunk of data. If an error
|
// Next returns the position and length of the next chunk of data. If an error
|
||||||
|
@ -186,13 +202,19 @@ func (c *Chunker) fillTables() {
|
||||||
// subsequent calls yield an io.EOF error.
|
// subsequent calls yield an io.EOF error.
|
||||||
func (c *Chunker) Next(data []byte) (Chunk, error) {
|
func (c *Chunker) Next(data []byte) (Chunk, error) {
|
||||||
data = data[:0]
|
data = data[:0]
|
||||||
if c.tables == nil {
|
if !c.tablesInitialized {
|
||||||
return Chunk{}, errors.New("polynomial is not set")
|
return Chunk{}, errors.New("tables for polynomial computation not initialized")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tabout := c.tables.out
|
||||||
|
tabmod := c.tables.mod
|
||||||
|
polShift := c.polShift
|
||||||
|
minSize := c.MinSize
|
||||||
|
maxSize := c.MaxSize
|
||||||
|
buf := c.buf
|
||||||
for {
|
for {
|
||||||
if c.bpos >= c.bmax {
|
if c.bpos >= c.bmax {
|
||||||
n, err := io.ReadFull(c.rd, c.buf[:])
|
n, err := io.ReadFull(c.rd, buf[:])
|
||||||
|
|
||||||
if err == io.ErrUnexpectedEOF {
|
if err == io.ErrUnexpectedEOF {
|
||||||
err = nil
|
err = nil
|
||||||
|
@ -230,7 +252,7 @@ func (c *Chunker) Next(data []byte) (Chunk, error) {
|
||||||
n := c.bmax - c.bpos
|
n := c.bmax - c.bpos
|
||||||
if c.pre > uint(n) {
|
if c.pre > uint(n) {
|
||||||
c.pre -= uint(n)
|
c.pre -= uint(n)
|
||||||
data = append(data, c.buf[c.bpos:c.bmax]...)
|
data = append(data, buf[c.bpos:c.bmax]...)
|
||||||
|
|
||||||
c.count += uint(n)
|
c.count += uint(n)
|
||||||
c.pos += uint(n)
|
c.pos += uint(n)
|
||||||
|
@ -239,7 +261,7 @@ func (c *Chunker) Next(data []byte) (Chunk, error) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
data = append(data, c.buf[c.bpos:c.bpos+c.pre]...)
|
data = append(data, buf[c.bpos:c.bpos+c.pre]...)
|
||||||
|
|
||||||
c.bpos += c.pre
|
c.bpos += c.pre
|
||||||
c.count += c.pre
|
c.count += c.pre
|
||||||
|
@ -248,37 +270,41 @@ func (c *Chunker) Next(data []byte) (Chunk, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
add := c.count
|
add := c.count
|
||||||
for _, b := range c.buf[c.bpos:c.bmax] {
|
digest := c.digest
|
||||||
// inline c.slide(b) and append(b) to increase performance
|
win := c.window
|
||||||
out := c.window[c.wpos]
|
wpos := c.wpos
|
||||||
c.window[c.wpos] = b
|
for _, b := range buf[c.bpos:c.bmax] {
|
||||||
c.digest ^= uint64(c.tables.out[out])
|
// slide(b)
|
||||||
c.wpos = (c.wpos + 1) % windowSize
|
out := win[wpos]
|
||||||
|
win[wpos] = b
|
||||||
|
digest ^= uint64(tabout[out])
|
||||||
|
wpos = (wpos + 1) % windowSize
|
||||||
|
|
||||||
// c.append(b)
|
// updateDigest
|
||||||
index := c.digest >> c.polShift
|
index := byte(digest >> polShift)
|
||||||
c.digest <<= 8
|
digest <<= 8
|
||||||
c.digest |= uint64(b)
|
digest |= uint64(b)
|
||||||
|
|
||||||
c.digest ^= uint64(c.tables.mod[index])
|
digest ^= uint64(tabmod[index])
|
||||||
// end inline
|
// end manual inline
|
||||||
|
|
||||||
add++
|
add++
|
||||||
if add < c.MinSize {
|
if add < minSize {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c.digest&splitmask) == 0 || add >= MaxSize {
|
if (digest&splitmask) == 0 || add >= maxSize {
|
||||||
i := add - c.count - 1
|
i := add - c.count - 1
|
||||||
data = append(data, c.buf[c.bpos:c.bpos+uint(i)+1]...)
|
data = append(data, c.buf[c.bpos:c.bpos+uint(i)+1]...)
|
||||||
c.count = add
|
c.count = add
|
||||||
c.pos += uint(i) + 1
|
c.pos += uint(i) + 1
|
||||||
c.bpos += uint(i) + 1
|
c.bpos += uint(i) + 1
|
||||||
|
c.buf = buf
|
||||||
|
|
||||||
chunk := Chunk{
|
chunk := Chunk{
|
||||||
Start: c.start,
|
Start: c.start,
|
||||||
Length: c.count,
|
Length: c.count,
|
||||||
Cut: c.digest,
|
Cut: digest,
|
||||||
Data: data,
|
Data: data,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -287,6 +313,9 @@ func (c *Chunker) Next(data []byte) (Chunk, error) {
|
||||||
return chunk, nil
|
return chunk, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
c.digest = digest
|
||||||
|
c.window = win
|
||||||
|
c.wpos = wpos
|
||||||
|
|
||||||
steps := c.bmax - c.bpos
|
steps := c.bmax - c.bpos
|
||||||
if steps > 0 {
|
if steps > 0 {
|
||||||
|
@ -298,21 +327,23 @@ func (c *Chunker) Next(data []byte) (Chunk, error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Chunker) append(b byte) {
|
func updateDigest(digest uint64, polShift uint, tab tables, b byte) (newDigest uint64) {
|
||||||
index := c.digest >> c.polShift
|
index := digest >> polShift
|
||||||
c.digest <<= 8
|
digest <<= 8
|
||||||
c.digest |= uint64(b)
|
digest |= uint64(b)
|
||||||
|
|
||||||
c.digest ^= uint64(c.tables.mod[index])
|
digest ^= uint64(tab.mod[index])
|
||||||
|
return digest
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Chunker) slide(b byte) {
|
func (c *Chunker) slide(digest uint64, b byte) (newDigest uint64) {
|
||||||
out := c.window[c.wpos]
|
out := c.window[c.wpos]
|
||||||
c.window[c.wpos] = b
|
c.window[c.wpos] = b
|
||||||
c.digest ^= uint64(c.tables.out[out])
|
digest ^= uint64(c.tables.out[out])
|
||||||
c.wpos = (c.wpos + 1) % windowSize
|
c.wpos = (c.wpos + 1) % windowSize
|
||||||
|
|
||||||
c.append(b)
|
digest = updateDigest(digest, c.polShift, c.tables, b)
|
||||||
|
return digest
|
||||||
}
|
}
|
||||||
|
|
||||||
func appendByte(hash Pol, b byte, pol Pol) Pol {
|
func appendByte(hash Pol, b byte, pol Pol) Pol {
|
||||||
|
|
Loading…
Reference in a new issue