Seed chunker with random per-repository polynomial

This commit is contained in:
Alexander Neumann 2015-04-06 00:22:19 +02:00
parent c969a42e8d
commit b2307cafa2
7 changed files with 79 additions and 31 deletions

View file

@ -201,7 +201,7 @@ func (arch *Archiver) SaveFile(p *Progress, node *Node) (Blobs, error) {
// store all chunks
chnker := GetChunker("archiver.SaveFile")
chnker.Reset(file)
chnker.Reset(file, arch.s.ChunkerPolynomial())
chans := [](<-chan Blob){}
defer FreeChunker("archiver.SaveFile", chnker)

View file

@ -13,6 +13,7 @@ import (
)
var benchArchiveDirectory = flag.String("test.benchdir", ".", "benchmark archiving a real directory (default: .)")
var testPol = chunker.Pol(0x3DA3358B4DC173)
func get_random(seed, count int) []byte {
buf := make([]byte, count)
@ -39,7 +40,7 @@ type Rdr interface {
func benchmarkChunkEncrypt(b testing.TB, buf []byte, rd Rdr, key *restic.Key) {
ch := restic.GetChunker("BenchmarkChunkEncrypt")
rd.Seek(0, 0)
ch.Reset(rd)
ch.Reset(rd, testPol)
for {
chunk, err := ch.Next()
@ -86,7 +87,7 @@ func BenchmarkChunkEncrypt(b *testing.B) {
func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *restic.Key) {
ch := restic.GetChunker("BenchmarkChunkEncryptP")
rd.Seek(0, 0)
ch.Reset(rd)
ch.Reset(rd, testPol)
for {
chunk, err := ch.Next()

View file

@ -80,27 +80,21 @@ type Chunker struct {
// New returns a new Chunker based on polynomial p that reads data from rd
// using a buffer of bufsize bytes and passes all data to hash along the way.
func New(rd io.Reader, p Pol, bufsize int, hash hash.Hash) (*Chunker, error) {
func New(rd io.Reader, p Pol, bufsize int, hash hash.Hash) *Chunker {
c := &Chunker{
buf: make([]byte, bufsize),
h: hash,
}
if err := c.Reset(rd, p); err != nil {
return nil, err
}
return c, nil
c.Reset(rd, p)
return c
}
// Reset restarts a chunker so that it can be reused with a different
// polynomial and reader.
func (c *Chunker) Reset(rd io.Reader, p Pol) error {
func (c *Chunker) Reset(rd io.Reader, p Pol) {
c.pol = p
c.pol_shift = uint(p.Deg() - 8)
if err := c.fill_tables(); err != nil {
return err
}
c.fill_tables()
c.rd = rd
for i := 0; i < WindowSize; i++ {
@ -112,7 +106,10 @@ func (c *Chunker) Reset(rd io.Reader, p Pol) error {
c.pos = 0
c.start = 0
c.count = 0
c.slide(1)
if p != 0 {
c.slide(1)
}
if c.h != nil {
c.h.Reset()
@ -120,30 +117,28 @@ func (c *Chunker) Reset(rd io.Reader, p Pol) error {
// do not start a new chunk unless at least MinSize bytes have been read
c.pre = MinSize - WindowSize
return nil
}
// Calculate out_table and mod_table for optimization. Must be called only
// once. This implementation uses a cache in the global variable cache.
func (c *Chunker) fill_tables() error {
func (c *Chunker) fill_tables() {
// if polynomial hasn't been specified, do not compute anything for now
if c.pol == 0 {
return
}
// test if the tables are cached for this polynomial
cache.Lock()
defer cache.Unlock()
if t, ok := cache.entries[c.pol]; ok {
c.tables = t
return nil
return
}
// else create a new entry
c.tables = &tables{}
cache.entries[c.pol] = c.tables
// test irreducibility of p
if !c.pol.Irreducible() {
return errors.New("invalid polynomial")
}
// calculate table for sliding out bytes. The byte to slide out is used as
// the index for the table, the value contains the following:
// out_table[b] = Hash(b || 0 || ... || 0)
@ -177,8 +172,6 @@ func (c *Chunker) fill_tables() error {
// enough to reduce modulo Polynomial
c.tables.mod[b] = mod(uint64(b)<<uint(k), uint64(c.pol)) | (uint64(b) << uint(k))
}
return nil
}
// Next returns the position and length of the next chunk of data. If an error
@ -186,6 +179,10 @@ func (c *Chunker) fill_tables() error {
// the current chunk is undefined. When the last chunk has been returned, all
// subsequent calls yield a nil chunk and an io.EOF error.
func (c *Chunker) Next() (*Chunk, error) {
if c.tables == nil {
return nil, errors.New("polynomial is not set")
}
for {
if c.bpos >= c.bmax {
n, err := io.ReadFull(c.rd, c.buf[:])

View file

@ -6,6 +6,8 @@ import (
"errors"
"fmt"
"strconv"
"github.com/davecgh/go-spew/spew"
)
// Pol is a polynomial from F_2[X].
@ -255,3 +257,23 @@ func qp(p uint, g Pol) Pol {
// add x
return res.Add(2).Mod(g)
}
// MarshalJSON implements json.Marshaler. The polynomial is encoded as a JSON
// string holding its value in lowercase hexadecimal without any prefix, for
// example "3da3358b4dc173". The returned error is always nil.
func (p Pol) MarshalJSON() ([]byte, error) {
	// two quote characters plus at most 16 hex digits for a 64 bit value
	buf := make([]byte, 0, 18)
	buf = append(buf, '"')
	buf = strconv.AppendUint(buf, uint64(p), 16)
	buf = append(buf, '"')
	return buf, nil
}
// UnmarshalJSON implements json.Unmarshaler. It expects data to be a JSON
// string containing the polynomial in hexadecimal notation (the format
// produced by MarshalJSON) and stores the parsed value in p.
//
// An error is returned if data is not enclosed in double quotes or if the
// quoted text is not a valid 64 bit hexadecimal number. In that case p is left
// unmodified.
func (p *Pol) UnmarshalJSON(data []byte) error {
	// Insist on the surrounding quotes instead of blindly cutting off the
	// first and last byte, otherwise a bare JSON number such as 1234 would be
	// decoded as 0x23 without any error.
	last := len(data) - 1
	if last < 1 || data[0] != '"' || data[last] != '"' {
		return errors.New("invalid string for polynomial")
	}

	n, err := strconv.ParseUint(string(data[1:last]), 16, 64)
	if err != nil {
		return err
	}

	*p = Pol(n)
	return nil
}

28
key.go
View file

@ -13,6 +13,7 @@ import (
"github.com/restic/restic/backend"
"github.com/restic/restic/chunker"
"github.com/restic/restic/debug"
"golang.org/x/crypto/poly1305"
)
@ -62,10 +63,12 @@ type Key struct {
// MasterKeys holds signing and encryption keys for a repository. It is stored
// encrypted and signed as a JSON data structure in the Data field of the Key
// structure.
// structure. For the master key, the secret random polynomial used for content
// defined chunking is included.
type MasterKeys struct {
Sign MACKey `json:"sign"`
Encrypt AESKey `json:"encrypt"`
Sign MACKey `json:"sign"`
Encrypt AESKey `json:"encrypt"`
ChunkerPolynomial chunker.Pol `json:"chunker_polynomial,omitempty"`
}
// CreateKey initializes a master key in the given backend and encrypts it with
@ -106,6 +109,17 @@ func OpenKey(s Server, name string, password string) (*Key, error) {
}
k.name = name
// test if polynomial is valid and irreducible
if k.master.ChunkerPolynomial == 0 {
return nil, errors.New("Polynomial for content defined chunking is zero")
}
if !k.master.ChunkerPolynomial.Irreducible() {
return nil, errors.New("Polynomial for content defined chunking is invalid")
}
debug.Log("OpenKey", "Master keys loaded, polynomial %v", k.master.ChunkerPolynomial)
return k, nil
}
@ -184,6 +198,14 @@ func AddKey(s Server, password string, template *Key) (*Key, error) {
if template == nil {
// generate new random master keys
newkey.master = generateRandomKeys()
// generate random polynomial for cdc
p, err := chunker.RandomPolynomial()
if err != nil {
debug.Log("AddKey", "error generating new polynomial for cdc: %v", err)
return nil, err
}
debug.Log("AddKey", "generated new polynomial for cdc: %v", p)
newkey.master.ChunkerPolynomial = p
} else {
// copy master keys from old key
newkey.master = template.master

View file

@ -88,8 +88,8 @@ func newChunker() interface{} {
defer chunkStats.m.Unlock()
chunkStats.new++
// create a new chunker with a nil reader
return chunker.New(nil, chunkerBufSize, sha256.New())
// create a new chunker with a nil reader and null polynomial
return chunker.New(nil, 0, chunkerBufSize, sha256.New())
}
func GetChunkBuf(s string) []byte {

View file

@ -10,6 +10,7 @@ import (
"sync"
"github.com/restic/restic/backend"
"github.com/restic/restic/chunker"
"github.com/restic/restic/debug"
)
@ -26,6 +27,11 @@ func (s *Server) SetKey(k *Key) {
s.key = k
}
// ChunkerPolynomial reports the secret polynomial stored in the master key of
// the server's current key; it is the polynomial used for content defined
// chunking.
func (s *Server) ChunkerPolynomial() chunker.Pol {
	master := s.key.Master()
	return chunker.Pol(master.ChunkerPolynomial)
}
// Find loads the list of all blobs of type t and searches for names which start
// with prefix. If none is found, nil and ErrNoIDPrefixFound is returned. If
// more than one is found, nil and ErrMultipleIDMatches is returned.