forked from TrueCloudLab/restic
Add packer, update Design.md
This commit is contained in:
parent
66b81a2a8b
commit
8f3b91ba32
3 changed files with 450 additions and 51 deletions
112
doc/Design.md
112
doc/Design.md
|
@ -64,6 +64,10 @@ The basic layout of a sample restic repository is shown below:
|
|||
|
||||
/tmp/restic-repo
|
||||
├── data
|
||||
│ ├── 21
|
||||
│ │ └── 2159dd48f8a24f33c307b750592773f8b71ff8d11452132a7b2e2a6a01611be1
|
||||
│ ├── 32
|
||||
│ │ └── 32ea976bc30771cebad8285cd99120ac8786f9ffd42141d452458089985043a5
|
||||
│ ├── 59
|
||||
│ │ └── 59fe4bcde59bd6222eba87795e35a90d82cd2f138a27b6835032b7b58173a426
|
||||
│ ├── 73
|
||||
|
@ -71,25 +75,14 @@ The basic layout of a sample restic repository is shown below:
|
|||
│ [...]
|
||||
├── id
|
||||
├── index
|
||||
│ └── c38f5fb68307c6a3e3aa945d556e325dc38f5fb68307c6a3e3aa945d556e325d
|
||||
│ ├── c38f5fb68307c6a3e3aa945d556e325dc38f5fb68307c6a3e3aa945d556e325d
|
||||
│ └── ca171b1b7394d90d330b265d90f506f9984043b342525f019788f97e745c71fd
|
||||
├── keys
|
||||
│ └── b02de829beeb3c01a63e6b25cbd421a98fef144f03b9a02e46eff9e2ca3f0bd7
|
||||
├── locks
|
||||
├── snapshots
|
||||
│ └── 22a5af1bdc6e616f8a29579458c49627e01b32210d09adb288d1ecda7c5711ec
|
||||
├── tmp
|
||||
├── trees
|
||||
│ ├── 21
|
||||
│ │ └── 2159dd48f8a24f33c307b750592773f8b71ff8d11452132a7b2e2a6a01611be1
|
||||
│ ├── 32
|
||||
│ │ └── 32ea976bc30771cebad8285cd99120ac8786f9ffd42141d452458089985043a5
|
||||
│ ├── 95
|
||||
│ │ └── 95f75feb05a7cc73e328b2efa668b1ea68f65fece55a93bc65aff6cd0bcfeefc
|
||||
│ ├── b8
|
||||
│ │ └── b8138ab08a4722596ac89c917827358da4672eac68e3c03a8115b88dbf4bfb59
|
||||
│ ├── e0
|
||||
│ │ └── e01150928f7ad24befd6ec15b087de1b9e0f92edabd8e5cabb3317f8b20ad044
|
||||
│ [...]
|
||||
└── version
|
||||
|
||||
A repository can be initialized with the `restic init` command, e.g.:
|
||||
|
@ -99,39 +92,47 @@ A repository can be initialized with the `restic init` command, e.g.:
|
|||
Pack Format
|
||||
-----------
|
||||
|
||||
All files in the repository except Key, Tree and Data files just contain raw
|
||||
data, stored as `IV || Ciphertext || MAC`. Tree and Data files may contain
|
||||
several Blobs of data. The format is described in the following.
|
||||
All files in the repository except Key and Data files just contain raw data,
|
||||
stored as `IV || Ciphertext || MAC`. Data files may contain one or more Blobs
|
||||
of data. The format is described in the following.
|
||||
|
||||
A Pack starts with a nonce and a header, the header describes the content and
|
||||
is encrypted and signed. The Pack's structure is as follows:
|
||||
The Pack's structure is as follows:
|
||||
|
||||
NONCE || Header_Length ||
|
||||
IV_Header || Ciphertext_Header || MAC_Header ||
|
||||
IV_Blob_1 || Ciphertext_Blob_1 || MAC_Blob_1 ||
|
||||
[...]
|
||||
IV_Blob_n || Ciphertext_Blob_n || MAC_Blob_n ||
|
||||
MAC
|
||||
EncryptedBlob1 || ... || EncryptedBlobN || EncryptedHeader || Header_Length
|
||||
|
||||
`NONCE` consists of 16 bytes and `Header_Length` is a four byte integer in
|
||||
little-endian encoding.
|
||||
At the end of the Pack is a header, which describes the content and is
|
||||
encrypted and signed. `Header_Length` is the length of the encrypted header
|
||||
encoded as a four byte integer in little-endian encoding.
|
||||
|
||||
All the parts (`Ciphertext_Header`, `Ciphertext_Blob1` etc.) are signed and
|
||||
encrypted independently. In addition, the complete pack is signed using
|
||||
`NONCE`. This enables repository reorganisation without having to touch the
|
||||
encrypted Blobs. In addition it also allows efficient indexing, for only the
|
||||
header needs to be read in order to find out which Blobs are contained in the
|
||||
Pack. Since the header is signed, authenticity of the header can be checked
|
||||
without having to read the complete Pack.
|
||||
All the blobs (`EncryptedBlob1`, `EncryptedBlobN` etc.) are signed and
|
||||
encrypted independently. This enables repository reorganisation without having
|
||||
to touch the encrypted Blobs. In addition it also allows efficient indexing,
|
||||
for only the header needs to be read in order to find out which Blobs are
|
||||
contained in the Pack. Since the header is signed, authenticity of the header
|
||||
can be checked without having to read the complete Pack.
|
||||
|
||||
After decryption, a Pack's header consists of the following elements:
|
||||
|
||||
Length(IV_Blob_1+Ciphertext_Blob1+MAC_Blob_1) || Hash(Plaintext_Blob_1) ||
|
||||
Type_Blob1 || Length(EncryptedBlob1) || Hash(Plaintext_Blob1) ||
|
||||
[...]
|
||||
Length(IV_Blob_n+Ciphertext_Blob_n+MAC_Blob_n) || Hash(Plaintext_Blob_n) ||
|
||||
Type_BlobN || Length(EncryptedBlobN) || Hash(Plaintext_BlobN) ||
|
||||
|
||||
This is enough to calculate the offsets for all the Blobs in the Pack. Length
|
||||
is the length of a Blob as a four byte integer in little-endian format.
|
||||
is the length of a Blob as a four byte integer in little-endian format. The
|
||||
type field is a one byte field and labels the content of a blob according to
|
||||
the following table:
|
||||
|
||||
Type | Meaning
|
||||
-----|---------
|
||||
0 | data
|
||||
1 | tree
|
||||
|
||||
All other types are invalid, more types may be added in the future.
|
||||
|
||||
For reconstructing the index or parsing a pack without an index, first the last
|
||||
four bytes must be read in order to find the length of the header. Afterwards,
|
||||
the header can be read and parsed, which yields all plaintext hashes, types,
|
||||
offsets and lengths of all included blobs.
|
||||
|
||||
Indexing
|
||||
--------
|
||||
|
@ -139,23 +140,40 @@ Indexing
|
|||
Index files contain information about Data and Tree Blobs and the Packs they
|
||||
are contained in and store this information in the repository. When the local
|
||||
cached index is not accessible any more, the index files can be downloaded and
|
||||
used to reconstruct the index. The index Blobs are encrypted and signed like
|
||||
Data and Tree Blobs, so the outer structure is `IV || Ciphertext || MAC` again.
|
||||
The plaintext consists of a JSON document like the following:
|
||||
used to reconstruct the index. The files are encrypted and signed like Data and
|
||||
Tree Blobs, so the outer structure is `IV || Ciphertext || MAC` again. The
|
||||
plaintext consists of a JSON document like the following:
|
||||
|
||||
[
|
||||
{
|
||||
[ {
|
||||
"id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c",
|
||||
"blobs": [
|
||||
"3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
|
||||
"9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
|
||||
"d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66"
|
||||
]
|
||||
{
|
||||
"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
|
||||
"type": "data",
|
||||
"offset": 0,
|
||||
"length": 25
|
||||
},{
|
||||
"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
|
||||
"type": "tree",
|
||||
"offset": 38,
|
||||
"length": 100
|
||||
},
|
||||
{
|
||||
"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
|
||||
"type": "data",
|
||||
"offset": 150,
|
||||
"length": 123
|
||||
}
|
||||
]
|
||||
} ]
|
||||
|
||||
This JSON document lists all the Blobs with contents. In this example, the Pack
|
||||
`73d04e61` contains three Blobs, the plaintext hashes are listed afterwards.
|
||||
This JSON document lists Blobs with contents. In this example, the Pack
|
||||
`73d04e61` contains two Data Blobs and one Tree Blob, the plaintext hashes are
|
||||
listed afterwards.
|
||||
|
||||
There may be an arbitrary number of index files, containing information on
|
||||
non-disjoint sets of Packs. The number of packs described in a single file is
|
||||
chosen so that the file size is kept below 8 MiB.
|
||||
|
||||
Keys, Encryption and MAC
|
||||
------------------------
|
||||
|
|
270
pack/pack.go
Normal file
270
pack/pack.go
Normal file
|
@ -0,0 +1,270 @@
|
|||
package pack
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"sync"
|
||||
|
||||
"github.com/restic/restic/backend"
|
||||
"github.com/restic/restic/crypto"
|
||||
)
|
||||
|
||||
// BlobType labels the content of a blob. It is stored as a single byte in
// the pack header and as the string "data" or "tree" in JSON documents.
type BlobType uint8

// Known blob types. The numeric values are written into pack headers, so
// they must stay stable. Both constants are explicitly typed: an untyped
// constant would silently turn into a plain int when stored in an interface
// value.
const (
	Data BlobType = 0
	Tree BlobType = 1
)

// MarshalJSON encodes t as the JSON string "data" or "tree". Any other value
// yields an error.
func (t BlobType) MarshalJSON() ([]byte, error) {
	switch t {
	case Data:
		return []byte(`"data"`), nil
	case Tree:
		return []byte(`"tree"`), nil
	}

	return nil, errors.New("unknown blob type")
}

// UnmarshalJSON sets *t from the JSON string "data" or "tree". For any other
// input an error is returned and *t is left untouched.
func (t *BlobType) UnmarshalJSON(buf []byte) error {
	switch string(buf) {
	case `"data"`:
		*t = Data
	case `"tree"`:
		*t = Tree
	default:
		return errors.New("unknown blob type")
	}

	return nil
}
|
||||
|
||||
// Blob is a blob within a pack.
|
||||
type Blob struct {
|
||||
Type BlobType
|
||||
Length uint32
|
||||
ID backend.ID
|
||||
Offset uint
|
||||
}
|
||||
|
||||
// GetReader returns an io.Reader for the blob entry e.
|
||||
func (e Blob) GetReader(rd io.ReadSeeker) (io.Reader, error) {
|
||||
// seek to the correct location
|
||||
_, err := rd.Seek(int64(e.Offset), 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return io.LimitReader(rd, int64(e.Length)), nil
|
||||
}
|
||||
|
||||
// Packer is used to create a new Pack.
|
||||
type Packer struct {
|
||||
blobs []Blob
|
||||
|
||||
bytes uint
|
||||
k *crypto.Key
|
||||
wr io.Writer
|
||||
hw *backend.HashingWriter
|
||||
|
||||
m sync.Mutex
|
||||
}
|
||||
|
||||
// NewPacker returns a new Packer that can be used to pack blobs
|
||||
// together.
|
||||
func NewPacker(k *crypto.Key, w io.Writer) *Packer {
|
||||
return &Packer{k: k, wr: w, hw: backend.NewHashingWriter(w, sha256.New())}
|
||||
}
|
||||
|
||||
// Add saves the data read from rd as a new blob to the packer. Returned is the
|
||||
// number of bytes written to the pack.
|
||||
func (p *Packer) Add(t BlobType, id backend.ID, rd io.Reader) (int64, error) {
|
||||
p.m.Lock()
|
||||
defer p.m.Unlock()
|
||||
|
||||
c := Blob{Type: t, ID: id}
|
||||
|
||||
n, err := io.Copy(p.hw, rd)
|
||||
c.Length = uint32(n)
|
||||
c.Offset = p.bytes
|
||||
p.bytes += uint(n)
|
||||
p.blobs = append(p.blobs, c)
|
||||
|
||||
return n, err
|
||||
}
|
||||
|
||||
var entrySize = binary.Size(BlobType(0)) + binary.Size(uint32(0)) + backend.IDSize
|
||||
|
||||
// headerEntry is used with encoding/binary to read and write header entries
|
||||
type headerEntry struct {
|
||||
Type BlobType
|
||||
Length uint32
|
||||
ID [backend.IDSize]byte
|
||||
}
|
||||
|
||||
// Finalize writes the header for all added blobs and finalizes the pack.
|
||||
// Returned are the complete number of bytes written, including the header.
|
||||
// After Finalize() has finished, the ID of this pack can be obtained by
|
||||
// calling ID().
|
||||
func (p *Packer) Finalize() (int64, error) {
|
||||
p.m.Lock()
|
||||
defer p.m.Unlock()
|
||||
|
||||
// n tracks the number of bytes written for the header
|
||||
var n int64 = int64(p.bytes)
|
||||
|
||||
// create writer to encrypt header
|
||||
wr := crypto.EncryptTo(p.k, p.hw)
|
||||
|
||||
// write header
|
||||
for _, b := range p.blobs {
|
||||
entry := headerEntry{
|
||||
Type: b.Type,
|
||||
Length: b.Length,
|
||||
}
|
||||
copy(entry.ID[:], b.ID)
|
||||
|
||||
err := binary.Write(wr, binary.LittleEndian, entry)
|
||||
if err != nil {
|
||||
return int64(n), err
|
||||
}
|
||||
|
||||
n += int64(entrySize)
|
||||
}
|
||||
|
||||
// finalize encrypted header
|
||||
err := wr.Close()
|
||||
if err != nil {
|
||||
return int64(n), err
|
||||
}
|
||||
|
||||
// account for crypto overhead
|
||||
n += crypto.Extension
|
||||
|
||||
// write length
|
||||
err = binary.Write(p.hw, binary.LittleEndian, uint32(len(p.blobs)*entrySize+crypto.Extension))
|
||||
if err != nil {
|
||||
return int64(n), err
|
||||
}
|
||||
n += int64(binary.Size(uint32(0)))
|
||||
|
||||
p.bytes = uint(n)
|
||||
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// ID returns the ID of all data written so far.
|
||||
func (p *Packer) ID() backend.ID {
|
||||
p.m.Lock()
|
||||
defer p.m.Unlock()
|
||||
|
||||
return p.hw.Sum(nil)
|
||||
}
|
||||
|
||||
// Size returns the number of bytes written so far.
|
||||
func (p *Packer) Size() uint {
|
||||
p.m.Lock()
|
||||
defer p.m.Unlock()
|
||||
|
||||
return p.bytes
|
||||
}
|
||||
|
||||
// Count returns the number of blobs in this packer.
|
||||
func (p *Packer) Count() int {
|
||||
p.m.Lock()
|
||||
defer p.m.Unlock()
|
||||
|
||||
return len(p.blobs)
|
||||
}
|
||||
|
||||
// Blobs returns the slice of blobs that have been written.
|
||||
func (p *Packer) Blobs() []Blob {
|
||||
p.m.Lock()
|
||||
defer p.m.Unlock()
|
||||
|
||||
return p.blobs
|
||||
}
|
||||
|
||||
// Writer returns the underlying writer.
|
||||
func (p *Packer) Writer() io.Writer {
|
||||
return p.wr
|
||||
}
|
||||
|
||||
func (p *Packer) String() string {
|
||||
return fmt.Sprintf("<Packer %d blobs, %d bytes>", len(p.blobs), p.bytes)
|
||||
}
|
||||
|
||||
// Unpacker is used to read individual blobs from a pack.
|
||||
type Unpacker struct {
|
||||
rd io.ReadSeeker
|
||||
Entries []Blob
|
||||
k *crypto.Key
|
||||
}
|
||||
|
||||
// NewUnpacker returns a pointer to Unpacker which can be used to read
|
||||
// individual Blobs from a pack.
|
||||
func NewUnpacker(k *crypto.Key, entries []Blob, rd io.ReadSeeker) (*Unpacker, error) {
|
||||
var err error
|
||||
ls := binary.Size(uint32(0))
|
||||
|
||||
// reset to the end to read header length
|
||||
_, err = rd.Seek(-int64(ls), 2)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("seeking to read header length failed: %v", err)
|
||||
}
|
||||
|
||||
// read length
|
||||
var l uint32
|
||||
err = binary.Read(rd, binary.LittleEndian, &l)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reading header length failed: %v", err)
|
||||
}
|
||||
|
||||
// reset to the beginning of the header
|
||||
_, err = rd.Seek(-int64(ls)-int64(l), 2)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("seeking to read header length failed: %v", err)
|
||||
}
|
||||
|
||||
// read header
|
||||
hrd, err := crypto.DecryptFrom(k, io.LimitReader(rd, int64(l)))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if entries == nil {
|
||||
pos := uint(0)
|
||||
for {
|
||||
e := headerEntry{}
|
||||
err = binary.Read(hrd, binary.LittleEndian, &e)
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
entries = append(entries, Blob{
|
||||
Type: e.Type,
|
||||
Length: e.Length,
|
||||
ID: e.ID[:],
|
||||
Offset: pos,
|
||||
})
|
||||
|
||||
pos += uint(e.Length)
|
||||
}
|
||||
}
|
||||
|
||||
p := &Unpacker{
|
||||
rd: rd,
|
||||
k: k,
|
||||
Entries: entries,
|
||||
}
|
||||
|
||||
return p, nil
|
||||
}
|
111
pack/pack_test.go
Normal file
111
pack/pack_test.go
Normal file
|
@ -0,0 +1,111 @@
|
|||
package pack_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/rand"
|
||||
"crypto/sha256"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"testing"
|
||||
|
||||
"github.com/restic/restic/backend"
|
||||
"github.com/restic/restic/crypto"
|
||||
"github.com/restic/restic/pack"
|
||||
. "github.com/restic/restic/test"
|
||||
)
|
||||
|
||||
// lengths lists the sizes, in bytes, of the random blobs that TestCreatePack
// packs and reads back.
var lengths = []int{
	23, 31650, 25860, 10928, 13769,
	19862, 5211, 127, 13690, 30231,
}

// Alternative single-blob fixture, kept for debugging:
// var lengths = []int{200}
|
||||
|
||||
func TestCreatePack(t *testing.T) {
|
||||
type Buf struct {
|
||||
data []byte
|
||||
id backend.ID
|
||||
}
|
||||
|
||||
bufs := []Buf{}
|
||||
|
||||
for _, l := range lengths {
|
||||
b := make([]byte, l)
|
||||
_, err := io.ReadFull(rand.Reader, b)
|
||||
OK(t, err)
|
||||
h := sha256.Sum256(b)
|
||||
bufs = append(bufs, Buf{data: b, id: h[:]})
|
||||
}
|
||||
|
||||
file := bytes.NewBuffer(nil)
|
||||
|
||||
// create random keys
|
||||
k := crypto.NewKey()
|
||||
|
||||
// pack blobs
|
||||
p := pack.NewPacker(k, file)
|
||||
for _, b := range bufs {
|
||||
p.Add(pack.Tree, b.id, bytes.NewReader(b.data))
|
||||
}
|
||||
|
||||
// write file
|
||||
n, err := p.Finalize()
|
||||
OK(t, err)
|
||||
|
||||
written := 0
|
||||
// data
|
||||
for _, l := range lengths {
|
||||
written += l
|
||||
}
|
||||
// header length
|
||||
written += binary.Size(uint32(0))
|
||||
// header
|
||||
written += len(lengths) * (binary.Size(pack.BlobType(0)) + binary.Size(uint32(0)) + backend.IDSize)
|
||||
// header crypto
|
||||
written += crypto.Extension
|
||||
|
||||
// check length
|
||||
Equals(t, int64(written), n)
|
||||
Equals(t, uint(written), p.Size())
|
||||
|
||||
// read and parse it again
|
||||
rd := bytes.NewReader(file.Bytes())
|
||||
np, err := pack.NewUnpacker(k, nil, rd)
|
||||
OK(t, err)
|
||||
Equals(t, len(np.Entries), len(bufs))
|
||||
|
||||
for i, b := range bufs {
|
||||
e := np.Entries[i]
|
||||
Equals(t, b.id, e.ID)
|
||||
|
||||
brd, err := e.GetReader(rd)
|
||||
OK(t, err)
|
||||
data, err := ioutil.ReadAll(brd)
|
||||
OK(t, err)
|
||||
|
||||
Assert(t, bytes.Equal(b.data, data),
|
||||
"data for blob %v doesn't match", i)
|
||||
}
|
||||
}
|
||||
|
||||
var blobTypeJson = []struct {
|
||||
t pack.BlobType
|
||||
res string
|
||||
}{
|
||||
{pack.Data, `"data"`},
|
||||
{pack.Tree, `"tree"`},
|
||||
}
|
||||
|
||||
func TestBlobTypeJSON(t *testing.T) {
|
||||
for _, test := range blobTypeJson {
|
||||
// test serialize
|
||||
buf, err := json.Marshal(test.t)
|
||||
OK(t, err)
|
||||
Equals(t, test.res, string(buf))
|
||||
|
||||
// test unserialize
|
||||
var v pack.BlobType
|
||||
err = json.Unmarshal([]byte(test.res), &v)
|
||||
OK(t, err)
|
||||
Equals(t, test.t, v)
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue