restic/internal/repository/repository_test.go
Michael Eischer 7132df529e repository: Increase index size for repo version 2
A compressed index is only about one third the size of an uncompressed
one. Thus increase the number of entries in an index to avoid cluttering
the repository with small indexes.
2022-04-30 11:34:10 +02:00

614 lines
14 KiB
Go

package repository_test
import (
"bytes"
"context"
"crypto/sha256"
"encoding/json"
"fmt"
"io"
"math/rand"
"os"
"path/filepath"
"strings"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/restic/restic/internal/archiver"
"github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/test"
rtest "github.com/restic/restic/internal/test"
)
var testSizes = []int{5, 23, 2<<18 + 23, 1 << 20}
var rnd = rand.New(rand.NewSource(time.Now().UnixNano()))
func TestSave(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
for _, size := range testSizes {
data := make([]byte, size)
_, err := io.ReadFull(rnd, data)
rtest.OK(t, err)
id := restic.Hash(data)
// save
sid, _, err := repo.SaveBlob(context.TODO(), restic.DataBlob, data, restic.ID{}, false)
rtest.OK(t, err)
rtest.Equals(t, id, sid)
rtest.OK(t, repo.Flush(context.Background()))
// rtest.OK(t, repo.SaveIndex())
// read back
buf, err := repo.LoadBlob(context.TODO(), restic.DataBlob, id, nil)
rtest.OK(t, err)
rtest.Equals(t, size, len(buf))
rtest.Assert(t, len(buf) == len(data),
"number of bytes read back does not match: expected %d, got %d",
len(data), len(buf))
rtest.Assert(t, bytes.Equal(buf, data),
"data does not match: expected %02x, got %02x",
data, buf)
}
}
func TestSaveFrom(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
for _, size := range testSizes {
data := make([]byte, size)
_, err := io.ReadFull(rnd, data)
rtest.OK(t, err)
id := restic.Hash(data)
// save
id2, _, err := repo.SaveBlob(context.TODO(), restic.DataBlob, data, id, false)
rtest.OK(t, err)
rtest.Equals(t, id, id2)
rtest.OK(t, repo.Flush(context.Background()))
// read back
buf, err := repo.LoadBlob(context.TODO(), restic.DataBlob, id, nil)
rtest.OK(t, err)
rtest.Equals(t, size, len(buf))
rtest.Assert(t, len(buf) == len(data),
"number of bytes read back does not match: expected %d, got %d",
len(data), len(buf))
rtest.Assert(t, bytes.Equal(buf, data),
"data does not match: expected %02x, got %02x",
data, buf)
}
}
func BenchmarkSaveAndEncrypt(t *testing.B) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
size := 4 << 20 // 4MiB
data := make([]byte, size)
_, err := io.ReadFull(rnd, data)
rtest.OK(t, err)
id := restic.ID(sha256.Sum256(data))
t.ReportAllocs()
t.ResetTimer()
t.SetBytes(int64(size))
for i := 0; i < t.N; i++ {
_, _, err = repo.SaveBlob(context.TODO(), restic.DataBlob, data, id, true)
rtest.OK(t, err)
}
}
func TestLoadTree(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
if rtest.BenchArchiveDirectory == "" {
t.Skip("benchdir not set, skipping")
}
// archive a few files
sn := archiver.TestSnapshot(t, repo, rtest.BenchArchiveDirectory, nil)
rtest.OK(t, repo.Flush(context.Background()))
_, err := repo.LoadTree(context.TODO(), *sn.Tree)
rtest.OK(t, err)
}
func BenchmarkLoadTree(t *testing.B) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
if rtest.BenchArchiveDirectory == "" {
t.Skip("benchdir not set, skipping")
}
// archive a few files
sn := archiver.TestSnapshot(t, repo, rtest.BenchArchiveDirectory, nil)
rtest.OK(t, repo.Flush(context.Background()))
t.ResetTimer()
for i := 0; i < t.N; i++ {
_, err := repo.LoadTree(context.TODO(), *sn.Tree)
rtest.OK(t, err)
}
}
func TestLoadBlob(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
length := 1000000
buf := restic.NewBlobBuffer(length)
_, err := io.ReadFull(rnd, buf)
rtest.OK(t, err)
id, _, err := repo.SaveBlob(context.TODO(), restic.DataBlob, buf, restic.ID{}, false)
rtest.OK(t, err)
rtest.OK(t, repo.Flush(context.Background()))
base := restic.CiphertextLength(length)
for _, testlength := range []int{0, base - 20, base - 1, base, base + 7, base + 15, base + 1000} {
buf = make([]byte, 0, testlength)
buf, err := repo.LoadBlob(context.TODO(), restic.DataBlob, id, buf)
if err != nil {
t.Errorf("LoadBlob() returned an error for buffer size %v: %v", testlength, err)
continue
}
if len(buf) != length {
t.Errorf("LoadBlob() returned the wrong number of bytes: want %v, got %v", length, len(buf))
continue
}
}
}
func BenchmarkLoadBlob(b *testing.B) {
repo, cleanup := repository.TestRepository(b)
defer cleanup()
length := 1000000
buf := restic.NewBlobBuffer(length)
_, err := io.ReadFull(rnd, buf)
rtest.OK(b, err)
id, _, err := repo.SaveBlob(context.TODO(), restic.DataBlob, buf, restic.ID{}, false)
rtest.OK(b, err)
rtest.OK(b, repo.Flush(context.Background()))
b.ResetTimer()
b.SetBytes(int64(length))
for i := 0; i < b.N; i++ {
var err error
buf, err = repo.LoadBlob(context.TODO(), restic.DataBlob, id, buf)
// Checking the SHA-256 with restic.Hash can make up 38% of the time
// spent in this loop, so pause the timer.
b.StopTimer()
rtest.OK(b, err)
if len(buf) != length {
b.Errorf("wanted %d bytes, got %d", length, len(buf))
}
id2 := restic.Hash(buf)
if !id.Equal(id2) {
b.Errorf("wrong data returned, wanted %v, got %v", id.Str(), id2.Str())
}
b.StartTimer()
}
}
func BenchmarkLoadUnpacked(b *testing.B) {
repo, cleanup := repository.TestRepository(b)
defer cleanup()
length := 1000000
buf := restic.NewBlobBuffer(length)
_, err := io.ReadFull(rnd, buf)
rtest.OK(b, err)
dataID := restic.Hash(buf)
storageID, err := repo.SaveUnpacked(context.TODO(), restic.PackFile, buf)
rtest.OK(b, err)
// rtest.OK(b, repo.Flush())
b.ResetTimer()
b.SetBytes(int64(length))
for i := 0; i < b.N; i++ {
data, err := repo.LoadUnpacked(context.TODO(), nil, restic.PackFile, storageID)
rtest.OK(b, err)
// See comment in BenchmarkLoadBlob.
b.StopTimer()
if len(data) != length {
b.Errorf("wanted %d bytes, got %d", length, len(data))
}
id2 := restic.Hash(data)
if !dataID.Equal(id2) {
b.Errorf("wrong data returned, wanted %v, got %v", storageID.Str(), id2.Str())
}
b.StartTimer()
}
}
func TestLoadJSONUnpacked(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
if rtest.BenchArchiveDirectory == "" {
t.Skip("benchdir not set, skipping")
}
// archive a snapshot
sn := restic.Snapshot{}
sn.Hostname = "foobar"
sn.Username = "test!"
id, err := repo.SaveJSONUnpacked(context.TODO(), restic.SnapshotFile, &sn)
rtest.OK(t, err)
var sn2 restic.Snapshot
// restore
err = repo.LoadJSONUnpacked(context.TODO(), restic.SnapshotFile, id, &sn2)
rtest.OK(t, err)
rtest.Equals(t, sn.Hostname, sn2.Hostname)
rtest.Equals(t, sn.Username, sn2.Username)
var cf restic.Config
// load and check Config
err = repo.LoadJSONUnpacked(context.TODO(), restic.ConfigFile, id, &cf)
rtest.OK(t, err)
rtest.Equals(t, cf.ChunkerPolynomial, repository.TestChunkerPol)
}
var repoFixture = filepath.Join("testdata", "test-repo.tar.gz")
func TestRepositoryLoadIndex(t *testing.T) {
repodir, cleanup := rtest.Env(t, repoFixture)
defer cleanup()
repo := repository.TestOpenLocal(t, repodir)
rtest.OK(t, repo.LoadIndex(context.TODO()))
}
// loadIndex loads the index id from backend and returns it.
func loadIndex(ctx context.Context, repo restic.Repository, id restic.ID) (*repository.Index, error) {
buf, err := repo.LoadUnpacked(ctx, nil, restic.IndexFile, id)
if err != nil {
return nil, err
}
idx, oldFormat, err := repository.DecodeIndex(buf, id)
if oldFormat {
fmt.Fprintf(os.Stderr, "index %v has old format\n", id.Str())
}
return idx, err
}
func BenchmarkLoadIndex(b *testing.B) {
repository.TestUseLowSecurityKDFParameters(b)
repo, cleanup := repository.TestRepository(b)
defer cleanup()
idx := repository.NewIndex()
for i := 0; i < 5000; i++ {
idx.Store(restic.PackedBlob{
Blob: restic.Blob{
BlobHandle: restic.NewRandomBlobHandle(),
Length: 1234,
Offset: 1235,
},
PackID: restic.NewRandomID(),
})
}
id, err := repository.SaveIndex(context.TODO(), repo, idx)
rtest.OK(b, err)
b.Logf("index saved as %v (%v entries)", id.Str(), idx.Count(restic.DataBlob))
fi, err := repo.Backend().Stat(context.TODO(), restic.Handle{Type: restic.IndexFile, Name: id.String()})
rtest.OK(b, err)
b.Logf("filesize is %v", fi.Size)
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := loadIndex(context.TODO(), repo, id)
rtest.OK(b, err)
}
}
// saveRandomDataBlobs generates random data blobs and saves them to the repository.
func saveRandomDataBlobs(t testing.TB, repo restic.Repository, num int, sizeMax int) {
for i := 0; i < num; i++ {
size := rand.Int() % sizeMax
buf := make([]byte, size)
_, err := io.ReadFull(rnd, buf)
rtest.OK(t, err)
_, _, err = repo.SaveBlob(context.TODO(), restic.DataBlob, buf, restic.ID{}, false)
rtest.OK(t, err)
}
}
func TestRepositoryIncrementalIndex(t *testing.T) {
r, cleanup := repository.TestRepository(t)
defer cleanup()
repo := r.(*repository.Repository)
repository.IndexFull = func(*repository.Index, bool) bool { return true }
// add 15 packs
for j := 0; j < 5; j++ {
// add 3 packs, write intermediate index
for i := 0; i < 3; i++ {
saveRandomDataBlobs(t, repo, 5, 1<<15)
rtest.OK(t, repo.FlushPacks(context.Background()))
}
rtest.OK(t, repo.SaveFullIndex(context.TODO()))
}
// add another 5 packs
for i := 0; i < 5; i++ {
saveRandomDataBlobs(t, repo, 5, 1<<15)
rtest.OK(t, repo.FlushPacks(context.Background()))
}
// save final index
rtest.OK(t, repo.SaveIndex(context.TODO()))
packEntries := make(map[restic.ID]map[restic.ID]struct{})
err := repo.List(context.TODO(), restic.IndexFile, func(id restic.ID, size int64) error {
idx, err := loadIndex(context.TODO(), repo, id)
rtest.OK(t, err)
for pb := range idx.Each(context.TODO()) {
if _, ok := packEntries[pb.PackID]; !ok {
packEntries[pb.PackID] = make(map[restic.ID]struct{})
}
packEntries[pb.PackID][id] = struct{}{}
}
return nil
})
if err != nil {
t.Fatal(err)
}
for packID, ids := range packEntries {
if len(ids) > 1 {
t.Errorf("pack %v listed in %d indexes\n", packID, len(ids))
}
}
}
// buildPackfileWithoutHeader returns a manually built pack file without a header.
func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key) (blobs []restic.Blob, packfile []byte) {
var offset uint
for i, size := range blobSizes {
plaintext := test.Random(800+i, size)
// we use a deterministic nonce here so the whole process is
// deterministic, last byte is the blob index
var nonce = []byte{
0x15, 0x98, 0xc0, 0xf7, 0xb9, 0x65, 0x97, 0x74,
0x12, 0xdc, 0xd3, 0x62, 0xa9, 0x6e, 0x20, byte(i),
}
before := len(packfile)
packfile = append(packfile, nonce...)
packfile = key.Seal(packfile, nonce, plaintext, nil)
after := len(packfile)
ciphertextLength := after - before
blobs = append(blobs, restic.Blob{
BlobHandle: restic.BlobHandle{
ID: restic.Hash(plaintext),
Type: restic.DataBlob,
},
Length: uint(ciphertextLength),
Offset: offset,
})
offset = uint(len(packfile))
}
return blobs, packfile
}
func TestStreamPack(t *testing.T) {
// always use the same key for deterministic output
const jsonKey = `{"mac":{"k":"eQenuI8adktfzZMuC8rwdA==","r":"k8cfAly2qQSky48CQK7SBA=="},"encrypt":"MKO9gZnRiQFl8mDUurSDa9NMjiu9MUifUrODTHS05wo="}`
var key crypto.Key
err := json.Unmarshal([]byte(jsonKey), &key)
if err != nil {
t.Fatal(err)
}
blobSizes := []int{
10,
5231,
18812,
123123,
12301,
892242,
28616,
13351,
252287,
188883,
2522811,
18883,
}
packfileBlobs, packfile := buildPackfileWithoutHeader(t, blobSizes, &key)
load := func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error {
data := packfile
if offset > int64(len(data)) {
offset = 0
length = 0
}
data = data[offset:]
if length > len(data) {
length = len(data)
}
data = data[:length]
return fn(bytes.NewReader(data))
}
// first, test regular usage
t.Run("regular", func(t *testing.T) {
tests := []struct {
blobs []restic.Blob
}{
{packfileBlobs[1:2]},
{packfileBlobs[2:5]},
{packfileBlobs[2:8]},
{[]restic.Blob{
packfileBlobs[0],
packfileBlobs[8],
packfileBlobs[4],
}},
{[]restic.Blob{
packfileBlobs[0],
packfileBlobs[len(packfileBlobs)-1],
}},
}
for _, test := range tests {
t.Run("", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
gotBlobs := make(map[restic.ID]int)
handleBlob := func(blob restic.BlobHandle, buf []byte, err error) error {
gotBlobs[blob.ID]++
id := restic.Hash(buf)
if !id.Equal(blob.ID) {
t.Fatalf("wrong id %v for blob %s returned", id, blob.ID)
}
return err
}
wantBlobs := make(map[restic.ID]int)
for _, blob := range test.blobs {
wantBlobs[blob.ID] = 1
}
err = repository.StreamPack(ctx, load, &key, restic.ID{}, test.blobs, handleBlob)
if err != nil {
t.Fatal(err)
}
if !cmp.Equal(wantBlobs, gotBlobs) {
t.Fatal(cmp.Diff(wantBlobs, gotBlobs))
}
})
}
})
// next, test invalid uses, which should return an error
t.Run("invalid", func(t *testing.T) {
tests := []struct {
blobs []restic.Blob
err string
}{
{
// pass one blob several times
blobs: []restic.Blob{
packfileBlobs[3],
packfileBlobs[8],
packfileBlobs[3],
packfileBlobs[4],
},
err: "overlapping blobs in pack",
},
{
// pass something that's not a valid blob in the current pack file
blobs: []restic.Blob{
{
Offset: 123,
Length: 20000,
},
},
err: "ciphertext verification failed",
},
{
// pass a blob that's too small
blobs: []restic.Blob{
{
Offset: 123,
Length: 10,
},
},
err: "invalid blob length",
},
}
for _, test := range tests {
t.Run("", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
handleBlob := func(blob restic.BlobHandle, buf []byte, err error) error {
return err
}
err = repository.StreamPack(ctx, load, &key, restic.ID{}, test.blobs, handleBlob)
if err == nil {
t.Fatalf("wanted error %v, got nil", test.err)
}
if !strings.Contains(err.Error(), test.err) {
t.Fatalf("wrong error returned, it should contain %q but was %q", test.err, err)
}
})
}
})
}