restic/internal/repository/repack_test.go

376 lines
10 KiB
Go
Raw Permalink Normal View History

2016-08-01 18:04:23 +00:00
package repository_test
import (
2017-06-05 21:56:59 +00:00
"context"
"math/rand"
"testing"
2020-11-02 11:55:34 +00:00
"time"
2017-07-23 12:21:03 +00:00
"github.com/restic/restic/internal/backend"
2017-07-23 12:21:03 +00:00
"github.com/restic/restic/internal/repository"
2017-07-24 15:42:25 +00:00
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
"github.com/restic/restic/internal/ui/progress"
"golang.org/x/sync/errgroup"
)
2024-08-10 17:34:49 +00:00
// randomSize returns a pseudo-random integer in the half-open interval
// [min, max), drawn from random.
//
// A degenerate range (max <= min) yields min without consuming any
// randomness; rand.Intn would otherwise panic on a non-positive argument.
func randomSize(random *rand.Rand, min, max int) int {
	if max <= min {
		return min
	}
	return random.Intn(max-min) + min
}
2024-08-10 17:34:49 +00:00
func createRandomBlobs(t testing.TB, random *rand.Rand, repo restic.Repository, blobs int, pData float32, smallBlobs bool) {
var wg errgroup.Group
repo.StartPackUploader(context.TODO(), &wg)
2016-08-01 18:24:15 +00:00
for i := 0; i < blobs; i++ {
var (
2016-08-31 18:58:57 +00:00
tpe restic.BlobType
2016-08-01 18:24:15 +00:00
length int
)
2024-08-10 17:34:49 +00:00
if random.Float32() < pData {
2016-08-31 21:07:50 +00:00
tpe = restic.DataBlob
2024-04-14 09:48:40 +00:00
if smallBlobs {
2024-08-10 17:34:49 +00:00
length = randomSize(random, 1*1024, 20*1024) // 1KiB to 20KiB of data
2024-04-14 09:48:40 +00:00
} else {
2024-08-10 17:34:49 +00:00
length = randomSize(random, 10*1024, 1024*1024) // 10KiB to 1MiB of data
2024-04-14 09:48:40 +00:00
}
2016-08-01 18:24:15 +00:00
} else {
2016-08-31 21:07:50 +00:00
tpe = restic.TreeBlob
2024-08-10 17:34:49 +00:00
length = randomSize(random, 1*1024, 20*1024) // 1KiB to 20KiB
2016-08-01 18:24:15 +00:00
}
buf := make([]byte, length)
2024-08-10 17:34:49 +00:00
random.Read(buf)
2022-05-01 12:26:57 +00:00
id, exists, _, err := repo.SaveBlob(context.TODO(), tpe, buf, restic.ID{}, false)
2016-08-01 18:24:15 +00:00
if err != nil {
t.Fatalf("SaveFrom() error %v", err)
}
if exists {
t.Errorf("duplicate blob %v/%v ignored", id, restic.DataBlob)
continue
}
2016-08-01 18:24:15 +00:00
if rand.Float32() < 0.2 {
if err = repo.Flush(context.Background()); err != nil {
2016-08-01 18:24:15 +00:00
t.Fatalf("repo.Flush() returned error %v", err)
}
repo.StartPackUploader(context.TODO(), &wg)
2016-08-01 18:24:15 +00:00
}
}
if err := repo.Flush(context.Background()); err != nil {
2016-08-01 18:24:15 +00:00
t.Fatalf("repo.Flush() returned error %v", err)
}
}
2024-08-10 17:34:49 +00:00
func createRandomWrongBlob(t testing.TB, random *rand.Rand, repo restic.Repository) restic.BlobHandle {
length := randomSize(random, 10*1024, 1024*1024) // 10KiB to 1MiB of data
buf := make([]byte, length)
2024-08-10 17:34:49 +00:00
random.Read(buf)
id := restic.Hash(buf)
// invert first data byte
buf[0] ^= 0xff
var wg errgroup.Group
repo.StartPackUploader(context.TODO(), &wg)
2022-05-01 12:26:57 +00:00
_, _, _, err := repo.SaveBlob(context.TODO(), restic.DataBlob, buf, id, false)
if err != nil {
t.Fatalf("SaveFrom() error %v", err)
}
if err := repo.Flush(context.Background()); err != nil {
t.Fatalf("repo.Flush() returned error %v", err)
}
2024-01-20 20:54:47 +00:00
return restic.BlobHandle{ID: id, Type: restic.DataBlob}
}
2016-08-01 18:24:15 +00:00
// selectBlobs splits the list of all blobs randomly into two lists. A blob
2023-12-06 12:11:55 +00:00
// will be contained in the firstone with probability p.
2024-08-10 17:34:49 +00:00
func selectBlobs(t *testing.T, random *rand.Rand, repo restic.Repository, p float32) (list1, list2 restic.BlobSet) {
2016-08-31 21:07:50 +00:00
list1 = restic.NewBlobSet()
list2 = restic.NewBlobSet()
2016-08-01 18:24:15 +00:00
2016-08-31 21:07:50 +00:00
blobs := restic.NewBlobSet()
err := repo.List(context.TODO(), restic.PackFile, func(id restic.ID, size int64) error {
entries, _, err := repo.ListPack(context.TODO(), id, size)
2016-08-01 18:24:15 +00:00
if err != nil {
t.Fatalf("error listing pack %v: %v", id, err)
}
for _, entry := range entries {
2016-08-31 21:07:50 +00:00
h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
if blobs.Has(h) {
t.Errorf("ignoring duplicate blob %v", h)
return nil
}
blobs.Insert(h)
2024-08-10 17:34:49 +00:00
if random.Float32() <= p {
2016-08-31 21:07:50 +00:00
list1.Insert(restic.BlobHandle{ID: entry.ID, Type: entry.Type})
2016-08-01 18:24:15 +00:00
} else {
2016-08-31 21:07:50 +00:00
list2.Insert(restic.BlobHandle{ID: entry.ID, Type: entry.Type})
2016-08-01 18:24:15 +00:00
}
}
return nil
})
if err != nil {
t.Fatal(err)
2016-08-01 18:24:15 +00:00
}
return list1, list2
}
// listPacks returns the IDs of all files of type restic.PackFile that repo
// lists. It is a thin convenience wrapper around listFiles.
func listPacks(t *testing.T, repo restic.Lister) restic.IDSet {
	packIDs := listFiles(t, repo, restic.PackFile)
	return packIDs
}
func listFiles(t *testing.T, repo restic.Lister, tpe backend.FileType) restic.IDSet {
2016-08-31 18:29:54 +00:00
list := restic.NewIDSet()
err := repo.List(context.TODO(), tpe, func(id restic.ID, size int64) error {
2016-08-01 18:24:15 +00:00
list.Insert(id)
return nil
})
if err != nil {
t.Fatal(err)
2016-08-01 18:24:15 +00:00
}
return list
}
func findPacksForBlobs(t *testing.T, repo restic.Repository, blobs restic.BlobSet) restic.IDSet {
2016-08-31 18:29:54 +00:00
packs := restic.NewIDSet()
2016-08-01 18:24:15 +00:00
for h := range blobs {
list := repo.LookupBlob(h.Type, h.ID)
if len(list) == 0 {
t.Fatal("Failed to find blob", h.ID.Str(), "with type", h.Type)
2016-08-01 18:24:15 +00:00
}
for _, pb := range list {
packs.Insert(pb.PackID)
}
2016-08-01 18:24:15 +00:00
}
return packs
}
// repack runs repository.Repack with repo as both source and destination for
// the given packs and blobs, and afterwards removes every ID contained in the
// set returned by Repack from repo as a pack file. Any error aborts the test
// via t.Fatal.
func repack(t *testing.T, repo restic.Repository, packs restic.IDSet, blobs restic.BlobSet) {
	ctx := context.TODO()

	obsolete, err := repository.Repack(ctx, repo, repo, packs, blobs, nil)
	if err != nil {
		t.Fatal(err)
	}

	for packID := range obsolete {
		if err := repo.RemoveUnpacked(ctx, restic.PackFile, packID); err != nil {
			t.Fatal(err)
		}
	}
}
func rebuildAndReloadIndex(t *testing.T, repo *repository.Repository) {
rtest.OK(t, repository.RepairIndex(context.TODO(), repo, repository.RepairIndexOptions{
ReadAllPacks: true,
}, &progress.NoopPrinter{}))
2020-10-10 20:29:55 +00:00
rtest.OK(t, repo.LoadIndex(context.TODO(), nil))
}
// TestRepack hands testRepack to repository.TestAllVersions, which is
// expected to run it for each repository version (see that helper).
func TestRepack(t *testing.T) {
	repository.TestAllVersions(t, testRepack)
}
func testRepack(t *testing.T, version uint) {
repo, _ := repository.TestRepositoryWithVersion(t, version)
2020-11-02 11:55:34 +00:00
seed := time.Now().UnixNano()
2024-08-10 17:34:49 +00:00
random := rand.New(rand.NewSource(seed))
t.Logf("rand seed is %v", seed)
2024-04-14 09:48:40 +00:00
// add a small amount of blobs twice to create multiple pack files
2024-08-10 17:34:49 +00:00
createRandomBlobs(t, random, repo, 10, 0.7, false)
createRandomBlobs(t, random, repo, 10, 0.7, false)
packsBefore := listPacks(t, repo)
// Running repack on empty ID sets should not do anything at all.
repack(t, repo, nil, nil)
packsAfter := listPacks(t, repo)
if !packsAfter.Equals(packsBefore) {
t.Fatalf("packs are not equal, Repack modified something. Before:\n %v\nAfter:\n %v",
packsBefore, packsAfter)
}
2024-08-10 17:34:49 +00:00
removeBlobs, keepBlobs := selectBlobs(t, random, repo, 0.2)
removePacks := findPacksForBlobs(t, repo, removeBlobs)
repack(t, repo, removePacks, keepBlobs)
rebuildAndReloadIndex(t, repo)
packsAfter = listPacks(t, repo)
for id := range removePacks {
if packsAfter.Has(id) {
t.Errorf("pack %v still present although it should have been repacked and removed", id.Str())
}
}
for h := range keepBlobs {
list := repo.LookupBlob(h.Type, h.ID)
if len(list) == 0 {
t.Errorf("unable to find blob %v in repo", h.ID.Str())
continue
}
if len(list) != 1 {
t.Errorf("expected one pack in the list, got: %v", list)
continue
}
pb := list[0]
if removePacks.Has(pb.PackID) {
t.Errorf("lookup returned pack ID %v that should've been removed", pb.PackID)
}
}
for h := range removeBlobs {
if _, found := repo.LookupBlobSize(h.Type, h.ID); found {
t.Errorf("blob %v still contained in the repo", h)
}
}
}
// TestRepackCopy hands testRepackCopy to repository.TestAllVersions, which is
// expected to run it for each repository version (see that helper).
func TestRepackCopy(t *testing.T) {
	repository.TestAllVersions(t, testRepackCopy)
}
// oneConnectionRepo embeds a restic.Repository and overrides its Connections
// method so that a fixed connection count of one is reported.
type oneConnectionRepo struct {
	restic.Repository
}

// Connections always returns 1, independent of the embedded repository.
func (oneConnectionRepo) Connections() uint {
	const single uint = 1
	return single
}
func testRepackCopy(t *testing.T, version uint) {
repo, _ := repository.TestRepositoryWithVersion(t, version)
dstRepo, _ := repository.TestRepositoryWithVersion(t, version)
// test with minimal possible connection count
repoWrapped := &oneConnectionRepo{repo}
dstRepoWrapped := &oneConnectionRepo{dstRepo}
seed := time.Now().UnixNano()
2024-08-10 17:34:49 +00:00
random := rand.New(rand.NewSource(seed))
t.Logf("rand seed is %v", seed)
2024-04-14 09:48:40 +00:00
// add a small amount of blobs twice to create multiple pack files
2024-08-10 17:34:49 +00:00
createRandomBlobs(t, random, repo, 10, 0.7, false)
createRandomBlobs(t, random, repo, 10, 0.7, false)
2024-08-10 17:34:49 +00:00
_, keepBlobs := selectBlobs(t, random, repo, 0.2)
copyPacks := findPacksForBlobs(t, repo, keepBlobs)
_, err := repository.Repack(context.TODO(), repoWrapped, dstRepoWrapped, copyPacks, keepBlobs, nil)
if err != nil {
t.Fatal(err)
}
rebuildAndReloadIndex(t, dstRepo)
for h := range keepBlobs {
list := dstRepo.LookupBlob(h.Type, h.ID)
if len(list) == 0 {
t.Errorf("unable to find blob %v in repo", h.ID.Str())
continue
}
if len(list) != 1 {
t.Errorf("expected one pack in the list, got: %v", list)
continue
}
}
}
// TestRepackWrongBlob hands testRepackWrongBlob to
// repository.TestAllVersions, which is expected to run it for each repository
// version (see that helper).
func TestRepackWrongBlob(t *testing.T) {
	repository.TestAllVersions(t, testRepackWrongBlob)
}
func testRepackWrongBlob(t *testing.T, version uint) {
// disable verification to allow adding corrupted blobs to the repository
repo, _ := repository.TestRepositoryWithBackend(t, nil, version, repository.Options{NoExtraVerify: true})
2020-11-02 11:55:34 +00:00
seed := time.Now().UnixNano()
2024-08-10 17:34:49 +00:00
random := rand.New(rand.NewSource(seed))
t.Logf("rand seed is %v", seed)
2024-08-10 17:34:49 +00:00
createRandomBlobs(t, random, repo, 5, 0.7, false)
createRandomWrongBlob(t, random, repo)
// just keep all blobs, but also rewrite every pack
2024-08-10 17:34:49 +00:00
_, keepBlobs := selectBlobs(t, random, repo, 0)
rewritePacks := findPacksForBlobs(t, repo, keepBlobs)
_, err := repository.Repack(context.TODO(), repo, repo, rewritePacks, keepBlobs, nil)
if err == nil {
t.Fatal("expected repack to fail but got no error")
}
2020-11-02 11:53:45 +00:00
t.Logf("found expected error: %v", err)
}
// TestRepackBlobFallback hands testRepackBlobFallback to
// repository.TestAllVersions, which is expected to run it for each repository
// version (see that helper).
func TestRepackBlobFallback(t *testing.T) {
	repository.TestAllVersions(t, testRepackBlobFallback)
}
func testRepackBlobFallback(t *testing.T, version uint) {
// disable verification to allow adding corrupted blobs to the repository
repo, _ := repository.TestRepositoryWithBackend(t, nil, version, repository.Options{NoExtraVerify: true})
seed := time.Now().UnixNano()
2024-08-10 17:34:49 +00:00
random := rand.New(rand.NewSource(seed))
t.Logf("rand seed is %v", seed)
2024-08-10 17:34:49 +00:00
length := randomSize(random, 10*1024, 1024*1024) // 10KiB to 1MiB of data
buf := make([]byte, length)
2024-08-10 17:34:49 +00:00
random.Read(buf)
id := restic.Hash(buf)
// corrupted copy
modbuf := make([]byte, len(buf))
copy(modbuf, buf)
// invert first data byte
modbuf[0] ^= 0xff
// create pack with broken copy
var wg errgroup.Group
repo.StartPackUploader(context.TODO(), &wg)
_, _, _, err := repo.SaveBlob(context.TODO(), restic.DataBlob, modbuf, id, false)
rtest.OK(t, err)
rtest.OK(t, repo.Flush(context.Background()))
// find pack with damaged blob
keepBlobs := restic.NewBlobSet(restic.BlobHandle{Type: restic.DataBlob, ID: id})
rewritePacks := findPacksForBlobs(t, repo, keepBlobs)
// create pack with valid copy
repo.StartPackUploader(context.TODO(), &wg)
_, _, _, err = repo.SaveBlob(context.TODO(), restic.DataBlob, buf, id, true)
rtest.OK(t, err)
rtest.OK(t, repo.Flush(context.Background()))
// repack must fallback to valid copy
_, err = repository.Repack(context.TODO(), repo, repo, rewritePacks, keepBlobs, nil)
rtest.OK(t, err)
keepBlobs = restic.NewBlobSet(restic.BlobHandle{Type: restic.DataBlob, ID: id})
packs := findPacksForBlobs(t, repo, keepBlobs)
rtest.Assert(t, len(packs) == 3, "unexpected number of copies: %v", len(packs))
}