Merge pull request #2674 from MichaelEischer/prune-strict-checks

prune: Stricter error checks
This commit is contained in:
Alexander Neumann 2020-08-23 10:30:41 +02:00 committed by GitHub
commit b1b3f1ecb6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 185 additions and 30 deletions

View file

@ -0,0 +1,8 @@
Bugfix: Add stricter prune error checks
Additional checks were added to the prune command in order to improve
resiliency to backend, hardware and/or networking issues. The checks now
detect a few more cases where such outside factors could potentially cause
data loss.
https://github.com/restic/restic/pull/2674

View file

@ -191,10 +191,17 @@ func pruneRepository(gopts GlobalOptions, repo restic.Repository) error {
return err
}
if len(usedBlobs) > stats.blobs {
return errors.Fatalf("number of used blobs is larger than number of available blobs!\n" +
var missingBlobs []restic.BlobHandle
for h := range usedBlobs {
if _, ok := blobCount[h]; !ok {
missingBlobs = append(missingBlobs, h)
}
}
if len(missingBlobs) > 0 {
return errors.Fatalf("%v not found in the new index\n"+
"Data blobs seem to be missing, aborting prune to prevent further data loss!\n"+
"Please report this error (along with the output of the 'prune' run) at\n"+
"https://github.com/restic/restic/issues/new")
"https://github.com/restic/restic/issues/new/choose", missingBlobs)
}
Verbosef("found %d of %d data blobs still in use, removing %d blobs\n",

View file

@ -92,7 +92,10 @@ func rebuildIndex(ctx context.Context, repo restic.Repository, ignorePacks resti
Verbosef("saved new indexes as %v\n", ids)
Verbosef("remove %d old index files\n", len(supersedes))
DeleteFiles(globalOptions, repo, restic.NewIDSet(supersedes...), restic.IndexFile)
err = DeleteFilesChecked(globalOptions, repo, restic.NewIDSet(supersedes...), restic.IndexFile)
if err != nil {
return errors.Fatalf("unable to remove an old index: %v\n", err)
}
return nil
}

View file

@ -44,6 +44,8 @@ var version = "0.9.6-dev (compiled manually)"
// TimeFormat is the format used for all timestamps printed by restic.
const TimeFormat = "2006-01-02 15:04:05"
type backendWrapper func(r restic.Backend) (restic.Backend, error)
// GlobalOptions hold all global options for restic.
type GlobalOptions struct {
Repo string
@ -68,6 +70,8 @@ type GlobalOptions struct {
stdout io.Writer
stderr io.Writer
backendTestHook backendWrapper
// verbosity is set as follows:
// 0 means: don't print any messages except errors, this is used when --quiet is specified
// 1 is the default: print essential messages
@ -395,6 +399,14 @@ func OpenRepository(opts GlobalOptions) (*repository.Repository, error) {
Warnf("%v returned error, retrying after %v: %v\n", msg, d, err)
})
// wrap backend if a test specified a hook
if opts.backendTestHook != nil {
be, err = opts.backendTestHook(be)
if err != nil {
return nil, err
}
}
s := repository.New(be)
passwordTriesLeft := 1

View file

@ -359,6 +359,28 @@ func TestBackupNonExistingFile(t *testing.T) {
testRunBackup(t, "", dirs, opts, env.gopts)
}
func removeDataPacksExcept(gopts GlobalOptions, t *testing.T, keep restic.IDSet) {
r, err := OpenRepository(gopts)
rtest.OK(t, err)
// Get all tree packs
rtest.OK(t, r.LoadIndex(gopts.ctx))
treePacks := restic.NewIDSet()
for _, idx := range r.Index().(*repository.MasterIndex).All() {
for _, id := range idx.TreePacks() {
treePacks.Insert(id)
}
}
// remove all packs containing data blobs
rtest.OK(t, r.List(gopts.ctx, restic.PackFile, func(id restic.ID, size int64) error {
if treePacks.Has(id) || keep.Has(id) {
return nil
}
return r.Backend().Remove(gopts.ctx, restic.Handle{Type: restic.PackFile, Name: id.String()})
}))
}
func TestBackupSelfHealing(t *testing.T) {
env, cleanup := withTestEnvironment(t)
defer cleanup()
@ -374,25 +396,8 @@ func TestBackupSelfHealing(t *testing.T) {
testRunBackup(t, filepath.Dir(env.testdata), []string{filepath.Base(env.testdata)}, opts, env.gopts)
testRunCheck(t, env.gopts)
r, err := OpenRepository(env.gopts)
rtest.OK(t, err)
// Get all tree packs
rtest.OK(t, r.LoadIndex(env.gopts.ctx))
treePacks := restic.NewIDSet()
for _, idx := range r.Index().(*repository.MasterIndex).All() {
for _, id := range idx.TreePacks() {
treePacks.Insert(id)
}
}
// remove all packs containing data blobs
rtest.OK(t, r.List(env.gopts.ctx, restic.PackFile, func(id restic.ID, size int64) error {
if treePacks.Has(id) {
return nil
}
return r.Backend().Remove(env.gopts.ctx, restic.Handle{Type: restic.PackFile, Name: id.String()})
}))
// remove all data packs
removeDataPacksExcept(env.gopts, t, restic.NewIDSet())
testRunRebuildIndex(t, env.gopts)
// now the repo is also missing the data blob in the index; check should report this
@ -400,7 +405,7 @@ func TestBackupSelfHealing(t *testing.T) {
"check should have reported an error")
// second backup should report an error but "heal" this situation
err = testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{filepath.Base(env.testdata)}, opts, env.gopts)
err := testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{filepath.Base(env.testdata)}, opts, env.gopts)
rtest.Assert(t, err != nil,
"backup should have reported an error")
testRunCheck(t, env.gopts)
@ -1132,6 +1137,37 @@ func TestRebuildIndexAlwaysFull(t *testing.T) {
TestRebuildIndex(t)
}
type appendOnlyBackend struct {
restic.Backend
}
// called via repo.Backend().Remove()
func (b *appendOnlyBackend) Remove(ctx context.Context, h restic.Handle) error {
return errors.Errorf("Failed to remove %v", h)
}
func TestRebuildIndexFailsOnAppendOnly(t *testing.T) {
env, cleanup := withTestEnvironment(t)
defer cleanup()
datafile := filepath.Join("..", "..", "internal", "checker", "testdata", "duplicate-packs-in-index-test-repo.tar.gz")
rtest.SetupTarTestFixture(t, env.base, datafile)
globalOptions.stdout = ioutil.Discard
defer func() {
globalOptions.stdout = os.Stdout
}()
env.gopts.backendTestHook = func(r restic.Backend) (restic.Backend, error) {
return &appendOnlyBackend{r}, nil
}
err := runRebuildIndex(env.gopts)
if err == nil {
t.Error("expected rebuildIndex to fail")
}
t.Log(err)
}
func TestCheckRestoreNoLock(t *testing.T) {
env, cleanup := withTestEnvironment(t)
defer cleanup()
@ -1195,6 +1231,58 @@ func TestPrune(t *testing.T) {
testRunCheck(t, env.gopts)
}
func listPacks(gopts GlobalOptions, t *testing.T) restic.IDSet {
r, err := OpenRepository(gopts)
rtest.OK(t, err)
packs := restic.NewIDSet()
rtest.OK(t, r.List(gopts.ctx, restic.PackFile, func(id restic.ID, size int64) error {
packs.Insert(id)
return nil
}))
return packs
}
func TestPruneWithDamagedRepository(t *testing.T) {
env, cleanup := withTestEnvironment(t)
defer cleanup()
datafile := filepath.Join("testdata", "backup-data.tar.gz")
testRunInit(t, env.gopts)
rtest.SetupTarTestFixture(t, env.testdata, datafile)
opts := BackupOptions{}
// create and delete snapshot to create unused blobs
testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9", "2")}, opts, env.gopts)
firstSnapshot := testRunList(t, "snapshots", env.gopts)
rtest.Assert(t, len(firstSnapshot) == 1,
"expected one snapshot, got %v", firstSnapshot)
testRunForget(t, env.gopts, firstSnapshot[0].String())
oldPacks := listPacks(env.gopts, t)
// create new snapshot, but lose all data
testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9", "3")}, opts, env.gopts)
snapshotIDs := testRunList(t, "snapshots", env.gopts)
removeDataPacksExcept(env.gopts, t, oldPacks)
rtest.Assert(t, len(snapshotIDs) == 1,
"expected one snapshot, got %v", snapshotIDs)
// prune should fail
err := runPrune(env.gopts)
if err == nil {
t.Fatalf("expected prune to fail")
}
if !strings.Contains(err.Error(), "blobs seem to be missing") {
t.Fatalf("did not find hint for missing blobs")
}
t.Log(err)
}
func TestHardLink(t *testing.T) {
// this test assumes a test set with a single directory containing hard linked files
env, cleanup := withTestEnvironment(t)

View file

@ -2,8 +2,6 @@ package repository
import (
"context"
"fmt"
"os"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
@ -59,8 +57,7 @@ func Repack(ctx context.Context, repo restic.Repository, packs restic.IDSet, kee
debug.Log(" process blob %v", h)
buf = buf[:]
if uint(len(buf)) < entry.Length {
if uint(cap(buf)) < entry.Length {
buf = make([]byte, entry.Length)
}
buf = buf[:entry.Length]
@ -85,7 +82,7 @@ func Repack(ctx context.Context, repo restic.Repository, packs restic.IDSet, kee
if !id.Equal(entry.ID) {
debug.Log("read blob %v/%v from %v: wrong data returned, hash is %v",
h.Type, h.ID, tempfile.Name(), id)
fmt.Fprintf(os.Stderr, "read blob %v from %v: wrong data returned, hash is %v",
return nil, errors.Errorf("read blob %v from %v: wrong data returned, hash is %v",
h, tempfile.Name(), id)
}

View file

@ -54,6 +54,24 @@ func createRandomBlobs(t testing.TB, repo restic.Repository, blobs int, pData fl
}
}
func createRandomWrongBlob(t testing.TB, repo restic.Repository) {
length := randomSize(10*1024, 1024*1024) // 10KiB to 1MiB of data
buf := make([]byte, length)
rand.Read(buf)
id := restic.Hash(buf)
// invert first data byte
buf[0] ^= 0xff
_, _, err := repo.SaveBlob(context.TODO(), restic.DataBlob, buf, id, false)
if err != nil {
t.Fatalf("SaveFrom() error %v", err)
}
if err := repo.Flush(context.Background()); err != nil {
t.Fatalf("repo.Flush() returned error %v", err)
}
}
// selectBlobs splits the list of all blobs randomly into two lists. A blob
// will be contained in the firstone ith probability p.
func selectBlobs(t *testing.T, repo restic.Repository, p float32) (list1, list2 restic.BlobSet) {
@ -239,3 +257,25 @@ func TestRepack(t *testing.T) {
}
}
}
func TestRepackWrongBlob(t *testing.T) {
repo, cleanup := repository.TestRepository(t)
defer cleanup()
seed := rand.Int63()
rand.Seed(seed)
t.Logf("rand seed is %v", seed)
createRandomBlobs(t, repo, 5, 0.7)
createRandomWrongBlob(t, repo)
// just keep all blobs, but also rewrite every pack
_, keepBlobs := selectBlobs(t, repo, 0)
rewritePacks := findPacksForBlobs(t, repo, keepBlobs)
_, err := repository.Repack(context.TODO(), repo, rewritePacks, keepBlobs, nil)
if err == nil {
t.Fatal("expected repack to fail but got no error")
}
t.Log(err)
}