forked from TrueCloudLab/restic
check: Only track data blobs when unused blobs should be reported
This considerably reduces the memory usage of `check`, as it now only has to track tree blobs when run with the default parameters.
This commit is contained in:
parent
6da66c15d8
commit
1f43cac12d
5 changed files with 34 additions and 25 deletions
|
@ -193,7 +193,7 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
chkr := checker.New(repo)
|
chkr := checker.New(repo, opts.CheckUnused)
|
||||||
|
|
||||||
Verbosef("load indexes\n")
|
Verbosef("load indexes\n")
|
||||||
hints, errs := chkr.LoadIndex(gopts.ctx)
|
hints, errs := chkr.LoadIndex(gopts.ctx)
|
||||||
|
|
|
@ -27,6 +27,7 @@ type Checker struct {
|
||||||
sync.Mutex
|
sync.Mutex
|
||||||
M restic.BlobSet
|
M restic.BlobSet
|
||||||
}
|
}
|
||||||
|
trackUnused bool
|
||||||
|
|
||||||
masterIndex *repository.MasterIndex
|
masterIndex *repository.MasterIndex
|
||||||
|
|
||||||
|
@ -34,11 +35,12 @@ type Checker struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
// New returns a new checker which runs on repo.
|
// New returns a new checker which runs on repo.
|
||||||
func New(repo restic.Repository) *Checker {
|
func New(repo restic.Repository, trackUnused bool) *Checker {
|
||||||
c := &Checker{
|
c := &Checker{
|
||||||
packs: make(map[restic.ID]int64),
|
packs: make(map[restic.ID]int64),
|
||||||
masterIndex: repository.NewMasterIndex(),
|
masterIndex: repository.NewMasterIndex(),
|
||||||
repo: repo,
|
repo: repo,
|
||||||
|
trackUnused: trackUnused,
|
||||||
}
|
}
|
||||||
|
|
||||||
c.blobRefs.M = restic.NewBlobSet()
|
c.blobRefs.M = restic.NewBlobSet()
|
||||||
|
@ -626,8 +628,6 @@ func (c *Checker) Structure(ctx context.Context, errChan chan<- error) {
|
||||||
func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
|
func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
|
||||||
debug.Log("checking tree %v", id)
|
debug.Log("checking tree %v", id)
|
||||||
|
|
||||||
var blobs []restic.ID
|
|
||||||
|
|
||||||
for _, node := range tree.Nodes {
|
for _, node := range tree.Nodes {
|
||||||
switch node.Type {
|
switch node.Type {
|
||||||
case "file":
|
case "file":
|
||||||
|
@ -641,7 +641,6 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
|
||||||
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("file %q blob %d has null ID", node.Name, b)})
|
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("file %q blob %d has null ID", node.Name, b)})
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
blobs = append(blobs, blobID)
|
|
||||||
blobSize, found := c.repo.LookupBlobSize(blobID, restic.DataBlob)
|
blobSize, found := c.repo.LookupBlobSize(blobID, restic.DataBlob)
|
||||||
if !found {
|
if !found {
|
||||||
debug.Log("tree %v references blob %v which isn't contained in index", id, blobID)
|
debug.Log("tree %v references blob %v which isn't contained in index", id, blobID)
|
||||||
|
@ -649,6 +648,21 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
|
||||||
}
|
}
|
||||||
size += uint64(blobSize)
|
size += uint64(blobSize)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if c.trackUnused {
|
||||||
|
// loop a second time to keep the locked section as short as possible
|
||||||
|
c.blobRefs.Lock()
|
||||||
|
for _, blobID := range node.Content {
|
||||||
|
if blobID.IsNull() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob}
|
||||||
|
c.blobRefs.M.Insert(h)
|
||||||
|
debug.Log("blob %v is referenced", blobID)
|
||||||
|
}
|
||||||
|
c.blobRefs.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
case "dir":
|
case "dir":
|
||||||
if node.Subtree == nil {
|
if node.Subtree == nil {
|
||||||
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("dir node %q has no subtree", node.Name)})
|
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("dir node %q has no subtree", node.Name)})
|
||||||
|
@ -672,19 +686,14 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, blobID := range blobs {
|
|
||||||
c.blobRefs.Lock()
|
|
||||||
h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob}
|
|
||||||
c.blobRefs.M.Insert(h)
|
|
||||||
debug.Log("blob %v is referenced", blobID)
|
|
||||||
c.blobRefs.Unlock()
|
|
||||||
}
|
|
||||||
|
|
||||||
return errs
|
return errs
|
||||||
}
|
}
|
||||||
|
|
||||||
// UnusedBlobs returns all blobs that have never been referenced.
|
// UnusedBlobs returns all blobs that have never been referenced.
|
||||||
func (c *Checker) UnusedBlobs(ctx context.Context) (blobs restic.BlobHandles) {
|
func (c *Checker) UnusedBlobs(ctx context.Context) (blobs restic.BlobHandles) {
|
||||||
|
if !c.trackUnused {
|
||||||
|
panic("only works when tracking blob references")
|
||||||
|
}
|
||||||
c.blobRefs.Lock()
|
c.blobRefs.Lock()
|
||||||
defer c.blobRefs.Unlock()
|
defer c.blobRefs.Unlock()
|
||||||
|
|
||||||
|
|
|
@ -61,7 +61,7 @@ func TestCheckRepo(t *testing.T) {
|
||||||
|
|
||||||
repo := repository.TestOpenLocal(t, repodir)
|
repo := repository.TestOpenLocal(t, repodir)
|
||||||
|
|
||||||
chkr := checker.New(repo)
|
chkr := checker.New(repo, false)
|
||||||
hints, errs := chkr.LoadIndex(context.TODO())
|
hints, errs := chkr.LoadIndex(context.TODO())
|
||||||
if len(errs) > 0 {
|
if len(errs) > 0 {
|
||||||
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
|
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
|
||||||
|
@ -87,7 +87,7 @@ func TestMissingPack(t *testing.T) {
|
||||||
}
|
}
|
||||||
test.OK(t, repo.Backend().Remove(context.TODO(), packHandle))
|
test.OK(t, repo.Backend().Remove(context.TODO(), packHandle))
|
||||||
|
|
||||||
chkr := checker.New(repo)
|
chkr := checker.New(repo, false)
|
||||||
hints, errs := chkr.LoadIndex(context.TODO())
|
hints, errs := chkr.LoadIndex(context.TODO())
|
||||||
if len(errs) > 0 {
|
if len(errs) > 0 {
|
||||||
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
|
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
|
||||||
|
@ -123,7 +123,7 @@ func TestUnreferencedPack(t *testing.T) {
|
||||||
}
|
}
|
||||||
test.OK(t, repo.Backend().Remove(context.TODO(), indexHandle))
|
test.OK(t, repo.Backend().Remove(context.TODO(), indexHandle))
|
||||||
|
|
||||||
chkr := checker.New(repo)
|
chkr := checker.New(repo, false)
|
||||||
hints, errs := chkr.LoadIndex(context.TODO())
|
hints, errs := chkr.LoadIndex(context.TODO())
|
||||||
if len(errs) > 0 {
|
if len(errs) > 0 {
|
||||||
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
|
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
|
||||||
|
@ -168,7 +168,7 @@ func TestUnreferencedBlobs(t *testing.T) {
|
||||||
|
|
||||||
sort.Sort(unusedBlobsBySnapshot)
|
sort.Sort(unusedBlobsBySnapshot)
|
||||||
|
|
||||||
chkr := checker.New(repo)
|
chkr := checker.New(repo, true)
|
||||||
hints, errs := chkr.LoadIndex(context.TODO())
|
hints, errs := chkr.LoadIndex(context.TODO())
|
||||||
if len(errs) > 0 {
|
if len(errs) > 0 {
|
||||||
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
|
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
|
||||||
|
@ -241,7 +241,7 @@ func TestModifiedIndex(t *testing.T) {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
chkr := checker.New(repo)
|
chkr := checker.New(repo, false)
|
||||||
hints, errs := chkr.LoadIndex(context.TODO())
|
hints, errs := chkr.LoadIndex(context.TODO())
|
||||||
if len(errs) == 0 {
|
if len(errs) == 0 {
|
||||||
t.Fatalf("expected errors not found")
|
t.Fatalf("expected errors not found")
|
||||||
|
@ -264,7 +264,7 @@ func TestDuplicatePacksInIndex(t *testing.T) {
|
||||||
|
|
||||||
repo := repository.TestOpenLocal(t, repodir)
|
repo := repository.TestOpenLocal(t, repodir)
|
||||||
|
|
||||||
chkr := checker.New(repo)
|
chkr := checker.New(repo, false)
|
||||||
hints, errs := chkr.LoadIndex(context.TODO())
|
hints, errs := chkr.LoadIndex(context.TODO())
|
||||||
if len(hints) == 0 {
|
if len(hints) == 0 {
|
||||||
t.Fatalf("did not get expected checker hints for duplicate packs in indexes")
|
t.Fatalf("did not get expected checker hints for duplicate packs in indexes")
|
||||||
|
@ -336,7 +336,7 @@ func TestCheckerModifiedData(t *testing.T) {
|
||||||
checkRepo := repository.New(beError)
|
checkRepo := repository.New(beError)
|
||||||
test.OK(t, checkRepo.SearchKey(context.TODO(), test.TestPassword, 5, ""))
|
test.OK(t, checkRepo.SearchKey(context.TODO(), test.TestPassword, 5, ""))
|
||||||
|
|
||||||
chkr := checker.New(checkRepo)
|
chkr := checker.New(checkRepo, false)
|
||||||
|
|
||||||
hints, errs := chkr.LoadIndex(context.TODO())
|
hints, errs := chkr.LoadIndex(context.TODO())
|
||||||
if len(errs) > 0 {
|
if len(errs) > 0 {
|
||||||
|
@ -398,7 +398,7 @@ func TestCheckerNoDuplicateTreeDecodes(t *testing.T) {
|
||||||
loadedTrees: restic.NewIDSet(),
|
loadedTrees: restic.NewIDSet(),
|
||||||
}
|
}
|
||||||
|
|
||||||
chkr := checker.New(checkRepo)
|
chkr := checker.New(checkRepo, false)
|
||||||
hints, errs := chkr.LoadIndex(context.TODO())
|
hints, errs := chkr.LoadIndex(context.TODO())
|
||||||
if len(errs) > 0 {
|
if len(errs) > 0 {
|
||||||
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
|
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
|
||||||
|
@ -509,7 +509,7 @@ func TestCheckerBlobTypeConfusion(t *testing.T) {
|
||||||
UnblockChannel: make(chan struct{}),
|
UnblockChannel: make(chan struct{}),
|
||||||
}
|
}
|
||||||
|
|
||||||
chkr := checker.New(delayRepo)
|
chkr := checker.New(delayRepo, false)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
<-ctx.Done()
|
<-ctx.Done()
|
||||||
|
@ -544,7 +544,7 @@ func loadBenchRepository(t *testing.B) (*checker.Checker, restic.Repository, fun
|
||||||
|
|
||||||
repo := repository.TestOpenLocal(t, repodir)
|
repo := repository.TestOpenLocal(t, repodir)
|
||||||
|
|
||||||
chkr := checker.New(repo)
|
chkr := checker.New(repo, false)
|
||||||
hints, errs := chkr.LoadIndex(context.TODO())
|
hints, errs := chkr.LoadIndex(context.TODO())
|
||||||
if len(errs) > 0 {
|
if len(errs) > 0 {
|
||||||
defer cleanup()
|
defer cleanup()
|
||||||
|
|
|
@ -9,7 +9,7 @@ import (
|
||||||
|
|
||||||
// TestCheckRepo runs the checker on repo.
|
// TestCheckRepo runs the checker on repo.
|
||||||
func TestCheckRepo(t testing.TB, repo restic.Repository) {
|
func TestCheckRepo(t testing.TB, repo restic.Repository) {
|
||||||
chkr := New(repo)
|
chkr := New(repo, true)
|
||||||
|
|
||||||
hints, errs := chkr.LoadIndex(context.TODO())
|
hints, errs := chkr.LoadIndex(context.TODO())
|
||||||
if len(errs) != 0 {
|
if len(errs) != 0 {
|
||||||
|
|
|
@ -360,7 +360,7 @@ func TestIndexSave(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
checker := checker.New(repo)
|
checker := checker.New(repo, false)
|
||||||
hints, errs := checker.LoadIndex(context.TODO())
|
hints, errs := checker.LoadIndex(context.TODO())
|
||||||
for _, h := range hints {
|
for _, h := range hints {
|
||||||
t.Logf("hint: %v\n", h)
|
t.Logf("hint: %v\n", h)
|
||||||
|
|
Loading…
Reference in a new issue