forked from TrueCloudLab/restic
Merge pull request #3099 from MichaelEischer/check-less-memory
This commit is contained in:
commit
5f3b802ee7
6 changed files with 60 additions and 56 deletions
6
changelog/unreleased/pull-3099
Normal file
6
changelog/unreleased/pull-3099
Normal file
|
@ -0,0 +1,6 @@
|
|||
Enhancement: Reduce memory usage of check command
|
||||
|
||||
The check command now requires less memory if it is run with the
|
||||
`--check-unused` option.
|
||||
|
||||
https://github.com/restic/restic/pull/3099
|
|
@ -193,7 +193,7 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
|
|||
}
|
||||
}
|
||||
|
||||
chkr := checker.New(repo)
|
||||
chkr := checker.New(repo, opts.CheckUnused)
|
||||
|
||||
Verbosef("load indexes\n")
|
||||
hints, errs := chkr.LoadIndex(gopts.ctx)
|
||||
|
@ -255,7 +255,7 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
|
|||
}
|
||||
|
||||
if opts.CheckUnused {
|
||||
for _, id := range chkr.UnusedBlobs() {
|
||||
for _, id := range chkr.UnusedBlobs(gopts.ctx) {
|
||||
Verbosef("unused blob %v\n", id)
|
||||
errorsFound = true
|
||||
}
|
||||
|
|
|
@ -25,31 +25,25 @@ type Checker struct {
|
|||
packs map[restic.ID]int64
|
||||
blobRefs struct {
|
||||
sync.Mutex
|
||||
// see flags below
|
||||
M map[restic.BlobHandle]blobStatus
|
||||
M restic.BlobSet
|
||||
}
|
||||
trackUnused bool
|
||||
|
||||
masterIndex *repository.MasterIndex
|
||||
|
||||
repo restic.Repository
|
||||
}
|
||||
|
||||
type blobStatus uint8
|
||||
|
||||
const (
|
||||
blobStatusExists blobStatus = 1 << iota
|
||||
blobStatusReferenced
|
||||
)
|
||||
|
||||
// New returns a new checker which runs on repo.
|
||||
func New(repo restic.Repository) *Checker {
|
||||
func New(repo restic.Repository, trackUnused bool) *Checker {
|
||||
c := &Checker{
|
||||
packs: make(map[restic.ID]int64),
|
||||
masterIndex: repository.NewMasterIndex(),
|
||||
repo: repo,
|
||||
trackUnused: trackUnused,
|
||||
}
|
||||
|
||||
c.blobRefs.M = make(map[restic.BlobHandle]blobStatus)
|
||||
c.blobRefs.M = restic.NewBlobSet()
|
||||
|
||||
return c
|
||||
}
|
||||
|
@ -162,8 +156,6 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
|
|||
debug.Log("process blobs")
|
||||
cnt := 0
|
||||
for blob := range res.Index.Each(wgCtx) {
|
||||
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
|
||||
c.blobRefs.M[h] = blobStatusExists
|
||||
cnt++
|
||||
|
||||
if _, ok := packToIndex[blob.PackID]; !ok {
|
||||
|
@ -529,9 +521,11 @@ func (c *Checker) filterTrees(ctx context.Context, backlog restic.IDs, loaderCha
|
|||
// even when a file references a tree blob
|
||||
c.blobRefs.Lock()
|
||||
h := restic.BlobHandle{ID: nextTreeID, Type: restic.TreeBlob}
|
||||
status := c.blobRefs.M[h]
|
||||
blobReferenced := c.blobRefs.M.Has(h)
|
||||
// noop if already referenced
|
||||
c.blobRefs.M.Insert(h)
|
||||
c.blobRefs.Unlock()
|
||||
if (status & blobStatusReferenced) != 0 {
|
||||
if blobReferenced {
|
||||
continue
|
||||
}
|
||||
|
||||
|
@ -550,10 +544,6 @@ func (c *Checker) filterTrees(ctx context.Context, backlog restic.IDs, loaderCha
|
|||
case loadCh <- nextTreeID:
|
||||
outstandingLoadTreeJobs++
|
||||
loadCh = nil
|
||||
c.blobRefs.Lock()
|
||||
h := restic.BlobHandle{ID: nextTreeID, Type: restic.TreeBlob}
|
||||
c.blobRefs.M[h] |= blobStatusReferenced
|
||||
c.blobRefs.Unlock()
|
||||
|
||||
case j, ok := <-inCh:
|
||||
if !ok {
|
||||
|
@ -638,8 +628,6 @@ func (c *Checker) Structure(ctx context.Context, errChan chan<- error) {
|
|||
func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
|
||||
debug.Log("checking tree %v", id)
|
||||
|
||||
var blobs []restic.ID
|
||||
|
||||
for _, node := range tree.Nodes {
|
||||
switch node.Type {
|
||||
case "file":
|
||||
|
@ -653,13 +641,28 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
|
|||
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("file %q blob %d has null ID", node.Name, b)})
|
||||
continue
|
||||
}
|
||||
blobs = append(blobs, blobID)
|
||||
blobSize, found := c.repo.LookupBlobSize(blobID, restic.DataBlob)
|
||||
if !found {
|
||||
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("file %q blob %d size could not be found", node.Name, b)})
|
||||
debug.Log("tree %v references blob %v which isn't contained in index", id, blobID)
|
||||
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("file %q blob %v not found in index", node.Name, blobID)})
|
||||
}
|
||||
size += uint64(blobSize)
|
||||
}
|
||||
|
||||
if c.trackUnused {
|
||||
// loop a second time to keep the locked section as short as possible
|
||||
c.blobRefs.Lock()
|
||||
for _, blobID := range node.Content {
|
||||
if blobID.IsNull() {
|
||||
continue
|
||||
}
|
||||
h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob}
|
||||
c.blobRefs.M.Insert(h)
|
||||
debug.Log("blob %v is referenced", blobID)
|
||||
}
|
||||
c.blobRefs.Unlock()
|
||||
}
|
||||
|
||||
case "dir":
|
||||
if node.Subtree == nil {
|
||||
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("dir node %q has no subtree", node.Name)})
|
||||
|
@ -683,31 +686,26 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
|
|||
}
|
||||
}
|
||||
|
||||
for _, blobID := range blobs {
|
||||
c.blobRefs.Lock()
|
||||
h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob}
|
||||
if (c.blobRefs.M[h] & blobStatusExists) == 0 {
|
||||
debug.Log("tree %v references blob %v which isn't contained in index", id, blobID)
|
||||
errs = append(errs, Error{TreeID: id, BlobID: blobID, Err: errors.New("not found in index")})
|
||||
}
|
||||
c.blobRefs.M[h] |= blobStatusReferenced
|
||||
debug.Log("blob %v is referenced", blobID)
|
||||
c.blobRefs.Unlock()
|
||||
}
|
||||
|
||||
return errs
|
||||
}
|
||||
|
||||
// UnusedBlobs returns all blobs that have never been referenced.
|
||||
func (c *Checker) UnusedBlobs() (blobs restic.BlobHandles) {
|
||||
func (c *Checker) UnusedBlobs(ctx context.Context) (blobs restic.BlobHandles) {
|
||||
if !c.trackUnused {
|
||||
panic("only works when tracking blob references")
|
||||
}
|
||||
c.blobRefs.Lock()
|
||||
defer c.blobRefs.Unlock()
|
||||
|
||||
debug.Log("checking %d blobs", len(c.blobRefs.M))
|
||||
for id, flags := range c.blobRefs.M {
|
||||
if (flags & blobStatusReferenced) == 0 {
|
||||
debug.Log("blob %v not referenced", id)
|
||||
blobs = append(blobs, id)
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
for blob := range c.repo.Index().Each(ctx) {
|
||||
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
|
||||
if !c.blobRefs.M.Has(h) {
|
||||
debug.Log("blob %v not referenced", h)
|
||||
blobs = append(blobs, h)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ func TestCheckRepo(t *testing.T) {
|
|||
|
||||
repo := repository.TestOpenLocal(t, repodir)
|
||||
|
||||
chkr := checker.New(repo)
|
||||
chkr := checker.New(repo, false)
|
||||
hints, errs := chkr.LoadIndex(context.TODO())
|
||||
if len(errs) > 0 {
|
||||
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
|
||||
|
@ -87,7 +87,7 @@ func TestMissingPack(t *testing.T) {
|
|||
}
|
||||
test.OK(t, repo.Backend().Remove(context.TODO(), packHandle))
|
||||
|
||||
chkr := checker.New(repo)
|
||||
chkr := checker.New(repo, false)
|
||||
hints, errs := chkr.LoadIndex(context.TODO())
|
||||
if len(errs) > 0 {
|
||||
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
|
||||
|
@ -123,7 +123,7 @@ func TestUnreferencedPack(t *testing.T) {
|
|||
}
|
||||
test.OK(t, repo.Backend().Remove(context.TODO(), indexHandle))
|
||||
|
||||
chkr := checker.New(repo)
|
||||
chkr := checker.New(repo, false)
|
||||
hints, errs := chkr.LoadIndex(context.TODO())
|
||||
if len(errs) > 0 {
|
||||
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
|
||||
|
@ -168,7 +168,7 @@ func TestUnreferencedBlobs(t *testing.T) {
|
|||
|
||||
sort.Sort(unusedBlobsBySnapshot)
|
||||
|
||||
chkr := checker.New(repo)
|
||||
chkr := checker.New(repo, true)
|
||||
hints, errs := chkr.LoadIndex(context.TODO())
|
||||
if len(errs) > 0 {
|
||||
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
|
||||
|
@ -181,7 +181,7 @@ func TestUnreferencedBlobs(t *testing.T) {
|
|||
test.OKs(t, checkPacks(chkr))
|
||||
test.OKs(t, checkStruct(chkr))
|
||||
|
||||
blobs := chkr.UnusedBlobs()
|
||||
blobs := chkr.UnusedBlobs(context.TODO())
|
||||
sort.Sort(blobs)
|
||||
|
||||
test.Equals(t, unusedBlobsBySnapshot, blobs)
|
||||
|
@ -241,7 +241,7 @@ func TestModifiedIndex(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
chkr := checker.New(repo)
|
||||
chkr := checker.New(repo, false)
|
||||
hints, errs := chkr.LoadIndex(context.TODO())
|
||||
if len(errs) == 0 {
|
||||
t.Fatalf("expected errors not found")
|
||||
|
@ -264,7 +264,7 @@ func TestDuplicatePacksInIndex(t *testing.T) {
|
|||
|
||||
repo := repository.TestOpenLocal(t, repodir)
|
||||
|
||||
chkr := checker.New(repo)
|
||||
chkr := checker.New(repo, false)
|
||||
hints, errs := chkr.LoadIndex(context.TODO())
|
||||
if len(hints) == 0 {
|
||||
t.Fatalf("did not get expected checker hints for duplicate packs in indexes")
|
||||
|
@ -336,7 +336,7 @@ func TestCheckerModifiedData(t *testing.T) {
|
|||
checkRepo := repository.New(beError)
|
||||
test.OK(t, checkRepo.SearchKey(context.TODO(), test.TestPassword, 5, ""))
|
||||
|
||||
chkr := checker.New(checkRepo)
|
||||
chkr := checker.New(checkRepo, false)
|
||||
|
||||
hints, errs := chkr.LoadIndex(context.TODO())
|
||||
if len(errs) > 0 {
|
||||
|
@ -398,7 +398,7 @@ func TestCheckerNoDuplicateTreeDecodes(t *testing.T) {
|
|||
loadedTrees: restic.NewIDSet(),
|
||||
}
|
||||
|
||||
chkr := checker.New(checkRepo)
|
||||
chkr := checker.New(checkRepo, false)
|
||||
hints, errs := chkr.LoadIndex(context.TODO())
|
||||
if len(errs) > 0 {
|
||||
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
|
||||
|
@ -509,7 +509,7 @@ func TestCheckerBlobTypeConfusion(t *testing.T) {
|
|||
UnblockChannel: make(chan struct{}),
|
||||
}
|
||||
|
||||
chkr := checker.New(delayRepo)
|
||||
chkr := checker.New(delayRepo, false)
|
||||
|
||||
go func() {
|
||||
<-ctx.Done()
|
||||
|
@ -544,7 +544,7 @@ func loadBenchRepository(t *testing.B) (*checker.Checker, restic.Repository, fun
|
|||
|
||||
repo := repository.TestOpenLocal(t, repodir)
|
||||
|
||||
chkr := checker.New(repo)
|
||||
chkr := checker.New(repo, false)
|
||||
hints, errs := chkr.LoadIndex(context.TODO())
|
||||
if len(errs) > 0 {
|
||||
defer cleanup()
|
||||
|
|
|
@ -9,7 +9,7 @@ import (
|
|||
|
||||
// TestCheckRepo runs the checker on repo.
|
||||
func TestCheckRepo(t testing.TB, repo restic.Repository) {
|
||||
chkr := New(repo)
|
||||
chkr := New(repo, true)
|
||||
|
||||
hints, errs := chkr.LoadIndex(context.TODO())
|
||||
if len(errs) != 0 {
|
||||
|
@ -37,7 +37,7 @@ func TestCheckRepo(t testing.TB, repo restic.Repository) {
|
|||
}
|
||||
|
||||
// unused blobs
|
||||
blobs := chkr.UnusedBlobs()
|
||||
blobs := chkr.UnusedBlobs(context.TODO())
|
||||
if len(blobs) > 0 {
|
||||
t.Errorf("unused blobs found: %v", blobs)
|
||||
}
|
||||
|
|
|
@ -360,7 +360,7 @@ func TestIndexSave(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
checker := checker.New(repo)
|
||||
checker := checker.New(repo, false)
|
||||
hints, errs := checker.LoadIndex(context.TODO())
|
||||
for _, h := range hints {
|
||||
t.Logf("hint: %v\n", h)
|
||||
|
|
Loading…
Reference in a new issue