Merge pull request #3099 from MichaelEischer/check-less-memory

This commit is contained in:
Alexander Neumann 2020-11-16 10:11:25 +01:00 committed by GitHub
commit 5f3b802ee7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 60 additions and 56 deletions

View file

@ -0,0 +1,6 @@
Enhancement: Reduce memory usage of check command
The check command now requires less memory if it is run with the
`--check-unused` option.
https://github.com/restic/restic/pull/3099

View file

@ -193,7 +193,7 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
}
}
chkr := checker.New(repo)
chkr := checker.New(repo, opts.CheckUnused)
Verbosef("load indexes\n")
hints, errs := chkr.LoadIndex(gopts.ctx)
@ -255,7 +255,7 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
}
if opts.CheckUnused {
for _, id := range chkr.UnusedBlobs() {
for _, id := range chkr.UnusedBlobs(gopts.ctx) {
Verbosef("unused blob %v\n", id)
errorsFound = true
}

View file

@ -25,31 +25,25 @@ type Checker struct {
packs map[restic.ID]int64
blobRefs struct {
sync.Mutex
// see flags below
M map[restic.BlobHandle]blobStatus
M restic.BlobSet
}
trackUnused bool
masterIndex *repository.MasterIndex
repo restic.Repository
}
type blobStatus uint8
const (
blobStatusExists blobStatus = 1 << iota
blobStatusReferenced
)
// New returns a new checker which runs on repo.
func New(repo restic.Repository) *Checker {
func New(repo restic.Repository, trackUnused bool) *Checker {
c := &Checker{
packs: make(map[restic.ID]int64),
masterIndex: repository.NewMasterIndex(),
repo: repo,
trackUnused: trackUnused,
}
c.blobRefs.M = make(map[restic.BlobHandle]blobStatus)
c.blobRefs.M = restic.NewBlobSet()
return c
}
@ -162,8 +156,6 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
debug.Log("process blobs")
cnt := 0
for blob := range res.Index.Each(wgCtx) {
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
c.blobRefs.M[h] = blobStatusExists
cnt++
if _, ok := packToIndex[blob.PackID]; !ok {
@ -529,9 +521,11 @@ func (c *Checker) filterTrees(ctx context.Context, backlog restic.IDs, loaderCha
// even when a file references a tree blob
c.blobRefs.Lock()
h := restic.BlobHandle{ID: nextTreeID, Type: restic.TreeBlob}
status := c.blobRefs.M[h]
blobReferenced := c.blobRefs.M.Has(h)
// noop if already referenced
c.blobRefs.M.Insert(h)
c.blobRefs.Unlock()
if (status & blobStatusReferenced) != 0 {
if blobReferenced {
continue
}
@ -550,10 +544,6 @@ func (c *Checker) filterTrees(ctx context.Context, backlog restic.IDs, loaderCha
case loadCh <- nextTreeID:
outstandingLoadTreeJobs++
loadCh = nil
c.blobRefs.Lock()
h := restic.BlobHandle{ID: nextTreeID, Type: restic.TreeBlob}
c.blobRefs.M[h] |= blobStatusReferenced
c.blobRefs.Unlock()
case j, ok := <-inCh:
if !ok {
@ -638,8 +628,6 @@ func (c *Checker) Structure(ctx context.Context, errChan chan<- error) {
func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
debug.Log("checking tree %v", id)
var blobs []restic.ID
for _, node := range tree.Nodes {
switch node.Type {
case "file":
@ -653,13 +641,28 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("file %q blob %d has null ID", node.Name, b)})
continue
}
blobs = append(blobs, blobID)
blobSize, found := c.repo.LookupBlobSize(blobID, restic.DataBlob)
if !found {
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("file %q blob %d size could not be found", node.Name, b)})
debug.Log("tree %v references blob %v which isn't contained in index", id, blobID)
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("file %q blob %v not found in index", node.Name, blobID)})
}
size += uint64(blobSize)
}
if c.trackUnused {
// loop a second time to keep the locked section as short as possible
c.blobRefs.Lock()
for _, blobID := range node.Content {
if blobID.IsNull() {
continue
}
h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob}
c.blobRefs.M.Insert(h)
debug.Log("blob %v is referenced", blobID)
}
c.blobRefs.Unlock()
}
case "dir":
if node.Subtree == nil {
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("dir node %q has no subtree", node.Name)})
@ -683,31 +686,26 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
}
}
for _, blobID := range blobs {
c.blobRefs.Lock()
h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob}
if (c.blobRefs.M[h] & blobStatusExists) == 0 {
debug.Log("tree %v references blob %v which isn't contained in index", id, blobID)
errs = append(errs, Error{TreeID: id, BlobID: blobID, Err: errors.New("not found in index")})
}
c.blobRefs.M[h] |= blobStatusReferenced
debug.Log("blob %v is referenced", blobID)
c.blobRefs.Unlock()
}
return errs
}
// UnusedBlobs returns all blobs that have never been referenced.
func (c *Checker) UnusedBlobs() (blobs restic.BlobHandles) {
func (c *Checker) UnusedBlobs(ctx context.Context) (blobs restic.BlobHandles) {
if !c.trackUnused {
panic("only works when tracking blob references")
}
c.blobRefs.Lock()
defer c.blobRefs.Unlock()
debug.Log("checking %d blobs", len(c.blobRefs.M))
for id, flags := range c.blobRefs.M {
if (flags & blobStatusReferenced) == 0 {
debug.Log("blob %v not referenced", id)
blobs = append(blobs, id)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
for blob := range c.repo.Index().Each(ctx) {
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
if !c.blobRefs.M.Has(h) {
debug.Log("blob %v not referenced", h)
blobs = append(blobs, h)
}
}

View file

@ -61,7 +61,7 @@ func TestCheckRepo(t *testing.T) {
repo := repository.TestOpenLocal(t, repodir)
chkr := checker.New(repo)
chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 {
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -87,7 +87,7 @@ func TestMissingPack(t *testing.T) {
}
test.OK(t, repo.Backend().Remove(context.TODO(), packHandle))
chkr := checker.New(repo)
chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 {
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -123,7 +123,7 @@ func TestUnreferencedPack(t *testing.T) {
}
test.OK(t, repo.Backend().Remove(context.TODO(), indexHandle))
chkr := checker.New(repo)
chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 {
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -168,7 +168,7 @@ func TestUnreferencedBlobs(t *testing.T) {
sort.Sort(unusedBlobsBySnapshot)
chkr := checker.New(repo)
chkr := checker.New(repo, true)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 {
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -181,7 +181,7 @@ func TestUnreferencedBlobs(t *testing.T) {
test.OKs(t, checkPacks(chkr))
test.OKs(t, checkStruct(chkr))
blobs := chkr.UnusedBlobs()
blobs := chkr.UnusedBlobs(context.TODO())
sort.Sort(blobs)
test.Equals(t, unusedBlobsBySnapshot, blobs)
@ -241,7 +241,7 @@ func TestModifiedIndex(t *testing.T) {
t.Fatal(err)
}
chkr := checker.New(repo)
chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) == 0 {
t.Fatalf("expected errors not found")
@ -264,7 +264,7 @@ func TestDuplicatePacksInIndex(t *testing.T) {
repo := repository.TestOpenLocal(t, repodir)
chkr := checker.New(repo)
chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO())
if len(hints) == 0 {
t.Fatalf("did not get expected checker hints for duplicate packs in indexes")
@ -336,7 +336,7 @@ func TestCheckerModifiedData(t *testing.T) {
checkRepo := repository.New(beError)
test.OK(t, checkRepo.SearchKey(context.TODO(), test.TestPassword, 5, ""))
chkr := checker.New(checkRepo)
chkr := checker.New(checkRepo, false)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 {
@ -398,7 +398,7 @@ func TestCheckerNoDuplicateTreeDecodes(t *testing.T) {
loadedTrees: restic.NewIDSet(),
}
chkr := checker.New(checkRepo)
chkr := checker.New(checkRepo, false)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 {
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -509,7 +509,7 @@ func TestCheckerBlobTypeConfusion(t *testing.T) {
UnblockChannel: make(chan struct{}),
}
chkr := checker.New(delayRepo)
chkr := checker.New(delayRepo, false)
go func() {
<-ctx.Done()
@ -544,7 +544,7 @@ func loadBenchRepository(t *testing.B) (*checker.Checker, restic.Repository, fun
repo := repository.TestOpenLocal(t, repodir)
chkr := checker.New(repo)
chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 {
defer cleanup()

View file

@ -9,7 +9,7 @@ import (
// TestCheckRepo runs the checker on repo.
func TestCheckRepo(t testing.TB, repo restic.Repository) {
chkr := New(repo)
chkr := New(repo, true)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) != 0 {
@ -37,7 +37,7 @@ func TestCheckRepo(t testing.TB, repo restic.Repository) {
}
// unused blobs
blobs := chkr.UnusedBlobs()
blobs := chkr.UnusedBlobs(context.TODO())
if len(blobs) > 0 {
t.Errorf("unused blobs found: %v", blobs)
}

View file

@ -360,7 +360,7 @@ func TestIndexSave(t *testing.T) {
}
}
checker := checker.New(repo)
checker := checker.New(repo, false)
hints, errs := checker.LoadIndex(context.TODO())
for _, h := range hints {
t.Logf("hint: %v\n", h)