Merge pull request #2599 from MichaelEischer/tweak-mem-usage
Reduce memory usage when searching for used blobs
This commit is contained in:
commit
b7b479b668
6 changed files with 54 additions and 30 deletions
6
changelog/unreleased/pull-2599
Normal file
6
changelog/unreleased/pull-2599
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
Enhancement: Slightly reduce memory usage of prune and stats commands
|
||||||
|
|
||||||
|
The prune and stats commands kept directory identifiers in memory twice
|
||||||
|
while searching for used blobs.
|
||||||
|
|
||||||
|
https://github.com/restic/restic/pull/2599
|
|
@ -189,14 +189,13 @@ func pruneRepository(gopts GlobalOptions, repo restic.Repository) error {
|
||||||
Verbosef("find data that is still in use for %d snapshots\n", stats.snapshots)
|
Verbosef("find data that is still in use for %d snapshots\n", stats.snapshots)
|
||||||
|
|
||||||
usedBlobs := restic.NewBlobSet()
|
usedBlobs := restic.NewBlobSet()
|
||||||
seenBlobs := restic.NewBlobSet()
|
|
||||||
|
|
||||||
bar = newProgressMax(!gopts.Quiet, uint64(len(snapshots)), "snapshots")
|
bar = newProgressMax(!gopts.Quiet, uint64(len(snapshots)), "snapshots")
|
||||||
bar.Start()
|
bar.Start()
|
||||||
for _, sn := range snapshots {
|
for _, sn := range snapshots {
|
||||||
debug.Log("process snapshot %v", sn.ID())
|
debug.Log("process snapshot %v", sn.ID())
|
||||||
|
|
||||||
err = restic.FindUsedBlobs(ctx, repo, *sn.Tree, usedBlobs, seenBlobs)
|
err = restic.FindUsedBlobs(ctx, repo, *sn.Tree, usedBlobs)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if repo.Backend().IsNotExist(err) {
|
if repo.Backend().IsNotExist(err) {
|
||||||
return errors.Fatal("unable to load a tree from the repo: " + err.Error())
|
return errors.Fatal("unable to load a tree from the repo: " + err.Error())
|
||||||
|
|
|
@ -93,7 +93,6 @@ func runStats(gopts GlobalOptions, args []string) error {
|
||||||
uniqueInodes: make(map[uint64]struct{}),
|
uniqueInodes: make(map[uint64]struct{}),
|
||||||
fileBlobs: make(map[string]restic.IDSet),
|
fileBlobs: make(map[string]restic.IDSet),
|
||||||
blobs: restic.NewBlobSet(),
|
blobs: restic.NewBlobSet(),
|
||||||
blobsSeen: restic.NewBlobSet(),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if snapshotIDString != "" {
|
if snapshotIDString != "" {
|
||||||
|
@ -183,7 +182,7 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest
|
||||||
if countMode == countModeRawData {
|
if countMode == countModeRawData {
|
||||||
// count just the sizes of unique blobs; we don't need to walk the tree
|
// count just the sizes of unique blobs; we don't need to walk the tree
|
||||||
// ourselves in this case, since a nifty function does it for us
|
// ourselves in this case, since a nifty function does it for us
|
||||||
return restic.FindUsedBlobs(ctx, repo, *snapshot.Tree, stats.blobs, stats.blobsSeen)
|
return restic.FindUsedBlobs(ctx, repo, *snapshot.Tree, stats.blobs)
|
||||||
}
|
}
|
||||||
|
|
||||||
err := walker.Walk(ctx, repo, *snapshot.Tree, restic.NewIDSet(), statsWalkTree(repo, stats))
|
err := walker.Walk(ctx, repo, *snapshot.Tree, restic.NewIDSet(), statsWalkTree(repo, stats))
|
||||||
|
@ -318,9 +317,9 @@ type statsContainer struct {
|
||||||
// blobs that have been seen as a part of the file
|
// blobs that have been seen as a part of the file
|
||||||
fileBlobs map[string]restic.IDSet
|
fileBlobs map[string]restic.IDSet
|
||||||
|
|
||||||
// blobs and blobsSeen are used to count individual
|
// blobs is used to count individual unique blobs,
|
||||||
// unique blobs, independent of references to files
|
// independent of references to files
|
||||||
blobs, blobsSeen restic.BlobSet
|
blobs restic.BlobSet
|
||||||
}
|
}
|
||||||
|
|
||||||
// fileID is a 256-bit hash that distinguishes unique files.
|
// fileID is a 256-bit hash that distinguishes unique files.
|
||||||
|
|
|
@ -2,11 +2,19 @@ package restic
|
||||||
|
|
||||||
import "context"
|
import "context"
|
||||||
|
|
||||||
|
// TreeLoader loads a tree from a repository.
|
||||||
|
type TreeLoader interface {
|
||||||
|
LoadTree(context.Context, ID) (*Tree, error)
|
||||||
|
}
|
||||||
|
|
||||||
// FindUsedBlobs traverses the tree ID and adds all seen blobs (trees and data
|
// FindUsedBlobs traverses the tree ID and adds all seen blobs (trees and data
|
||||||
// blobs) to the set blobs. The tree blobs in the `seen` BlobSet will not be visited
|
// blobs) to the set blobs. Already seen tree blobs will not be visited again.
|
||||||
// again.
|
func FindUsedBlobs(ctx context.Context, repo TreeLoader, treeID ID, blobs BlobSet) error {
|
||||||
func FindUsedBlobs(ctx context.Context, repo Repository, treeID ID, blobs BlobSet, seen BlobSet) error {
|
h := BlobHandle{ID: treeID, Type: TreeBlob}
|
||||||
blobs.Insert(BlobHandle{ID: treeID, Type: TreeBlob})
|
if blobs.Has(h) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
blobs.Insert(h)
|
||||||
|
|
||||||
tree, err := repo.LoadTree(ctx, treeID)
|
tree, err := repo.LoadTree(ctx, treeID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -20,15 +28,7 @@ func FindUsedBlobs(ctx context.Context, repo Repository, treeID ID, blobs BlobSe
|
||||||
blobs.Insert(BlobHandle{ID: blob, Type: DataBlob})
|
blobs.Insert(BlobHandle{ID: blob, Type: DataBlob})
|
||||||
}
|
}
|
||||||
case "dir":
|
case "dir":
|
||||||
subtreeID := *node.Subtree
|
err := FindUsedBlobs(ctx, repo, *node.Subtree, blobs)
|
||||||
h := BlobHandle{ID: subtreeID, Type: TreeBlob}
|
|
||||||
if seen.Has(h) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
seen.Insert(h)
|
|
||||||
|
|
||||||
err := FindUsedBlobs(ctx, repo, subtreeID, blobs, seen)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,6 +12,7 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/restic/restic/internal/errors"
|
||||||
"github.com/restic/restic/internal/repository"
|
"github.com/restic/restic/internal/repository"
|
||||||
"github.com/restic/restic/internal/restic"
|
"github.com/restic/restic/internal/restic"
|
||||||
)
|
)
|
||||||
|
@ -93,7 +94,7 @@ func TestFindUsedBlobs(t *testing.T) {
|
||||||
|
|
||||||
for i, sn := range snapshots {
|
for i, sn := range snapshots {
|
||||||
usedBlobs := restic.NewBlobSet()
|
usedBlobs := restic.NewBlobSet()
|
||||||
err := restic.FindUsedBlobs(context.TODO(), repo, *sn.Tree, usedBlobs, restic.NewBlobSet())
|
err := restic.FindUsedBlobs(context.TODO(), repo, *sn.Tree, usedBlobs)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("FindUsedBlobs returned error: %v", err)
|
t.Errorf("FindUsedBlobs returned error: %v", err)
|
||||||
continue
|
continue
|
||||||
|
@ -118,6 +119,31 @@ func TestFindUsedBlobs(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type ForbiddenRepo struct{}
|
||||||
|
|
||||||
|
func (r ForbiddenRepo) LoadTree(ctx context.Context, id restic.ID) (*restic.Tree, error) {
|
||||||
|
return nil, errors.New("should not be called")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindUsedBlobsSkipsSeenBlobs(t *testing.T) {
|
||||||
|
repo, cleanup := repository.TestRepository(t)
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
snapshot := restic.TestCreateSnapshot(t, repo, findTestTime, findTestDepth, 0)
|
||||||
|
t.Logf("snapshot %v saved, tree %v", snapshot.ID().Str(), snapshot.Tree.Str())
|
||||||
|
|
||||||
|
usedBlobs := restic.NewBlobSet()
|
||||||
|
err := restic.FindUsedBlobs(context.TODO(), repo, *snapshot.Tree, usedBlobs)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("FindUsedBlobs returned error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = restic.FindUsedBlobs(context.TODO(), ForbiddenRepo{}, *snapshot.Tree, usedBlobs)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("FindUsedBlobs returned error: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkFindUsedBlobs(b *testing.B) {
|
func BenchmarkFindUsedBlobs(b *testing.B) {
|
||||||
repo, cleanup := repository.TestRepository(b)
|
repo, cleanup := repository.TestRepository(b)
|
||||||
defer cleanup()
|
defer cleanup()
|
||||||
|
@ -127,9 +153,8 @@ func BenchmarkFindUsedBlobs(b *testing.B) {
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
|
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
seen := restic.NewBlobSet()
|
|
||||||
blobs := restic.NewBlobSet()
|
blobs := restic.NewBlobSet()
|
||||||
err := restic.FindUsedBlobs(context.TODO(), repo, *sn.Tree, blobs, seen)
|
err := restic.FindUsedBlobs(context.TODO(), repo, *sn.Tree, blobs)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
b.Error(err)
|
b.Error(err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,11 +10,6 @@ import (
|
||||||
"github.com/restic/restic/internal/restic"
|
"github.com/restic/restic/internal/restic"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TreeLoader loads a tree from a repository.
|
|
||||||
type TreeLoader interface {
|
|
||||||
LoadTree(context.Context, restic.ID) (*restic.Tree, error)
|
|
||||||
}
|
|
||||||
|
|
||||||
// SkipNode is returned by WalkFunc when a dir node should not be walked.
|
// SkipNode is returned by WalkFunc when a dir node should not be walked.
|
||||||
var SkipNode = errors.New("skip this node")
|
var SkipNode = errors.New("skip this node")
|
||||||
|
|
||||||
|
@ -38,7 +33,7 @@ type WalkFunc func(parentTreeID restic.ID, path string, node *restic.Node, nodeE
|
||||||
// Walk calls walkFn recursively for each node in root. If walkFn returns an
|
// Walk calls walkFn recursively for each node in root. If walkFn returns an
|
||||||
// error, it is passed up the call stack. The trees in ignoreTrees are not
|
// error, it is passed up the call stack. The trees in ignoreTrees are not
|
||||||
// walked. If walkFn ignores trees, these are added to the set.
|
// walked. If walkFn ignores trees, these are added to the set.
|
||||||
func Walk(ctx context.Context, repo TreeLoader, root restic.ID, ignoreTrees restic.IDSet, walkFn WalkFunc) error {
|
func Walk(ctx context.Context, repo restic.TreeLoader, root restic.ID, ignoreTrees restic.IDSet, walkFn WalkFunc) error {
|
||||||
tree, err := repo.LoadTree(ctx, root)
|
tree, err := repo.LoadTree(ctx, root)
|
||||||
_, err = walkFn(root, "/", nil, err)
|
_, err = walkFn(root, "/", nil, err)
|
||||||
|
|
||||||
|
@ -60,7 +55,7 @@ func Walk(ctx context.Context, repo TreeLoader, root restic.ID, ignoreTrees rest
|
||||||
// walk recursively traverses the tree, ignoring subtrees when the ID of the
|
// walk recursively traverses the tree, ignoring subtrees when the ID of the
|
||||||
// subtree is in ignoreTrees. If err is nil and ignore is true, the subtree ID
|
// subtree is in ignoreTrees. If err is nil and ignore is true, the subtree ID
|
||||||
// will be added to ignoreTrees by walk.
|
// will be added to ignoreTrees by walk.
|
||||||
func walk(ctx context.Context, repo TreeLoader, prefix string, parentTreeID restic.ID, tree *restic.Tree, ignoreTrees restic.IDSet, walkFn WalkFunc) (ignore bool, err error) {
|
func walk(ctx context.Context, repo restic.TreeLoader, prefix string, parentTreeID restic.ID, tree *restic.Tree, ignoreTrees restic.IDSet, walkFn WalkFunc) (ignore bool, err error) {
|
||||||
var allNodesIgnored = true
|
var allNodesIgnored = true
|
||||||
|
|
||||||
if len(tree.Nodes) == 0 {
|
if len(tree.Nodes) == 0 {
|
||||||
|
|
Loading…
Reference in a new issue