From 54067431020f275bd68f71cf29ac248f9107cffe Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 10 Apr 2022 11:57:01 +0200 Subject: [PATCH] prune: Automatically repack uncompressed trees for repo v2 Tree packs are cached locally at clients and thus benefit a lot from being compressed. Ensure this by having prune always repack pack files containing uncompressed trees. --- cmd/restic/cmd_prune.go | 17 +++- internal/migrations/compress_repo_v2.go | 102 ------------------------ 2 files changed, 14 insertions(+), 105 deletions(-) delete mode 100644 internal/migrations/compress_repo_v2.go diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index a6a8d0bde..71e1a21af 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -191,6 +191,7 @@ type packInfo struct { usedSize uint64 unusedSize uint64 tpe restic.BlobType + uncompressed bool } type packInfoWithID struct { @@ -299,6 +300,9 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB ip.unusedSize += size ip.unusedBlobs++ } + if !blob.IsCompressed() { + ip.uncompressed = true + } // update indexPack indexPack[blob.PackID] = ip } @@ -318,6 +322,8 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB } } + repoVersion := repo.Config().Version + // loop over all packs and decide what to do bar := newProgressMax(!gopts.Quiet, uint64(len(indexPack)), "packs processed") err := repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error { @@ -350,6 +356,11 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB stats.packs.partlyUsed++ } + // repo v2: always repack tree blobs if uncompressed + mustCompress := repoVersion >= 2 && p.tpe == restic.TreeBlob && p.uncompressed + // use a flag that pack must be compressed + p.uncompressed = mustCompress + // decide what to do switch { case p.usedBlobs == 0 && p.duplicateBlobs == 0: @@ -362,7 +373,7 @@ func prune(opts PruneOptions, gopts GlobalOptions, 
repo restic.Repository, usedB // if this is a data pack and --repack-cacheable-only is set => keep pack! keep(p) - case p.unusedBlobs == 0 && p.duplicateBlobs == 0 && p.tpe != restic.InvalidBlob: + case p.unusedBlobs == 0 && p.duplicateBlobs == 0 && p.tpe != restic.InvalidBlob && !mustCompress: // All blobs in pack are used and not duplicates/mixed => keep pack! keep(p) @@ -447,8 +458,8 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB case reachedRepackSize: keep(p.packInfo) - case p.duplicateBlobs > 0, p.tpe != restic.DataBlob: - // repacking duplicates/non-data is only limited by repackSize + case p.duplicateBlobs > 0, p.tpe != restic.DataBlob, p.uncompressed: + // repacking duplicates/non-data/uncompressed-trees is only limited by repackSize repack(p.ID, p.packInfo) case reachedUnusedSizeAfter: diff --git a/internal/migrations/compress_repo_v2.go b/internal/migrations/compress_repo_v2.go deleted file mode 100644 index b6986d29e..000000000 --- a/internal/migrations/compress_repo_v2.go +++ /dev/null @@ -1,102 +0,0 @@ -package migrations - -import ( - "context" - "fmt" - - "github.com/restic/restic/internal/repository" - "github.com/restic/restic/internal/restic" -) - -func init() { - register(&CompressRepoV2{}) -} - -type CompressRepoV2 struct{} - -func (*CompressRepoV2) Name() string { - return "compress_all_data" -} - -func (*CompressRepoV2) Desc() string { - return "compress all data in the repo" -} - -func (*CompressRepoV2) Check(ctx context.Context, repo restic.Repository) (bool, error) { - // only do very fast checks on the version here, we don't want the list of - // available migrations to take long to load - if repo.Config().Version < 2 { - return false, nil - } - - return true, nil -} - -// Apply requires that the repository must be already locked exclusively, this -// is done by the caller, so we can just go ahead, rewrite the packs as they -// are, remove the packs and rebuild the index. 
-func (*CompressRepoV2) Apply(ctx context.Context, repo restic.Repository) error { - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - err := repo.LoadIndex(ctx) - if err != nil { - return fmt.Errorf("index load failed: %w", err) - } - - packsWithUncompressedData := restic.NewIDSet() - keepBlobs := restic.NewBlobSet() - - for blob := range repo.Index().Each(ctx) { - keepBlobs.Insert(blob.BlobHandle) - - if blob.UncompressedLength != 0 { - // blob is already compressed, ignore - continue - - } - - // remember pack ID - packsWithUncompressedData.Insert(blob.PackID) - } - - if len(packsWithUncompressedData) == 0 { - // nothing to do - return nil - } - - // don't upload new indexes until we're done - repo.(*repository.Repository).DisableAutoIndexUpdate() - obsoletePacks, err := repository.Repack(ctx, repo, repo, packsWithUncompressedData, keepBlobs, nil) - if err != nil { - return fmt.Errorf("repack failed: %w", err) - } - - if len(obsoletePacks) != len(packsWithUncompressedData) { - return fmt.Errorf("Repack() return other packs, %d != %d", len(obsoletePacks), len(packsWithUncompressedData)) - } - - // build new index - idx := repo.Index().(*repository.MasterIndex) - obsoleteIndexes, err := idx.Save(ctx, repo, obsoletePacks, nil, nil) - if err != nil { - return fmt.Errorf("saving new index failed: %w", err) - } - - // remove data - for id := range obsoleteIndexes { - err = repo.Backend().Remove(ctx, restic.Handle{Name: id.String(), Type: restic.IndexFile}) - if err != nil { - return fmt.Errorf("remove file failed: %w", err) - } - } - - for id := range obsoletePacks { - err = repo.Backend().Remove(ctx, restic.Handle{Name: id.String(), Type: restic.PackFile}) - if err != nil { - return fmt.Errorf("remove file failed: %w", err) - } - } - - return nil -}