prune: Automatically repack uncompressed trees for repo v2

Tree packs are cached locally at clients and thus benefit a lot from
being compressed. Ensure this by having prune always repack pack files
containing uncompressed trees.
This commit is contained in:
Michael Eischer 2022-04-10 11:57:01 +02:00
parent c8c0d659ec
commit 5406743102
2 changed files with 14 additions and 105 deletions

View file

@ -191,6 +191,7 @@ type packInfo struct {
usedSize uint64 usedSize uint64
unusedSize uint64 unusedSize uint64
tpe restic.BlobType tpe restic.BlobType
uncompressed bool
} }
type packInfoWithID struct { type packInfoWithID struct {
@ -299,6 +300,9 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
ip.unusedSize += size ip.unusedSize += size
ip.unusedBlobs++ ip.unusedBlobs++
} }
if !blob.IsCompressed() {
ip.uncompressed = true
}
// update indexPack // update indexPack
indexPack[blob.PackID] = ip indexPack[blob.PackID] = ip
} }
@ -318,6 +322,8 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
} }
} }
repoVersion := repo.Config().Version
// loop over all packs and decide what to do // loop over all packs and decide what to do
bar := newProgressMax(!gopts.Quiet, uint64(len(indexPack)), "packs processed") bar := newProgressMax(!gopts.Quiet, uint64(len(indexPack)), "packs processed")
err := repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error { err := repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error {
@ -350,6 +356,11 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
stats.packs.partlyUsed++ stats.packs.partlyUsed++
} }
// repo v2: always repack tree blobs if uncompressed
mustCompress := repoVersion >= 2 && p.tpe == restic.TreeBlob && p.uncompressed
// use a flag that pack must be compressed
p.uncompressed = mustCompress
// decide what to do // decide what to do
switch { switch {
case p.usedBlobs == 0 && p.duplicateBlobs == 0: case p.usedBlobs == 0 && p.duplicateBlobs == 0:
@ -362,7 +373,7 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
// if this is a data pack and --repack-cacheable-only is set => keep pack! // if this is a data pack and --repack-cacheable-only is set => keep pack!
keep(p) keep(p)
case p.unusedBlobs == 0 && p.duplicateBlobs == 0 && p.tpe != restic.InvalidBlob: case p.unusedBlobs == 0 && p.duplicateBlobs == 0 && p.tpe != restic.InvalidBlob && !mustCompress:
// All blobs in pack are used and not duplicates/mixed => keep pack! // All blobs in pack are used and not duplicates/mixed => keep pack!
keep(p) keep(p)
@ -447,8 +458,8 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
case reachedRepackSize: case reachedRepackSize:
keep(p.packInfo) keep(p.packInfo)
case p.duplicateBlobs > 0, p.tpe != restic.DataBlob: case p.duplicateBlobs > 0, p.tpe != restic.DataBlob, p.uncompressed:
// repacking duplicates/non-data is only limited by repackSize // repacking duplicates/non-data/uncompressed-trees is only limited by repackSize
repack(p.ID, p.packInfo) repack(p.ID, p.packInfo)
case reachedUnusedSizeAfter: case reachedUnusedSizeAfter:

View file

@ -1,102 +0,0 @@
package migrations
import (
"context"
"fmt"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
)
// init hands a CompressRepoV2 instance to register when the package is
// loaded.
func init() {
	register(new(CompressRepoV2))
}
// CompressRepoV2 is the migration that compresses all data in a repository.
// The type carries no state.
type CompressRepoV2 struct{}

// Name returns the identifier of this migration.
func (*CompressRepoV2) Name() string {
	const name = "compress_all_data"
	return name
}

// Desc returns a short, human-readable description of this migration.
func (*CompressRepoV2) Desc() string {
	const desc = "compress all data in the repo"
	return desc
}
// Check reports whether the migration can be applied to repo. It returns
// true when the repository config version is 2 or higher and false
// otherwise; the returned error is always nil.
func (*CompressRepoV2) Check(ctx context.Context, repo restic.Repository) (bool, error) {
	// Only the config version is inspected: this check has to stay very fast
	// so that loading the list of available migrations does not take long.
	applicable := repo.Config().Version >= 2
	return applicable, nil
}
// Apply repacks every pack file that contains at least one uncompressed blob,
// saves a new index and then removes the index and pack files that became
// obsolete.
//
// Apply requires that the repository is already locked exclusively; this is
// done by the caller, so the packs can simply be rewritten and removed and the
// index rebuilt.
//
// repo must be backed by a *repository.Repository whose index is a
// *repository.MasterIndex. Any other implementation is rejected with an error
// (instead of a panic) once there is actually work to do. Apply returns nil
// when no pack contains uncompressed blobs; every other failure is returned
// wrapped with a short description of the step that failed.
func (*CompressRepoV2) Apply(ctx context.Context, repo restic.Repository) error {
	// Derive a cancellable context that is released on every return path.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	err := repo.LoadIndex(ctx)
	if err != nil {
		return fmt.Errorf("index load failed: %w", err)
	}

	packsWithUncompressedData := restic.NewIDSet()
	keepBlobs := restic.NewBlobSet()

	// Every blob listed in the index is kept; only the packs holding
	// uncompressed blobs are scheduled for repacking.
	for blob := range repo.Index().Each(ctx) {
		keepBlobs.Insert(blob.BlobHandle)

		if blob.UncompressedLength != 0 {
			// blob is already compressed, ignore
			continue
		}

		// remember pack ID
		packsWithUncompressedData.Insert(blob.PackID)
	}

	if len(packsWithUncompressedData) == 0 {
		// nothing to do
		return nil
	}

	// The steps below need methods of the concrete repository type. Use a
	// checked assertion so that an unexpected implementation yields an error
	// rather than a panic.
	concreteRepo, ok := repo.(*repository.Repository)
	if !ok {
		return fmt.Errorf("unsupported repository type %T", repo)
	}

	// don't upload new indexes until we're done
	concreteRepo.DisableAutoIndexUpdate()

	obsoletePacks, err := repository.Repack(ctx, repo, repo, packsWithUncompressedData, keepBlobs, nil)
	if err != nil {
		return fmt.Errorf("repack failed: %w", err)
	}

	// Sanity check: the number of packs reported as obsolete has to match the
	// number of packs that were selected for repacking.
	if len(obsoletePacks) != len(packsWithUncompressedData) {
		return fmt.Errorf("Repack() return other packs, %d != %d", len(obsoletePacks), len(packsWithUncompressedData))
	}

	// build new index
	rawIdx := repo.Index()
	idx, ok := rawIdx.(*repository.MasterIndex)
	if !ok {
		return fmt.Errorf("unsupported index type %T", rawIdx)
	}

	obsoleteIndexes, err := idx.Save(ctx, repo, obsoletePacks, nil, nil)
	if err != nil {
		return fmt.Errorf("saving new index failed: %w", err)
	}

	// remove data: first the superseded index files, then the old pack files
	for id := range obsoleteIndexes {
		err = repo.Backend().Remove(ctx, restic.Handle{Name: id.String(), Type: restic.IndexFile})
		if err != nil {
			return fmt.Errorf("remove file failed: %w", err)
		}
	}

	for id := range obsoletePacks {
		err = repo.Backend().Remove(ctx, restic.Handle{Name: id.String(), Type: restic.PackFile})
		if err != nil {
			return fmt.Errorf("remove file failed: %w", err)
		}
	}

	return nil
}