forked from TrueCloudLab/restic
prune: Automatically repack uncompressed trees for repo v2
Tree packs are cached locally at clients and thus benefit a lot from being compressed. Ensure this by having prune always repack pack files containing uncompressed trees.
This commit is contained in:
parent
c8c0d659ec
commit
5406743102
2 changed files with 14 additions and 105 deletions
|
@ -191,6 +191,7 @@ type packInfo struct {
|
|||
usedSize uint64
|
||||
unusedSize uint64
|
||||
tpe restic.BlobType
|
||||
uncompressed bool
|
||||
}
|
||||
|
||||
type packInfoWithID struct {
|
||||
|
@ -299,6 +300,9 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
|||
ip.unusedSize += size
|
||||
ip.unusedBlobs++
|
||||
}
|
||||
if !blob.IsCompressed() {
|
||||
ip.uncompressed = true
|
||||
}
|
||||
// update indexPack
|
||||
indexPack[blob.PackID] = ip
|
||||
}
|
||||
|
@ -318,6 +322,8 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
|||
}
|
||||
}
|
||||
|
||||
repoVersion := repo.Config().Version
|
||||
|
||||
// loop over all packs and decide what to do
|
||||
bar := newProgressMax(!gopts.Quiet, uint64(len(indexPack)), "packs processed")
|
||||
err := repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error {
|
||||
|
@ -350,6 +356,11 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
|||
stats.packs.partlyUsed++
|
||||
}
|
||||
|
||||
// repo v2: always repack tree blobs if uncompressed
|
||||
mustCompress := repoVersion >= 2 && p.tpe == restic.TreeBlob && p.uncompressed
|
||||
// use a flag that pack must be compressed
|
||||
p.uncompressed = mustCompress
|
||||
|
||||
// decide what to do
|
||||
switch {
|
||||
case p.usedBlobs == 0 && p.duplicateBlobs == 0:
|
||||
|
@ -362,7 +373,7 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
|||
// if this is a data pack and --repack-cacheable-only is set => keep pack!
|
||||
keep(p)
|
||||
|
||||
case p.unusedBlobs == 0 && p.duplicateBlobs == 0 && p.tpe != restic.InvalidBlob:
|
||||
case p.unusedBlobs == 0 && p.duplicateBlobs == 0 && p.tpe != restic.InvalidBlob && !mustCompress:
|
||||
// All blobs in pack are used and not duplicates/mixed => keep pack!
|
||||
keep(p)
|
||||
|
||||
|
@ -447,8 +458,8 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
|||
case reachedRepackSize:
|
||||
keep(p.packInfo)
|
||||
|
||||
case p.duplicateBlobs > 0, p.tpe != restic.DataBlob:
|
||||
// repacking duplicates/non-data is only limited by repackSize
|
||||
case p.duplicateBlobs > 0, p.tpe != restic.DataBlob, p.uncompressed:
|
||||
// repacking duplicates/non-data/uncompressed-trees is only limited by repackSize
|
||||
repack(p.ID, p.packInfo)
|
||||
|
||||
case reachedUnusedSizeAfter:
|
||||
|
|
|
@ -1,102 +0,0 @@
|
|||
package migrations
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/restic/restic/internal/repository"
|
||||
"github.com/restic/restic/internal/restic"
|
||||
)
|
||||
|
||||
func init() {
|
||||
register(&CompressRepoV2{})
|
||||
}
|
||||
|
||||
// CompressRepoV2 is the stateless receiver type for the "compress_all_data"
// migration; it carries no fields.
type CompressRepoV2 struct{}
|
||||
|
||||
func (*CompressRepoV2) Name() string {
|
||||
return "compress_all_data"
|
||||
}
|
||||
|
||||
func (*CompressRepoV2) Desc() string {
|
||||
return "compress all data in the repo"
|
||||
}
|
||||
|
||||
func (*CompressRepoV2) Check(ctx context.Context, repo restic.Repository) (bool, error) {
|
||||
// only do very fast checks on the version here, we don't want the list of
|
||||
// available migrations to take long to load
|
||||
if repo.Config().Version < 2 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// Apply requires that the repository must be already locked exclusively, this
|
||||
// is done by the caller, so we can just go ahead, rewrite the packs as they
|
||||
// are, remove the packs and rebuild the index.
|
||||
func (*CompressRepoV2) Apply(ctx context.Context, repo restic.Repository) error {
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
err := repo.LoadIndex(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("index load failed: %w", err)
|
||||
}
|
||||
|
||||
packsWithUncompressedData := restic.NewIDSet()
|
||||
keepBlobs := restic.NewBlobSet()
|
||||
|
||||
for blob := range repo.Index().Each(ctx) {
|
||||
keepBlobs.Insert(blob.BlobHandle)
|
||||
|
||||
if blob.UncompressedLength != 0 {
|
||||
// blob is already compressed, ignore
|
||||
continue
|
||||
|
||||
}
|
||||
|
||||
// remember pack ID
|
||||
packsWithUncompressedData.Insert(blob.PackID)
|
||||
}
|
||||
|
||||
if len(packsWithUncompressedData) == 0 {
|
||||
// nothing to do
|
||||
return nil
|
||||
}
|
||||
|
||||
// don't upload new indexes until we're done
|
||||
repo.(*repository.Repository).DisableAutoIndexUpdate()
|
||||
obsoletePacks, err := repository.Repack(ctx, repo, repo, packsWithUncompressedData, keepBlobs, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("repack failed: %w", err)
|
||||
}
|
||||
|
||||
if len(obsoletePacks) != len(packsWithUncompressedData) {
|
||||
return fmt.Errorf("Repack() return other packs, %d != %d", len(obsoletePacks), len(packsWithUncompressedData))
|
||||
}
|
||||
|
||||
// build new index
|
||||
idx := repo.Index().(*repository.MasterIndex)
|
||||
obsoleteIndexes, err := idx.Save(ctx, repo, obsoletePacks, nil, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("saving new index failed: %w", err)
|
||||
}
|
||||
|
||||
// remove data
|
||||
for id := range obsoleteIndexes {
|
||||
err = repo.Backend().Remove(ctx, restic.Handle{Name: id.String(), Type: restic.IndexFile})
|
||||
if err != nil {
|
||||
return fmt.Errorf("remove file failed: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
for id := range obsoletePacks {
|
||||
err = repo.Backend().Remove(ctx, restic.Handle{Name: id.String(), Type: restic.PackFile})
|
||||
if err != nil {
|
||||
return fmt.Errorf("remove file failed: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
Loading…
Reference in a new issue