forked from TrueCloudLab/restic
prune: Automatically repack uncompressed trees for repo v2
Tree packs are cached locally at clients and thus benefit a lot from being compressed. Ensure this by having prune always repack pack files containing uncompressed trees.
This commit is contained in:
parent
c8c0d659ec
commit
5406743102
2 changed files with 14 additions and 105 deletions
|
@ -191,6 +191,7 @@ type packInfo struct {
|
||||||
usedSize uint64
|
usedSize uint64
|
||||||
unusedSize uint64
|
unusedSize uint64
|
||||||
tpe restic.BlobType
|
tpe restic.BlobType
|
||||||
|
uncompressed bool
|
||||||
}
|
}
|
||||||
|
|
||||||
type packInfoWithID struct {
|
type packInfoWithID struct {
|
||||||
|
@ -299,6 +300,9 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
||||||
ip.unusedSize += size
|
ip.unusedSize += size
|
||||||
ip.unusedBlobs++
|
ip.unusedBlobs++
|
||||||
}
|
}
|
||||||
|
if !blob.IsCompressed() {
|
||||||
|
ip.uncompressed = true
|
||||||
|
}
|
||||||
// update indexPack
|
// update indexPack
|
||||||
indexPack[blob.PackID] = ip
|
indexPack[blob.PackID] = ip
|
||||||
}
|
}
|
||||||
|
@ -318,6 +322,8 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
repoVersion := repo.Config().Version
|
||||||
|
|
||||||
// loop over all packs and decide what to do
|
// loop over all packs and decide what to do
|
||||||
bar := newProgressMax(!gopts.Quiet, uint64(len(indexPack)), "packs processed")
|
bar := newProgressMax(!gopts.Quiet, uint64(len(indexPack)), "packs processed")
|
||||||
err := repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error {
|
err := repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error {
|
||||||
|
@ -350,6 +356,11 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
||||||
stats.packs.partlyUsed++
|
stats.packs.partlyUsed++
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// repo v2: always repack tree blobs if uncompressed
|
||||||
|
mustCompress := repoVersion >= 2 && p.tpe == restic.TreeBlob && p.uncompressed
|
||||||
|
// use a flag that pack must be compressed
|
||||||
|
p.uncompressed = mustCompress
|
||||||
|
|
||||||
// decide what to do
|
// decide what to do
|
||||||
switch {
|
switch {
|
||||||
case p.usedBlobs == 0 && p.duplicateBlobs == 0:
|
case p.usedBlobs == 0 && p.duplicateBlobs == 0:
|
||||||
|
@ -362,7 +373,7 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
||||||
// if this is a data pack and --repack-cacheable-only is set => keep pack!
|
// if this is a data pack and --repack-cacheable-only is set => keep pack!
|
||||||
keep(p)
|
keep(p)
|
||||||
|
|
||||||
case p.unusedBlobs == 0 && p.duplicateBlobs == 0 && p.tpe != restic.InvalidBlob:
|
case p.unusedBlobs == 0 && p.duplicateBlobs == 0 && p.tpe != restic.InvalidBlob && !mustCompress:
|
||||||
// All blobs in pack are used and not duplicates/mixed => keep pack!
|
// All blobs in pack are used and not duplicates/mixed => keep pack!
|
||||||
keep(p)
|
keep(p)
|
||||||
|
|
||||||
|
@ -447,8 +458,8 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
||||||
case reachedRepackSize:
|
case reachedRepackSize:
|
||||||
keep(p.packInfo)
|
keep(p.packInfo)
|
||||||
|
|
||||||
case p.duplicateBlobs > 0, p.tpe != restic.DataBlob:
|
case p.duplicateBlobs > 0, p.tpe != restic.DataBlob, p.uncompressed:
|
||||||
// repacking duplicates/non-data is only limited by repackSize
|
// repacking duplicates/non-data/uncompressed-trees is only limited by repackSize
|
||||||
repack(p.ID, p.packInfo)
|
repack(p.ID, p.packInfo)
|
||||||
|
|
||||||
case reachedUnusedSizeAfter:
|
case reachedUnusedSizeAfter:
|
||||||
|
|
|
@ -1,102 +0,0 @@
|
||||||
package migrations
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"fmt"
|
|
||||||
|
|
||||||
"github.com/restic/restic/internal/repository"
|
|
||||||
"github.com/restic/restic/internal/restic"
|
|
||||||
)
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
register(&CompressRepoV2{})
|
|
||||||
}
|
|
||||||
|
|
||||||
// CompressRepoV2 is the migration registered under the name
// "compress_all_data". The type carries no state.
type CompressRepoV2 struct{}
|
|
||||||
|
|
||||||
func (*CompressRepoV2) Name() string {
|
|
||||||
return "compress_all_data"
|
|
||||||
}
|
|
||||||
|
|
||||||
func (*CompressRepoV2) Desc() string {
|
|
||||||
return "compress all data in the repo"
|
|
||||||
}
|
|
||||||
|
|
||||||
func (*CompressRepoV2) Check(ctx context.Context, repo restic.Repository) (bool, error) {
|
|
||||||
// only do very fast checks on the version here, we don't want the list of
|
|
||||||
// available migrations to take long to load
|
|
||||||
if repo.Config().Version < 2 {
|
|
||||||
return false, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply requires that the repository must be already locked exclusively, this
|
|
||||||
// is done by the caller, so we can just go ahead, rewrite the packs as they
|
|
||||||
// are, remove the packs and rebuild the index.
|
|
||||||
func (*CompressRepoV2) Apply(ctx context.Context, repo restic.Repository) error {
|
|
||||||
ctx, cancel := context.WithCancel(ctx)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
err := repo.LoadIndex(ctx)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("index load failed: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
packsWithUncompressedData := restic.NewIDSet()
|
|
||||||
keepBlobs := restic.NewBlobSet()
|
|
||||||
|
|
||||||
for blob := range repo.Index().Each(ctx) {
|
|
||||||
keepBlobs.Insert(blob.BlobHandle)
|
|
||||||
|
|
||||||
if blob.UncompressedLength != 0 {
|
|
||||||
// blob is already compressed, ignore
|
|
||||||
continue
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// remember pack ID
|
|
||||||
packsWithUncompressedData.Insert(blob.PackID)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(packsWithUncompressedData) == 0 {
|
|
||||||
// nothing to do
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// don't upload new indexes until we're done
|
|
||||||
repo.(*repository.Repository).DisableAutoIndexUpdate()
|
|
||||||
obsoletePacks, err := repository.Repack(ctx, repo, repo, packsWithUncompressedData, keepBlobs, nil)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("repack failed: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(obsoletePacks) != len(packsWithUncompressedData) {
|
|
||||||
return fmt.Errorf("Repack() return other packs, %d != %d", len(obsoletePacks), len(packsWithUncompressedData))
|
|
||||||
}
|
|
||||||
|
|
||||||
// build new index
|
|
||||||
idx := repo.Index().(*repository.MasterIndex)
|
|
||||||
obsoleteIndexes, err := idx.Save(ctx, repo, obsoletePacks, nil, nil)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("saving new index failed: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// remove data
|
|
||||||
for id := range obsoleteIndexes {
|
|
||||||
err = repo.Backend().Remove(ctx, restic.Handle{Name: id.String(), Type: restic.IndexFile})
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("remove file failed: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for id := range obsoletePacks {
|
|
||||||
err = repo.Backend().Remove(ctx, restic.Handle{Name: id.String(), Type: restic.PackFile})
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("remove file failed: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
Loading…
Reference in a new issue