forked from TrueCloudLab/restic
Reimplementation of prune
This commit is contained in:
parent
3b591ed987
commit
7f9a0a5907
5 changed files with 553 additions and 211 deletions
22
changelog/unreleased/pull-2718
Normal file
22
changelog/unreleased/pull-2718
Normal file
|
@ -0,0 +1,22 @@
|
|||
Enhancement: Improve pruning performance and make pruning more customizable
|
||||
|
||||
The prune command is now much faster. This is especially the case for remote
|
||||
repositories or repositories with not much data to prune.
|
||||
Also the memory usage of the prune command is now reduced.
|
||||
|
||||
By default the prune command now no longer removes all unused blobs. This
|
||||
behavior can be fine-tuned by new options, like tolerated unused space or
|
||||
maximum size of packs to repack. For more details, see
|
||||
https://restic.readthedocs.io/en/stable/060_forget.html
|
||||
|
||||
Moreover, prune now accepts the dry-run option and forget --dry-run --prune
|
||||
also shows what prune would do.
|
||||
|
||||
Fixes several open issues, e.g.:
|
||||
https://github.com/restic/restic/issues/1140
|
||||
https://github.com/restic/restic/issues/1985
|
||||
https://github.com/restic/restic/issues/2112
|
||||
https://github.com/restic/restic/issues/2227
|
||||
https://github.com/restic/restic/issues/2305
|
||||
|
||||
https://github.com/restic/restic/pull/2718
|
|
@ -80,9 +80,15 @@ func init() {
|
|||
f.BoolVar(&forgetOptions.Prune, "prune", false, "automatically run the 'prune' command if snapshots have been removed")
|
||||
|
||||
f.SortFlags = false
|
||||
addPruneOptions(cmdForget)
|
||||
}
|
||||
|
||||
func runForget(opts ForgetOptions, gopts GlobalOptions, args []string) error {
|
||||
err := verifyPruneOptions(&pruneOptions)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
repo, err := OpenRepository(gopts)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -205,7 +211,12 @@ func runForget(opts ForgetOptions, gopts GlobalOptions, args []string) error {
|
|||
}
|
||||
|
||||
if len(removeSnIDs) > 0 && opts.Prune && !opts.DryRun {
|
||||
return pruneRepository(gopts, repo)
|
||||
if !gopts.JSON {
|
||||
Verbosef("%d snapshots have been removed, running prune\n", len(removeSnIDs))
|
||||
}
|
||||
|
||||
pruneOptions.DryRun = opts.DryRun
|
||||
return runPruneWithRepo(pruneOptions, gopts, repo)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
|
@ -1,15 +1,22 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strconv"
|
||||
|
||||
"github.com/restic/restic/internal/debug"
|
||||
"github.com/restic/restic/internal/errors"
|
||||
"github.com/restic/restic/internal/index"
|
||||
"github.com/restic/restic/internal/pack"
|
||||
"github.com/restic/restic/internal/repository"
|
||||
"github.com/restic/restic/internal/restic"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
var errorIndexIncomplete = errors.Fatal("index is not complete")
|
||||
var errorPacksMissing = errors.Fatal("packs from index missing in repo")
|
||||
var errorSizeNotMatching = errors.Fatal("pack size does not match calculated size from index")
|
||||
|
||||
var cmdPrune = &cobra.Command{
|
||||
Use: "prune [flags]",
|
||||
Short: "Remove unneeded data from the repository",
|
||||
|
@ -24,12 +31,72 @@ Exit status is 0 if the command was successful, and non-zero if there was any er
|
|||
`,
|
||||
DisableAutoGenTag: true,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return runPrune(globalOptions)
|
||||
return runPrune(pruneOptions, globalOptions)
|
||||
},
|
||||
}
|
||||
|
||||
// PruneOptions collects all options for the cleanup command.
|
||||
type PruneOptions struct {
|
||||
DryRun bool
|
||||
MaxUnused string
|
||||
MaxUnusedPercent float64
|
||||
MaxUnusedBytes uint64
|
||||
MaxRepackSize string
|
||||
MaxRepackBytes uint64
|
||||
// RepackSmall bool <- This option may be added later
|
||||
RepackCachableOnly bool
|
||||
}
|
||||
|
||||
var pruneOptions PruneOptions
|
||||
|
||||
func init() {
|
||||
cmdRoot.AddCommand(cmdPrune)
|
||||
f := cmdPrune.Flags()
|
||||
f.BoolVarP(&pruneOptions.DryRun, "dry-run", "n", false, "do not modify the repository, just print what would be done")
|
||||
addPruneOptions(cmdPrune)
|
||||
}
|
||||
|
||||
func addPruneOptions(c *cobra.Command) {
|
||||
f := c.Flags()
|
||||
f.StringVar(&pruneOptions.MaxUnused, "max-unused", "5%", "tolerate given `limit` of unused space (allowed suffixes: k/K, m/M, g/G, t/T or value in %)")
|
||||
f.StringVar(&pruneOptions.MaxRepackSize, "max-repack-size", "", "maximum `size` to repack (allowed suffixes: k/K, m/M, g/G, t/T)")
|
||||
f.BoolVar(&pruneOptions.RepackCachableOnly, "repack-cacheable-only", false, "only repack packs which are cacheable")
|
||||
// f.BoolVar(&pruneOptions.RepackSmall, "repack-small", false, "also repack small packs")
|
||||
}
|
||||
|
||||
func verifyPruneOptions(opts *PruneOptions) error {
|
||||
if len(opts.MaxRepackSize) > 0 {
|
||||
size, err := parseSizeStr(opts.MaxRepackSize)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
opts.MaxRepackBytes = uint64(size)
|
||||
}
|
||||
|
||||
length := len(opts.MaxUnused)
|
||||
if length == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
if opts.MaxUnused[length-1] == '%' {
|
||||
opts.MaxUnusedPercent, err = strconv.ParseFloat(opts.MaxUnused[:length-1], 64)
|
||||
opts.MaxUnusedBytes = ^uint64(0)
|
||||
} else {
|
||||
var size int64
|
||||
size, err = parseSizeStr(opts.MaxUnused)
|
||||
opts.MaxUnusedPercent = 100.0
|
||||
opts.MaxUnusedBytes = uint64(size)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if opts.MaxUnusedPercent < 0.0 || opts.MaxUnusedPercent > 100.0 {
|
||||
return errors.Fatalf("--max-unused-percent should be between 0 and 100. Given value: %f", opts.MaxUnusedPercent)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func shortenStatus(maxLength int, s string) string {
|
||||
|
@ -44,7 +111,12 @@ func shortenStatus(maxLength int, s string) string {
|
|||
return s[:maxLength-3] + "..."
|
||||
}
|
||||
|
||||
func runPrune(gopts GlobalOptions) error {
|
||||
func runPrune(opts PruneOptions, gopts GlobalOptions) error {
|
||||
err := verifyPruneOptions(&opts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
repo, err := OpenRepository(gopts)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -56,203 +128,345 @@ func runPrune(gopts GlobalOptions) error {
|
|||
return err
|
||||
}
|
||||
|
||||
return runPruneWithRepo(opts, gopts, repo)
|
||||
}
|
||||
|
||||
func runPruneWithRepo(opts PruneOptions, gopts GlobalOptions, repo *repository.Repository) error {
|
||||
// we do not need index updates while pruning!
|
||||
repo.DisableAutoIndexUpdate()
|
||||
|
||||
return pruneRepository(gopts, repo)
|
||||
}
|
||||
|
||||
func mixedBlobs(list []restic.Blob) bool {
|
||||
var tree, data bool
|
||||
|
||||
for _, pb := range list {
|
||||
switch pb.Type {
|
||||
case restic.TreeBlob:
|
||||
tree = true
|
||||
case restic.DataBlob:
|
||||
data = true
|
||||
}
|
||||
|
||||
if tree && data {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func pruneRepository(gopts GlobalOptions, repo restic.Repository) error {
|
||||
ctx := gopts.ctx
|
||||
|
||||
err := repo.LoadIndex(ctx)
|
||||
Verbosef("loading all snapshots...\n")
|
||||
snapshots, err := restic.LoadAllSnapshots(gopts.ctx, repo)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var stats struct {
|
||||
blobs int
|
||||
packs int
|
||||
snapshots int
|
||||
bytes int64
|
||||
}
|
||||
|
||||
Verbosef("counting files in repo\n")
|
||||
err = repo.List(ctx, restic.PackFile, func(restic.ID, int64) error {
|
||||
stats.packs++
|
||||
return nil
|
||||
})
|
||||
Verbosef("loading indexes...\n")
|
||||
err = repo.LoadIndex(gopts.ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
Verbosef("building new index for repo\n")
|
||||
|
||||
bar := newProgressMax(!gopts.Quiet, uint64(stats.packs), "packs")
|
||||
idx, invalidFiles, err := index.New(ctx, repo, restic.NewIDSet(), bar)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, id := range invalidFiles {
|
||||
Warnf("incomplete pack file (will be removed): %v\n", id)
|
||||
}
|
||||
|
||||
blobs := 0
|
||||
for _, pack := range idx.Packs {
|
||||
stats.bytes += pack.Size
|
||||
blobs += len(pack.Entries)
|
||||
}
|
||||
Verbosef("repository contains %v packs (%v blobs) with %v\n",
|
||||
len(idx.Packs), blobs, formatBytes(uint64(stats.bytes)))
|
||||
|
||||
blobCount := make(map[restic.BlobHandle]int)
|
||||
var duplicateBlobs uint64
|
||||
var duplicateBytes uint64
|
||||
|
||||
// find duplicate blobs
|
||||
for _, p := range idx.Packs {
|
||||
for _, entry := range p.Entries {
|
||||
stats.blobs++
|
||||
h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
|
||||
blobCount[h]++
|
||||
|
||||
if blobCount[h] > 1 {
|
||||
duplicateBlobs++
|
||||
duplicateBytes += uint64(entry.Length)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Verbosef("processed %d blobs: %d duplicate blobs, %v duplicate\n",
|
||||
stats.blobs, duplicateBlobs, formatBytes(uint64(duplicateBytes)))
|
||||
Verbosef("load all snapshots\n")
|
||||
|
||||
// find referenced blobs
|
||||
snapshots, err := restic.LoadAllSnapshots(ctx, repo)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.snapshots = len(snapshots)
|
||||
|
||||
usedBlobs, err := getUsedBlobs(gopts, repo, snapshots)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var missingBlobs []restic.BlobHandle
|
||||
for h := range usedBlobs {
|
||||
if _, ok := blobCount[h]; !ok {
|
||||
missingBlobs = append(missingBlobs, h)
|
||||
return prune(opts, gopts, repo, usedBlobs)
|
||||
}
|
||||
|
||||
type packInfo struct {
|
||||
usedBlobs uint
|
||||
unusedBlobs uint
|
||||
duplicateBlobs uint
|
||||
usedSize uint64
|
||||
unusedSize uint64
|
||||
tpe restic.BlobType
|
||||
}
|
||||
|
||||
type packInfoWithID struct {
|
||||
ID restic.ID
|
||||
packInfo
|
||||
}
|
||||
|
||||
func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedBlobs restic.BlobSet) error {
|
||||
ctx := gopts.ctx
|
||||
|
||||
var stats struct {
|
||||
blobs struct {
|
||||
used uint
|
||||
duplicate uint
|
||||
unused uint
|
||||
remove uint
|
||||
repack uint
|
||||
repackrm uint
|
||||
}
|
||||
size struct {
|
||||
used uint64
|
||||
duplicate uint64
|
||||
unused uint64
|
||||
remove uint64
|
||||
repack uint64
|
||||
repackrm uint64
|
||||
unref uint64
|
||||
}
|
||||
packs struct {
|
||||
used uint
|
||||
unused uint
|
||||
partlyUsed uint
|
||||
keep uint
|
||||
}
|
||||
}
|
||||
if len(missingBlobs) > 0 {
|
||||
return errors.Fatalf("%v not found in the new index\n"+
|
||||
|
||||
Verbosef("searching used packs...\n")
|
||||
|
||||
keepBlobs := restic.NewBlobSet()
|
||||
duplicateBlobs := restic.NewBlobSet()
|
||||
|
||||
// iterate over all blobs in index to find out which blobs are duplicates
|
||||
for blob := range repo.Index().Each(ctx) {
|
||||
bh := blob.Handle()
|
||||
switch {
|
||||
case usedBlobs.Has(bh): // used blob, move to keepBlobs
|
||||
usedBlobs.Delete(bh)
|
||||
keepBlobs.Insert(bh)
|
||||
case keepBlobs.Has(bh): // duplicate blob
|
||||
duplicateBlobs.Insert(bh)
|
||||
}
|
||||
}
|
||||
|
||||
// Check if all used blobs have been found in index
|
||||
if len(usedBlobs) != 0 {
|
||||
Warnf("%v not found in the new index\n"+
|
||||
"Data blobs seem to be missing, aborting prune to prevent further data loss!\n"+
|
||||
"Please report this error (along with the output of the 'prune' run) at\n"+
|
||||
"https://github.com/restic/restic/issues/new/choose", missingBlobs)
|
||||
"https://github.com/restic/restic/issues/new/choose", usedBlobs)
|
||||
return errorIndexIncomplete
|
||||
}
|
||||
|
||||
Verbosef("found %d of %d data blobs still in use, removing %d blobs\n",
|
||||
len(usedBlobs), stats.blobs, stats.blobs-len(usedBlobs))
|
||||
indexPack := make(map[restic.ID]packInfo)
|
||||
|
||||
// find packs that need a rewrite
|
||||
rewritePacks := restic.NewIDSet()
|
||||
for _, pack := range idx.Packs {
|
||||
if mixedBlobs(pack.Entries) {
|
||||
rewritePacks.Insert(pack.ID)
|
||||
continue
|
||||
// iterate over all blobs in index to generate packInfo
|
||||
for blob := range repo.Index().Each(ctx) {
|
||||
ip, ok := indexPack[blob.PackID]
|
||||
if !ok {
|
||||
ip = packInfo{tpe: blob.Type, usedSize: pack.HeaderSize}
|
||||
}
|
||||
// mark mixed packs with "Invalid blob type"
|
||||
if ip.tpe != blob.Type {
|
||||
ip.tpe = restic.InvalidBlob
|
||||
}
|
||||
|
||||
for _, blob := range pack.Entries {
|
||||
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
|
||||
if !usedBlobs.Has(h) {
|
||||
rewritePacks.Insert(pack.ID)
|
||||
continue
|
||||
}
|
||||
|
||||
if blobCount[h] > 1 {
|
||||
rewritePacks.Insert(pack.ID)
|
||||
}
|
||||
bh := blob.Handle()
|
||||
size := uint64(pack.PackedSizeOfBlob(blob.Length))
|
||||
switch {
|
||||
case duplicateBlobs.Has(bh): // duplicate blob
|
||||
ip.usedSize += size
|
||||
ip.duplicateBlobs++
|
||||
stats.size.duplicate += size
|
||||
stats.blobs.duplicate++
|
||||
case keepBlobs.Has(bh): // used blob, not duplicate
|
||||
ip.usedSize += size
|
||||
ip.usedBlobs++
|
||||
stats.size.used += size
|
||||
stats.blobs.used++
|
||||
default: // unused blob
|
||||
ip.unusedSize += size
|
||||
ip.unusedBlobs++
|
||||
stats.size.unused += size
|
||||
stats.blobs.unused++
|
||||
}
|
||||
// update indexPack
|
||||
indexPack[blob.PackID] = ip
|
||||
}
|
||||
|
||||
removeBytes := duplicateBytes
|
||||
|
||||
// find packs that are unneeded
|
||||
Verbosef("collecting packs for deletion and repacking\n")
|
||||
removePacksFirst := restic.NewIDSet()
|
||||
removePacks := restic.NewIDSet()
|
||||
repackPacks := restic.NewIDSet()
|
||||
|
||||
Verbosef("will remove %d invalid files\n", len(invalidFiles))
|
||||
for _, id := range invalidFiles {
|
||||
removePacks.Insert(id)
|
||||
var repackCandidates []packInfoWithID
|
||||
|
||||
repack := func(id restic.ID, p packInfo) {
|
||||
repackPacks.Insert(id)
|
||||
stats.blobs.repack += p.unusedBlobs + p.duplicateBlobs + p.usedBlobs
|
||||
stats.size.repack += p.unusedSize + p.usedSize
|
||||
stats.blobs.repackrm += p.unusedBlobs
|
||||
stats.size.repackrm += p.unusedSize
|
||||
}
|
||||
|
||||
for packID, p := range idx.Packs {
|
||||
|
||||
hasActiveBlob := false
|
||||
for _, blob := range p.Entries {
|
||||
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
|
||||
if usedBlobs.Has(h) {
|
||||
hasActiveBlob = true
|
||||
continue
|
||||
}
|
||||
|
||||
removeBytes += uint64(blob.Length)
|
||||
// loop over all packs and decide what to do
|
||||
bar := newProgressMax(!gopts.Quiet, uint64(len(indexPack)), "packs processed")
|
||||
bar.Start()
|
||||
err := repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error {
|
||||
p, ok := indexPack[id]
|
||||
if !ok {
|
||||
// Pack was not referenced in index and is not used => immediately remove!
|
||||
Verboseff("will remove pack %v as it is unused and not indexed\n", id.Str())
|
||||
removePacksFirst.Insert(id)
|
||||
stats.size.unref += uint64(packSize)
|
||||
return nil
|
||||
}
|
||||
|
||||
if hasActiveBlob {
|
||||
continue
|
||||
if p.unusedSize+p.usedSize != uint64(packSize) {
|
||||
Warnf("pack %s: calculated size %d does not match real size %d\nRun 'restic rebuild-index'.",
|
||||
id.Str(), p.unusedSize+p.usedSize, packSize)
|
||||
return errorSizeNotMatching
|
||||
}
|
||||
|
||||
removePacks.Insert(packID)
|
||||
|
||||
if !rewritePacks.Has(packID) {
|
||||
return errors.Fatalf("pack %v is unneeded, but not contained in rewritePacks", packID.Str())
|
||||
// statistics
|
||||
switch {
|
||||
case p.usedBlobs == 0 && p.duplicateBlobs == 0:
|
||||
stats.packs.unused++
|
||||
case p.unusedBlobs == 0:
|
||||
stats.packs.used++
|
||||
default:
|
||||
stats.packs.partlyUsed++
|
||||
}
|
||||
|
||||
rewritePacks.Delete(packID)
|
||||
}
|
||||
// decide what to do
|
||||
switch {
|
||||
case p.usedBlobs == 0 && p.duplicateBlobs == 0:
|
||||
// All blobs in pack are no longer used => remove pack!
|
||||
removePacks.Insert(id)
|
||||
stats.blobs.remove += p.unusedBlobs
|
||||
stats.size.remove += p.unusedSize
|
||||
|
||||
Verbosef("will delete %d packs and rewrite %d packs, this frees %s\n",
|
||||
len(removePacks), len(rewritePacks), formatBytes(uint64(removeBytes)))
|
||||
case opts.RepackCachableOnly && p.tpe == restic.DataBlob,
|
||||
// if this is a data pack and --repack-cacheable-only is set => keep pack!
|
||||
p.unusedBlobs == 0 && p.duplicateBlobs == 0 && p.tpe != restic.InvalidBlob: // && (!opts.RepackSmall || packSize >= repository.MinPackSize)
|
||||
// All blobs in pack are used and not duplicates/mixed => keep pack!
|
||||
stats.packs.keep++
|
||||
|
||||
var obsoletePacks restic.IDSet
|
||||
if len(rewritePacks) != 0 {
|
||||
bar := newProgressMax(!gopts.Quiet, uint64(len(rewritePacks)), "packs rewritten")
|
||||
obsoletePacks, err = repository.Repack(ctx, repo, rewritePacks, usedBlobs, bar)
|
||||
if err != nil {
|
||||
return err
|
||||
default:
|
||||
// all other packs are candidates for repacking
|
||||
repackCandidates = append(repackCandidates, packInfoWithID{ID: id, packInfo: p})
|
||||
}
|
||||
}
|
||||
|
||||
removePacks.Merge(obsoletePacks)
|
||||
|
||||
if err = rebuildIndex(ctx, repo, removePacks); err != nil {
|
||||
delete(indexPack, id)
|
||||
bar.Report(restic.Stat{Blobs: 1})
|
||||
return nil
|
||||
})
|
||||
bar.Done()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(indexPack) != 0 {
|
||||
Warnf("The index references pack files which are missing from the repository: %v\n", indexPack)
|
||||
return errorPacksMissing
|
||||
}
|
||||
|
||||
repackAllPacksWithDuplicates := true
|
||||
|
||||
maxUnusedSizeAfter := opts.MaxUnusedBytes
|
||||
if opts.MaxUnusedPercent < 100.0 {
|
||||
maxUnusedSizePercent := uint64(opts.MaxUnusedPercent / (100.0 - opts.MaxUnusedPercent) * float64(stats.size.used))
|
||||
if maxUnusedSizePercent < maxUnusedSizeAfter {
|
||||
maxUnusedSizeAfter = maxUnusedSizePercent
|
||||
}
|
||||
}
|
||||
|
||||
// Sort repackCandidates such that packs with highest ratio unused/used space are picked first.
|
||||
// This is equivalent to sorting by unused / total space.
|
||||
// Instead of unused[i] / used[i] > unused[j] / used[j] we use
|
||||
// unused[i] * used[j] > unused[j] * used[i] as uint32*uint32 < uint64
|
||||
// Morover duplicates and mixed are sorted to the beginning
|
||||
sort.Slice(repackCandidates, func(i, j int) bool {
|
||||
pi := repackCandidates[i].packInfo
|
||||
pj := repackCandidates[j].packInfo
|
||||
switch {
|
||||
case pi.duplicateBlobs > 0 && pj.duplicateBlobs == 0:
|
||||
return true
|
||||
case pj.duplicateBlobs > 0 && pi.duplicateBlobs == 0:
|
||||
return false
|
||||
case pi.tpe == restic.InvalidBlob && pj.tpe != restic.InvalidBlob:
|
||||
return true
|
||||
case pj.tpe == restic.InvalidBlob && pi.tpe != restic.InvalidBlob:
|
||||
return false
|
||||
//case opts.RepackSmall && pi.unusedSize+pi.usedSize < repository.MinPackSize && pj.unusedSize+pj.usedSize >= repository.MinPackSize:
|
||||
// return true
|
||||
//case opts.RepackSmall && pj.unusedSize+pj.usedSize < repository.MinPackSize && pi.unusedSize+pi.usedSize >= repository.MinPackSize:
|
||||
// return false
|
||||
}
|
||||
return pi.unusedSize*pj.usedSize > pj.unusedSize*pi.usedSize
|
||||
})
|
||||
|
||||
for _, p := range repackCandidates {
|
||||
reachedUnusedSizeAfter := (stats.size.unused-stats.size.remove-stats.size.repackrm < maxUnusedSizeAfter)
|
||||
reachedRepackSize := (len(opts.MaxRepackSize) > 0 && stats.size.repack+p.unusedSize+p.usedSize > opts.MaxRepackBytes)
|
||||
switch {
|
||||
case !reachedRepackSize && (p.duplicateBlobs > 0 || p.tpe == restic.InvalidBlob):
|
||||
// repacking duplicates/mixed is only limited by repackSize
|
||||
repack(p.ID, p.packInfo)
|
||||
|
||||
case reachedUnusedSizeAfter, reachedRepackSize:
|
||||
// for all other packs stop repacking if tolerated unused size is reached.
|
||||
stats.packs.keep++
|
||||
if p.duplicateBlobs > 0 {
|
||||
repackAllPacksWithDuplicates = false
|
||||
}
|
||||
|
||||
default:
|
||||
repack(p.ID, p.packInfo)
|
||||
}
|
||||
}
|
||||
|
||||
// if all duplicates are repacked, print out correct statistics
|
||||
if repackAllPacksWithDuplicates {
|
||||
stats.blobs.repackrm += stats.blobs.duplicate
|
||||
stats.size.repackrm += stats.size.duplicate
|
||||
}
|
||||
|
||||
Verboseff("\nused: %10d blobs / %s\n", stats.blobs.used, formatBytes(stats.size.used))
|
||||
if stats.blobs.duplicate > 0 {
|
||||
Verboseff("duplicates: %10d blobs / %s\n", stats.blobs.duplicate, formatBytes(stats.size.duplicate))
|
||||
}
|
||||
Verboseff("unused: %10d blobs / %s\n", stats.blobs.unused, formatBytes(stats.size.unused))
|
||||
if stats.size.unref > 0 {
|
||||
Verboseff("unreferenced: %s\n", formatBytes(stats.size.unref))
|
||||
}
|
||||
totalBlobs := stats.blobs.used + stats.blobs.unused + stats.blobs.duplicate
|
||||
totalSize := stats.size.used + stats.size.duplicate + stats.size.unused + stats.size.unref
|
||||
Verboseff("total: %10d blobs / %s\n", totalBlobs, formatBytes(totalSize))
|
||||
Verboseff("unused size: %s of total size\n", formatPercent(stats.size.unused, totalSize))
|
||||
|
||||
Verbosef("\nto repack: %10d blobs / %s\n", stats.blobs.repack, formatBytes(stats.size.repack))
|
||||
Verbosef("this removes %10d blobs / %s\n", stats.blobs.repackrm, formatBytes(stats.size.repackrm))
|
||||
Verbosef("to delete: %10d blobs / %s\n", stats.blobs.remove, formatBytes(stats.size.remove+stats.size.unref))
|
||||
totalPruneSize := stats.size.remove + stats.size.repackrm + stats.size.unref
|
||||
Verbosef("total prune: %10d blobs / %s\n", stats.blobs.remove+stats.blobs.repackrm, formatBytes(totalPruneSize))
|
||||
Verbosef("remaining: %10d blobs / %s\n", totalBlobs-(stats.blobs.remove+stats.blobs.repackrm), formatBytes(totalSize-totalPruneSize))
|
||||
unusedAfter := stats.size.unused - stats.size.remove - stats.size.repackrm
|
||||
Verbosef("unused size after prune: %s (%s of remaining size)\n",
|
||||
formatBytes(unusedAfter), formatPercent(unusedAfter, totalSize-totalPruneSize))
|
||||
Verbosef("\n")
|
||||
Verboseff("totally used packs: %10d\n", stats.packs.used)
|
||||
Verboseff("partly used packs: %10d\n", stats.packs.partlyUsed)
|
||||
Verboseff("unused packs: %10d\n\n", stats.packs.unused)
|
||||
|
||||
Verboseff("to keep: %10d packs\n", stats.packs.keep)
|
||||
Verboseff("to repack: %10d packs\n", len(repackPacks))
|
||||
Verboseff("to delete: %10d packs\n", len(removePacks))
|
||||
if len(removePacksFirst) > 0 {
|
||||
Verboseff("to delete: %10d unreferenced packs\n\n", len(removePacksFirst))
|
||||
}
|
||||
|
||||
if opts.DryRun {
|
||||
if !gopts.JSON && gopts.verbosity >= 2 {
|
||||
if len(removePacksFirst) > 0 {
|
||||
Printf("Would have removed the following unreferenced packs:\n%v\n\n", removePacksFirst)
|
||||
}
|
||||
Printf("Would have repacked and removed the following packs:\n%v\n\n", repackPacks)
|
||||
Printf("Would have removed the following no longer used packs:\n%v\n\n", removePacks)
|
||||
}
|
||||
// Always quit here if DryRun was set!
|
||||
return nil
|
||||
}
|
||||
|
||||
// unreferenced packs can be safely deleted first
|
||||
if len(removePacksFirst) != 0 {
|
||||
Verbosef("deleting unreferenced packs\n")
|
||||
DeleteFiles(gopts, repo, removePacksFirst, restic.PackFile)
|
||||
}
|
||||
|
||||
if len(repackPacks) != 0 {
|
||||
Verbosef("repacking packs\n")
|
||||
bar := newProgressMax(!gopts.Quiet, uint64(len(repackPacks)), "packs repacked")
|
||||
_, err := repository.Repack(ctx, repo, repackPacks, keepBlobs, bar)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Also remove repacked packs
|
||||
removePacks.Merge(repackPacks)
|
||||
}
|
||||
|
||||
if len(removePacks) != 0 {
|
||||
Verbosef("remove %d old packs\n", len(removePacks))
|
||||
if err = rebuildIndex(ctx, repo, removePacks); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
Verbosef("removing %d old packs\n", len(removePacks))
|
||||
DeleteFiles(gopts, repo, removePacks, restic.PackFile)
|
||||
}
|
||||
|
||||
|
@ -263,7 +477,7 @@ func pruneRepository(gopts GlobalOptions, repo restic.Repository) error {
|
|||
func getUsedBlobs(gopts GlobalOptions, repo restic.Repository, snapshots []*restic.Snapshot) (usedBlobs restic.BlobSet, err error) {
|
||||
ctx := gopts.ctx
|
||||
|
||||
Verbosef("find data that is still in use for %d snapshots\n", len(snapshots))
|
||||
Verbosef("finding data that is still in use for %d snapshots\n", len(snapshots))
|
||||
|
||||
usedBlobs = restic.NewBlobSet()
|
||||
|
||||
|
|
|
@ -270,8 +270,8 @@ func testRunForgetJSON(t testing.TB, gopts GlobalOptions, args ...string) {
|
|||
"Expected 2 snapshots to be removed, got %v", len(forgets[0].Remove))
|
||||
}
|
||||
|
||||
func testRunPrune(t testing.TB, gopts GlobalOptions) {
|
||||
rtest.OK(t, runPrune(gopts))
|
||||
func testRunPrune(t testing.TB, gopts GlobalOptions, opts PruneOptions) {
|
||||
rtest.OK(t, runPrune(opts, gopts))
|
||||
}
|
||||
|
||||
func testSetupBackupData(t testing.TB, env *testEnvironment) string {
|
||||
|
@ -1386,6 +1386,41 @@ func TestCheckRestoreNoLock(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestPrune(t *testing.T) {
|
||||
t.Run("0", func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnusedPercent: 0.0}
|
||||
checkOpts := CheckOptions{ReadData: true, CheckUnused: true}
|
||||
testPrune(t, opts, checkOpts)
|
||||
})
|
||||
|
||||
t.Run("50", func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnusedPercent: 50.0}
|
||||
checkOpts := CheckOptions{ReadData: true}
|
||||
testPrune(t, opts, checkOpts)
|
||||
})
|
||||
|
||||
t.Run("100", func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnusedPercent: 100.0}
|
||||
checkOpts := CheckOptions{ReadData: true}
|
||||
testPrune(t, opts, checkOpts)
|
||||
})
|
||||
|
||||
t.Run("CachableOnly", func(t *testing.T) {
|
||||
opts := PruneOptions{RepackCachableOnly: true}
|
||||
checkOpts := CheckOptions{ReadData: true}
|
||||
testPrune(t, opts, checkOpts)
|
||||
})
|
||||
|
||||
/* repack-small option will come in future
|
||||
t.Run("Small", func(t *testing.T) {
|
||||
opts = PruneOptions{MaxUnusedPercent: 100.0, RepackSmall: true}
|
||||
// The test case only produces small files; hence no unused blobs should remain.
|
||||
checkOpts = CheckOptions{ReadData: true, CheckUnused: true}
|
||||
testPrune(t, opts, checkOpts)
|
||||
})
|
||||
*/
|
||||
}
|
||||
|
||||
func testPrune(t *testing.T, pruneOpts PruneOptions, checkOpts CheckOptions) {
|
||||
env, cleanup := withTestEnvironment(t)
|
||||
defer cleanup()
|
||||
|
||||
|
@ -1406,10 +1441,12 @@ func TestPrune(t *testing.T) {
|
|||
|
||||
testRunForgetJSON(t, env.gopts)
|
||||
testRunForget(t, env.gopts, firstSnapshot[0].String())
|
||||
testRunPrune(t, env.gopts)
|
||||
testRunCheck(t, env.gopts)
|
||||
testRunPrune(t, env.gopts, pruneOpts)
|
||||
rtest.OK(t, runCheck(checkOpts, env.gopts, nil))
|
||||
}
|
||||
|
||||
var pruneDefaultOptions = PruneOptions{MaxUnusedPercent: 1.5}
|
||||
|
||||
func listPacks(gopts GlobalOptions, t *testing.T) restic.IDSet {
|
||||
r, err := OpenRepository(gopts)
|
||||
rtest.OK(t, err)
|
||||
|
@ -1452,14 +1489,8 @@ func TestPruneWithDamagedRepository(t *testing.T) {
|
|||
"expected one snapshot, got %v", snapshotIDs)
|
||||
|
||||
// prune should fail
|
||||
err := runPrune(env.gopts)
|
||||
if err == nil {
|
||||
t.Fatalf("expected prune to fail")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "blobs seem to be missing") {
|
||||
t.Fatalf("did not find hint for missing blobs")
|
||||
}
|
||||
t.Log(err)
|
||||
rtest.Assert(t, runPrune(pruneDefaultOptions, env.gopts) == errorPacksMissing,
|
||||
"prune should have reported index not complete error")
|
||||
}
|
||||
|
||||
// Test repos for edge cases
|
||||
|
@ -1469,37 +1500,37 @@ func TestEdgeCaseRepos(t *testing.T) {
|
|||
// repo where index is completely missing
|
||||
// => check and prune should fail
|
||||
t.Run("no-index", func(t *testing.T) {
|
||||
testEdgeCaseRepo(t, "repo-index-missing.tar.gz", opts, false, false)
|
||||
testEdgeCaseRepo(t, "repo-index-missing.tar.gz", opts, pruneDefaultOptions, false, false)
|
||||
})
|
||||
|
||||
// repo where an existing and used blob is missing from the index
|
||||
// => check should fail, prune should heal this
|
||||
// => check and prune should fail
|
||||
t.Run("index-missing-blob", func(t *testing.T) {
|
||||
testEdgeCaseRepo(t, "repo-index-missing-blob.tar.gz", opts, false, true)
|
||||
testEdgeCaseRepo(t, "repo-index-missing-blob.tar.gz", opts, pruneDefaultOptions, false, false)
|
||||
})
|
||||
|
||||
// repo where a blob is missing
|
||||
// => check and prune should fail
|
||||
t.Run("no-data", func(t *testing.T) {
|
||||
testEdgeCaseRepo(t, "repo-data-missing.tar.gz", opts, false, false)
|
||||
testEdgeCaseRepo(t, "repo-data-missing.tar.gz", opts, pruneDefaultOptions, false, false)
|
||||
})
|
||||
|
||||
// repo where data exists that is not referenced
|
||||
// => check and prune should fully work
|
||||
t.Run("unreferenced-data", func(t *testing.T) {
|
||||
testEdgeCaseRepo(t, "repo-unreferenced-data.tar.gz", opts, true, true)
|
||||
testEdgeCaseRepo(t, "repo-unreferenced-data.tar.gz", opts, pruneDefaultOptions, true, true)
|
||||
})
|
||||
|
||||
// repo where an obsolete index still exists
|
||||
// => check and prune should fully work
|
||||
t.Run("obsolete-index", func(t *testing.T) {
|
||||
testEdgeCaseRepo(t, "repo-obsolete-index.tar.gz", opts, true, true)
|
||||
testEdgeCaseRepo(t, "repo-obsolete-index.tar.gz", opts, pruneDefaultOptions, true, true)
|
||||
})
|
||||
|
||||
// repo which contains mixed (data/tree) packs
|
||||
// => check and prune should fully work
|
||||
t.Run("mixed-packs", func(t *testing.T) {
|
||||
testEdgeCaseRepo(t, "repo-mixed.tar.gz", opts, true, true)
|
||||
testEdgeCaseRepo(t, "repo-mixed.tar.gz", opts, pruneDefaultOptions, true, true)
|
||||
})
|
||||
|
||||
// repo which contains duplicate blobs
|
||||
|
@ -1510,11 +1541,11 @@ func TestEdgeCaseRepos(t *testing.T) {
|
|||
CheckUnused: true,
|
||||
}
|
||||
t.Run("duplicates", func(t *testing.T) {
|
||||
testEdgeCaseRepo(t, "repo-duplicates.tar.gz", opts, false, true)
|
||||
testEdgeCaseRepo(t, "repo-duplicates.tar.gz", opts, pruneDefaultOptions, false, true)
|
||||
})
|
||||
}
|
||||
|
||||
func testEdgeCaseRepo(t *testing.T, tarfile string, options CheckOptions, checkOK, pruneOK bool) {
|
||||
func testEdgeCaseRepo(t *testing.T, tarfile string, optionsCheck CheckOptions, optionsPrune PruneOptions, checkOK, pruneOK bool) {
|
||||
env, cleanup := withTestEnvironment(t)
|
||||
defer cleanup()
|
||||
|
||||
|
@ -1524,15 +1555,15 @@ func testEdgeCaseRepo(t *testing.T, tarfile string, options CheckOptions, checkO
|
|||
if checkOK {
|
||||
testRunCheck(t, env.gopts)
|
||||
} else {
|
||||
rtest.Assert(t, runCheck(options, env.gopts, nil) != nil,
|
||||
rtest.Assert(t, runCheck(optionsCheck, env.gopts, nil) != nil,
|
||||
"check should have reported an error")
|
||||
}
|
||||
|
||||
if pruneOK {
|
||||
testRunPrune(t, env.gopts)
|
||||
testRunPrune(t, env.gopts, optionsPrune)
|
||||
testRunCheck(t, env.gopts)
|
||||
} else {
|
||||
rtest.Assert(t, runPrune(env.gopts) != nil,
|
||||
rtest.Assert(t, runPrune(optionsPrune, env.gopts) != nil,
|
||||
"prune should have reported an error")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,12 +23,11 @@ data that was referenced by the snapshot from the repository. This can
|
|||
be automated with the ``--prune`` option of the ``forget`` command,
|
||||
which runs ``prune`` automatically if snapshots have been removed.
|
||||
|
||||
.. Warning::
|
||||
|
||||
Pruning snapshots can be a very time-consuming process, taking nearly
|
||||
as long as backups themselves. During a prune operation, the index is
|
||||
locked and backups cannot be completed. Performance improvements are
|
||||
planned for this feature.
|
||||
Pruning snapshots can be a time-consuming process, depending on the
|
||||
amount of snapshots and data to process. During a prune operation, the
|
||||
repository is locked and backups cannot be completed. Please plan your
|
||||
pruning so that there's time to complete it and it doesn't interfere with
|
||||
regular backup runs.
|
||||
|
||||
It is advisable to run ``restic check`` after pruning, to make sure
|
||||
you are alerted, should the internal data structures of the repository
|
||||
|
@ -82,20 +81,32 @@ command must be run:
|
|||
|
||||
$ restic -r /srv/restic-repo prune
|
||||
enter password for repository:
|
||||
repository 33002c5e opened successfully, password is correct
|
||||
loading all snapshots...
|
||||
loading indexes...
|
||||
finding data that is still in use for 4 snapshots
|
||||
[0:00] 100.00% 4 / 4 snapshots
|
||||
searching used packs...
|
||||
collecting packs for deletion and repacking
|
||||
[0:00] 100.00% 5 / 5 packs processed
|
||||
|
||||
to repack: 69 blobs / 1.078 MiB
|
||||
this removes 67 blobs / 1.047 MiB
|
||||
to delete: 7 blobs / 25.726 KiB
|
||||
total prune: 74 blobs / 1.072 MiB
|
||||
remaining: 16 blobs / 38.003 KiB
|
||||
unused size after prune: 0 B (0.00% of remaining size)
|
||||
|
||||
repacking packs
|
||||
[0:00] 100.00% 2 / 2 packs repacked
|
||||
counting files in repo
|
||||
building new index for repo
|
||||
[0:00] 100.00% 22 / 22 files
|
||||
repository contains 22 packs (8512 blobs) with 100.092 MiB bytes
|
||||
processed 8512 blobs: 0 duplicate blobs, 0B duplicate
|
||||
load all snapshots
|
||||
find data that is still in use for 1 snapshots
|
||||
[0:00] 100.00% 1 / 1 snapshots
|
||||
found 8433 of 8512 data blobs still in use
|
||||
will rewrite 3 packs
|
||||
creating new index
|
||||
[0:00] 86.36% 19 / 22 files
|
||||
saved new index as 544a5084
|
||||
[0:00] 100.00% 3 / 3 packs
|
||||
finding old index files
|
||||
saved new indexes as [59270b3a]
|
||||
remove 4 old index files
|
||||
[0:00] 100.00% 4 / 4 files deleted
|
||||
removing 3 old packs
|
||||
[0:00] 100.00% 3 / 3 files deleted
|
||||
done
|
||||
|
||||
Afterwards the repository is smaller.
|
||||
|
@ -119,19 +130,31 @@ to ``forget``:
|
|||
8c02b94b 2017-02-21 10:48:33 mopped /home/user/work
|
||||
|
||||
1 snapshots have been removed, running prune
|
||||
counting files in repo
|
||||
building new index for repo
|
||||
[0:00] 100.00% 37 / 37 packs
|
||||
repository contains 37 packs (5521 blobs) with 151.012 MiB bytes
|
||||
processed 5521 blobs: 0 duplicate blobs, 0B duplicate
|
||||
load all snapshots
|
||||
find data that is still in use for 1 snapshots
|
||||
loading all snapshots...
|
||||
loading indexes...
|
||||
finding data that is still in use for 1 snapshots
|
||||
[0:00] 100.00% 1 / 1 snapshots
|
||||
found 5323 of 5521 data blobs still in use, removing 198 blobs
|
||||
will delete 0 packs and rewrite 27 packs, this frees 22.106 MiB
|
||||
creating new index
|
||||
[0:00] 100.00% 30 / 30 packs
|
||||
saved new index as b49f3e68
|
||||
searching used packs...
|
||||
collecting packs for deletion and repacking
|
||||
[0:00] 100.00% 5 / 5 packs processed
|
||||
|
||||
to repack: 69 blobs / 1.078 MiB
|
||||
this removes 67 blobs / 1.047 MiB
|
||||
to delete: 7 blobs / 25.726 KiB
|
||||
total prune: 74 blobs / 1.072 MiB
|
||||
remaining: 16 blobs / 38.003 KiB
|
||||
unused size after prune: 0 B (0.00% of remaining size)
|
||||
|
||||
repacking packs
|
||||
[0:00] 100.00% 2 / 2 packs repacked
|
||||
counting files in repo
|
||||
[0:00] 100.00% 3 / 3 packs
|
||||
finding old index files
|
||||
saved new indexes as [59270b3a]
|
||||
remove 4 old index files
|
||||
[0:00] 100.00% 4 / 4 files deleted
|
||||
removing 3 old packs
|
||||
[0:00] 100.00% 3 / 3 files deleted
|
||||
done
|
||||
|
||||
Removing snapshots according to a policy
|
||||
|
@ -282,3 +305,44 @@ last-day-of-the-months (11 or 12 depends if the 5 weeklies cross a month).
|
|||
And finally 75 last-day-of-the-year snapshots. All other snapshots are
|
||||
removed.
|
||||
|
||||
Customize pruning
|
||||
*****************
|
||||
|
||||
To understand the custom options, we first explain how the pruning process works:
|
||||
|
||||
- First all snapshots and directories within snapshots are scanned to determine
|
||||
which data is still in use.
|
||||
- Then for all pack files ``prune`` finds out if the file is fully used, partly
|
||||
used or completely unused.
|
||||
- Completely unused packs are marked for deletion. Fully used packs are kept.
|
||||
A partially used pack is either kept or marked for repacking depending on user
|
||||
options.
|
||||
Note that for repacking, restic must download the file from the repository
|
||||
storage and reupload the needed data in the repository. This can be very
|
||||
time-consuming for remote repositories.
|
||||
- After deciding what to do, ``prune`` will actually perform the repack, modify
|
||||
the index according to the changes and delete the obsolete files.
|
||||
|
||||
The ``prune`` command accepts the following options:
|
||||
|
||||
- ``--max-unused limit`` allow unused data up to the specified limit within the repository.
|
||||
This allows restic to keep partly used packs instead of repacking them.
|
||||
The limit can be specified as size, e.g. "200M" or in percentage with respect to the total
|
||||
repository size, e.g. "0.5%".
|
||||
``prune`` tries to repack as little data as possible while still ensuring this
|
||||
limit for unused data.
|
||||
If you want to minimize the space used by your repository, use a value of 0%.
|
||||
If you want to minimize the time and bandwidth used by the ``prune`` command, use a
|
||||
high value. A value of 100% will not require any pack file to be repacked.
|
||||
The default value is 5%.
|
||||
- ``--max-repack-size size`` if set limits the total size of packs to repack.
|
||||
As ``prune`` first stores all repacked packs and deletes the obsolete packs at the end,
|
||||
this option might be handy if you expect many packs to be repacked and fear to run low
|
||||
on storage.
|
||||
- ``--repack-cacheable-only`` if set to true only pack files which are cacheable are repacked.
|
||||
Other pack files are not repacked, if this option is set.
|
||||
This allows a very fast repacking using only cached data. It can, however, imply that the
|
||||
unused data in your repository exceeds the value given by ``--max-unused-percent``.
|
||||
The default value is false.
|
||||
- ``--dry-run`` only show what ``prune`` would do.
|
||||
- ``--verbose`` increased verbosity shows additional statistics for ``prune``.
|
||||
|
|
Loading…
Reference in a new issue