2016-08-04 17:42:40 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
2017-07-23 12:21:03 +00:00
|
|
|
"github.com/restic/restic/internal/debug"
|
|
|
|
"github.com/restic/restic/internal/errors"
|
|
|
|
"github.com/restic/restic/internal/index"
|
|
|
|
"github.com/restic/restic/internal/repository"
|
2017-07-24 15:42:25 +00:00
|
|
|
"github.com/restic/restic/internal/restic"
|
2017-07-23 12:21:03 +00:00
|
|
|
|
2016-09-17 10:36:05 +00:00
|
|
|
"github.com/spf13/cobra"
|
2016-08-04 17:42:40 +00:00
|
|
|
)
|
|
|
|
|
2016-09-17 10:36:05 +00:00
|
|
|
var cmdPrune = &cobra.Command{
|
|
|
|
Use: "prune [flags]",
|
2017-09-11 16:32:44 +00:00
|
|
|
Short: "Remove unneeded data from the repository",
|
2016-09-17 10:36:05 +00:00
|
|
|
Long: `
|
|
|
|
The "prune" command checks the repository and removes data that is not
|
|
|
|
referenced and therefore not needed any more.
|
2019-11-05 06:03:38 +00:00
|
|
|
|
|
|
|
EXIT STATUS
|
|
|
|
===========
|
|
|
|
|
|
|
|
Exit status is 0 if the command was successful, and non-zero if there was any error.
|
2016-09-17 10:36:05 +00:00
|
|
|
`,
|
2017-08-06 19:02:16 +00:00
|
|
|
DisableAutoGenTag: true,
|
2016-09-17 10:36:05 +00:00
|
|
|
RunE: func(cmd *cobra.Command, args []string) error {
|
|
|
|
return runPrune(globalOptions)
|
|
|
|
},
|
2016-08-04 17:42:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func init() {
|
2016-09-17 10:36:05 +00:00
|
|
|
cmdRoot.AddCommand(cmdPrune)
|
2016-08-04 17:42:40 +00:00
|
|
|
}
|
|
|
|
|
2017-06-15 10:40:03 +00:00
|
|
|
// shortenStatus returns s cut down to at most maxLength bytes.
//
// If s already fits it is returned unchanged. Otherwise the result is the
// leading part of s followed by "..." so that the total is exactly maxLength
// bytes; when maxLength is smaller than 3 there is no room for the ellipsis
// and s is simply cut at maxLength. A negative maxLength is treated as zero
// and yields the empty string.
//
// Lengths are measured in bytes, so a multi-byte UTF-8 sequence at the cut
// position may be split.
func shortenStatus(maxLength int, s string) string {
	// A negative limit would make the slice expressions below panic, so
	// treat it as "no room at all".
	if maxLength < 0 {
		maxLength = 0
	}

	if len(s) <= maxLength {
		return s
	}

	// Too short to hold the ellipsis: cut hard.
	if maxLength < 3 {
		return s[:maxLength]
	}

	return s[:maxLength-3] + "..."
}
|
|
|
|
|
2016-09-17 10:36:05 +00:00
|
|
|
func runPrune(gopts GlobalOptions) error {
|
|
|
|
repo, err := OpenRepository(gopts)
|
2016-08-04 17:42:40 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
lock, err := lockRepoExclusive(repo)
|
|
|
|
defer unlockRepo(lock)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-06-12 07:24:38 +00:00
|
|
|
// we do not need index updates while pruning!
|
|
|
|
repo.DisableAutoIndexUpdate()
|
|
|
|
|
2017-02-21 09:58:30 +00:00
|
|
|
return pruneRepository(gopts, repo)
|
|
|
|
}
|
|
|
|
|
2017-07-20 20:22:51 +00:00
|
|
|
func mixedBlobs(list []restic.Blob) bool {
|
|
|
|
var tree, data bool
|
|
|
|
|
|
|
|
for _, pb := range list {
|
|
|
|
switch pb.Type {
|
|
|
|
case restic.TreeBlob:
|
|
|
|
tree = true
|
|
|
|
case restic.DataBlob:
|
|
|
|
data = true
|
|
|
|
}
|
|
|
|
|
|
|
|
if tree && data {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2017-02-21 09:58:30 +00:00
|
|
|
func pruneRepository(gopts GlobalOptions, repo restic.Repository) error {
|
2017-06-04 09:16:55 +00:00
|
|
|
ctx := gopts.ctx
|
|
|
|
|
|
|
|
err := repo.LoadIndex(ctx)
|
2016-08-04 17:42:40 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
var stats struct {
|
|
|
|
blobs int
|
|
|
|
packs int
|
|
|
|
snapshots int
|
2016-08-15 18:13:56 +00:00
|
|
|
bytes int64
|
2016-08-04 17:42:40 +00:00
|
|
|
}
|
|
|
|
|
2016-09-17 10:36:05 +00:00
|
|
|
Verbosef("counting files in repo\n")
|
2020-08-16 09:16:38 +00:00
|
|
|
err = repo.List(ctx, restic.PackFile, func(restic.ID, int64) error {
|
2016-08-15 19:10:20 +00:00
|
|
|
stats.packs++
|
2018-01-21 16:25:36 +00:00
|
|
|
return nil
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2016-08-15 19:10:20 +00:00
|
|
|
}
|
|
|
|
|
2016-09-17 10:36:05 +00:00
|
|
|
Verbosef("building new index for repo\n")
|
2016-08-15 19:10:20 +00:00
|
|
|
|
2016-09-17 10:36:05 +00:00
|
|
|
bar := newProgressMax(!gopts.Quiet, uint64(stats.packs), "packs")
|
2017-06-15 13:03:05 +00:00
|
|
|
idx, invalidFiles, err := index.New(ctx, repo, restic.NewIDSet(), bar)
|
2016-08-15 19:10:20 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2017-06-15 13:03:05 +00:00
|
|
|
for _, id := range invalidFiles {
|
|
|
|
Warnf("incomplete pack file (will be removed): %v\n", id)
|
|
|
|
}
|
|
|
|
|
2017-01-15 14:45:52 +00:00
|
|
|
blobs := 0
|
2016-08-15 18:13:56 +00:00
|
|
|
for _, pack := range idx.Packs {
|
|
|
|
stats.bytes += pack.Size
|
2017-01-15 14:45:52 +00:00
|
|
|
blobs += len(pack.Entries)
|
2016-08-04 17:42:40 +00:00
|
|
|
}
|
2017-09-22 08:07:24 +00:00
|
|
|
Verbosef("repository contains %v packs (%v blobs) with %v\n",
|
2017-01-15 14:45:52 +00:00
|
|
|
len(idx.Packs), blobs, formatBytes(uint64(stats.bytes)))
|
2016-08-04 17:42:40 +00:00
|
|
|
|
2016-09-01 14:04:29 +00:00
|
|
|
blobCount := make(map[restic.BlobHandle]int)
|
2018-08-14 20:06:05 +00:00
|
|
|
var duplicateBlobs uint64
|
|
|
|
var duplicateBytes uint64
|
2016-08-04 17:42:40 +00:00
|
|
|
|
2016-08-15 18:13:56 +00:00
|
|
|
// find duplicate blobs
|
|
|
|
for _, p := range idx.Packs {
|
|
|
|
for _, entry := range p.Entries {
|
2016-08-04 17:42:40 +00:00
|
|
|
stats.blobs++
|
2016-09-01 14:04:29 +00:00
|
|
|
h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
|
2016-08-15 18:13:56 +00:00
|
|
|
blobCount[h]++
|
2016-08-04 17:42:40 +00:00
|
|
|
|
2016-08-15 18:13:56 +00:00
|
|
|
if blobCount[h] > 1 {
|
2016-08-04 17:42:40 +00:00
|
|
|
duplicateBlobs++
|
2018-08-14 20:06:05 +00:00
|
|
|
duplicateBytes += uint64(entry.Length)
|
2016-08-04 17:42:40 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-17 10:36:05 +00:00
|
|
|
Verbosef("processed %d blobs: %d duplicate blobs, %v duplicate\n",
|
2016-08-15 19:13:38 +00:00
|
|
|
stats.blobs, duplicateBlobs, formatBytes(uint64(duplicateBytes)))
|
2016-09-17 10:36:05 +00:00
|
|
|
Verbosef("load all snapshots\n")
|
2016-08-04 17:42:40 +00:00
|
|
|
|
2016-08-15 18:13:56 +00:00
|
|
|
// find referenced blobs
|
2017-06-04 09:16:55 +00:00
|
|
|
snapshots, err := restic.LoadAllSnapshots(ctx, repo)
|
2016-08-04 17:42:40 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
stats.snapshots = len(snapshots)
|
|
|
|
|
2020-07-19 07:48:53 +00:00
|
|
|
usedBlobs, err := getUsedBlobs(gopts, repo, snapshots)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2016-08-04 17:42:40 +00:00
|
|
|
}
|
|
|
|
|
2020-03-31 21:31:33 +00:00
|
|
|
var missingBlobs []restic.BlobHandle
|
2020-03-31 12:33:32 +00:00
|
|
|
for h := range usedBlobs {
|
|
|
|
if _, ok := blobCount[h]; !ok {
|
2020-03-31 21:31:33 +00:00
|
|
|
missingBlobs = append(missingBlobs, h)
|
2020-03-31 12:33:32 +00:00
|
|
|
}
|
2017-09-19 08:50:07 +00:00
|
|
|
}
|
2020-03-31 21:31:33 +00:00
|
|
|
if len(missingBlobs) > 0 {
|
|
|
|
return errors.Fatalf("%v not found in the new index\n"+
|
|
|
|
"Data blobs seem to be missing, aborting prune to prevent further data loss!\n"+
|
|
|
|
"Please report this error (along with the output of the 'prune' run) at\n"+
|
|
|
|
"https://github.com/restic/restic/issues/new/choose", missingBlobs)
|
|
|
|
}
|
2017-09-19 08:50:07 +00:00
|
|
|
|
2016-09-17 10:36:05 +00:00
|
|
|
Verbosef("found %d of %d data blobs still in use, removing %d blobs\n",
|
2016-09-12 12:26:47 +00:00
|
|
|
len(usedBlobs), stats.blobs, stats.blobs-len(usedBlobs))
|
2016-08-04 17:42:40 +00:00
|
|
|
|
2016-08-15 18:13:56 +00:00
|
|
|
// find packs that need a rewrite
|
2016-09-01 14:04:29 +00:00
|
|
|
rewritePacks := restic.NewIDSet()
|
2017-01-15 14:45:52 +00:00
|
|
|
for _, pack := range idx.Packs {
|
2017-07-20 20:22:51 +00:00
|
|
|
if mixedBlobs(pack.Entries) {
|
|
|
|
rewritePacks.Insert(pack.ID)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2017-01-15 14:45:52 +00:00
|
|
|
for _, blob := range pack.Entries {
|
|
|
|
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
|
|
|
|
if !usedBlobs.Has(h) {
|
|
|
|
rewritePacks.Insert(pack.ID)
|
|
|
|
continue
|
|
|
|
}
|
2016-08-04 17:42:40 +00:00
|
|
|
|
2017-01-15 14:45:52 +00:00
|
|
|
if blobCount[h] > 1 {
|
|
|
|
rewritePacks.Insert(pack.ID)
|
|
|
|
}
|
2016-08-04 17:42:40 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-03-04 14:17:44 +00:00
|
|
|
removeBytes := duplicateBytes
|
2016-09-12 12:26:47 +00:00
|
|
|
|
2016-08-25 20:35:22 +00:00
|
|
|
// find packs that are unneeded
|
2016-09-01 14:04:29 +00:00
|
|
|
removePacks := restic.NewIDSet()
|
2017-06-15 13:03:05 +00:00
|
|
|
|
|
|
|
Verbosef("will remove %d invalid files\n", len(invalidFiles))
|
|
|
|
for _, id := range invalidFiles {
|
|
|
|
removePacks.Insert(id)
|
|
|
|
}
|
|
|
|
|
2016-08-25 20:35:22 +00:00
|
|
|
for packID, p := range idx.Packs {
|
2016-09-12 12:26:47 +00:00
|
|
|
|
|
|
|
hasActiveBlob := false
|
2016-08-25 20:35:22 +00:00
|
|
|
for _, blob := range p.Entries {
|
2016-09-01 14:04:29 +00:00
|
|
|
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
|
2016-08-25 20:35:22 +00:00
|
|
|
if usedBlobs.Has(h) {
|
2016-09-12 12:26:47 +00:00
|
|
|
hasActiveBlob = true
|
|
|
|
continue
|
2016-08-25 20:35:22 +00:00
|
|
|
}
|
2016-09-12 12:26:47 +00:00
|
|
|
|
2018-08-14 20:06:05 +00:00
|
|
|
removeBytes += uint64(blob.Length)
|
2016-09-12 12:26:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if hasActiveBlob {
|
|
|
|
continue
|
2016-08-25 20:35:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
removePacks.Insert(packID)
|
|
|
|
|
|
|
|
if !rewritePacks.Has(packID) {
|
2016-09-01 20:17:37 +00:00
|
|
|
return errors.Fatalf("pack %v is unneeded, but not contained in rewritePacks", packID.Str())
|
2016-08-25 20:35:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
rewritePacks.Delete(packID)
|
|
|
|
}
|
|
|
|
|
2016-09-17 10:36:05 +00:00
|
|
|
Verbosef("will delete %d packs and rewrite %d packs, this frees %s\n",
|
2016-09-12 12:26:47 +00:00
|
|
|
len(removePacks), len(rewritePacks), formatBytes(uint64(removeBytes)))
|
2016-08-04 17:42:40 +00:00
|
|
|
|
2017-07-18 21:07:40 +00:00
|
|
|
var obsoletePacks restic.IDSet
|
2017-03-04 16:38:34 +00:00
|
|
|
if len(rewritePacks) != 0 {
|
2020-08-03 17:32:46 +00:00
|
|
|
bar := newProgressMax(!gopts.Quiet, uint64(len(rewritePacks)), "packs rewritten")
|
2017-07-18 21:07:40 +00:00
|
|
|
obsoletePacks, err = repository.Repack(ctx, repo, rewritePacks, usedBlobs, bar)
|
2017-03-04 16:38:34 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-08-04 17:42:40 +00:00
|
|
|
}
|
|
|
|
|
2017-07-18 21:07:40 +00:00
|
|
|
removePacks.Merge(obsoletePacks)
|
|
|
|
|
2017-06-15 11:12:46 +00:00
|
|
|
if err = rebuildIndex(ctx, repo, removePacks); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2017-03-04 16:38:34 +00:00
|
|
|
if len(removePacks) != 0 {
|
2020-07-17 20:14:46 +00:00
|
|
|
Verbosef("remove %d old packs\n", len(removePacks))
|
2020-08-16 09:16:38 +00:00
|
|
|
DeleteFiles(gopts, repo, removePacks, restic.PackFile)
|
2016-08-25 20:35:22 +00:00
|
|
|
}
|
|
|
|
|
2016-09-17 10:36:05 +00:00
|
|
|
Verbosef("done\n")
|
2016-08-04 17:42:40 +00:00
|
|
|
return nil
|
|
|
|
}
|
2020-07-19 07:48:53 +00:00
|
|
|
|
|
|
|
func getUsedBlobs(gopts GlobalOptions, repo restic.Repository, snapshots []*restic.Snapshot) (usedBlobs restic.BlobSet, err error) {
|
|
|
|
ctx := gopts.ctx
|
|
|
|
|
|
|
|
Verbosef("find data that is still in use for %d snapshots\n", len(snapshots))
|
|
|
|
|
|
|
|
usedBlobs = restic.NewBlobSet()
|
|
|
|
|
|
|
|
bar := newProgressMax(!gopts.Quiet, uint64(len(snapshots)), "snapshots")
|
|
|
|
bar.Start()
|
2020-08-03 17:31:49 +00:00
|
|
|
defer bar.Done()
|
2020-07-19 07:48:53 +00:00
|
|
|
for _, sn := range snapshots {
|
|
|
|
debug.Log("process snapshot %v", sn.ID())
|
|
|
|
|
|
|
|
err = restic.FindUsedBlobs(ctx, repo, *sn.Tree, usedBlobs)
|
|
|
|
if err != nil {
|
|
|
|
if repo.Backend().IsNotExist(err) {
|
|
|
|
return nil, errors.Fatal("unable to load a tree from the repo: " + err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
debug.Log("processed snapshot %v", sn.ID())
|
|
|
|
bar.Report(restic.Stat{Blobs: 1})
|
|
|
|
}
|
|
|
|
return usedBlobs, nil
|
|
|
|
}
|