// Source file: restic/cmd/restic/cmd_prune.go

package main
import (
"fmt"
"time"
2017-07-23 12:21:03 +00:00
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/index"
"github.com/restic/restic/internal/repository"
2017-07-24 15:42:25 +00:00
"github.com/restic/restic/internal/restic"
2017-07-23 12:21:03 +00:00
2016-09-17 10:36:05 +00:00
"github.com/spf13/cobra"
)
2016-09-17 10:36:05 +00:00
var cmdPrune = &cobra.Command{
Use: "prune [flags]",
Short: "Remove unneeded data from the repository",
2016-09-17 10:36:05 +00:00
Long: `
The "prune" command checks the repository and removes data that is not
referenced and therefore not needed any more.
`,
DisableAutoGenTag: true,
2016-09-17 10:36:05 +00:00
RunE: func(cmd *cobra.Command, args []string) error {
return runPrune(globalOptions)
},
}
func init() {
2016-09-17 10:36:05 +00:00
cmdRoot.AddCommand(cmdPrune)
}
// shortenStatus limits s to at most maxLength characters (runes).
//
// Strings that already fit are returned unchanged. Longer strings are cut and
// terminated with "..." so that the result is exactly maxLength runes long;
// when maxLength is smaller than 3 there is no room for the ellipsis and the
// string is simply cut. A maxLength of zero or less yields the empty string.
//
// Truncation works on runes rather than bytes so that a multi-byte UTF-8
// sequence is never split in the middle.
func shortenStatus(maxLength int, s string) string {
	if maxLength <= 0 {
		return ""
	}

	runes := []rune(s)
	if len(runes) <= maxLength {
		return s
	}

	if maxLength < 3 {
		return string(runes[:maxLength])
	}

	return string(runes[:maxLength-3]) + "..."
}
// newProgressMax creates a progress reporter that counts blobs towards max.
//
// On every update it prints a status line of the form
// "[elapsed] percent current / max description", shortened to the terminal
// width when stdoutTerminalWidth reports a positive value. When the progress
// is done, a newline is written to stdout. It returns nil if show is false.
func newProgressMax(show bool, max uint64, description string) *restic.Progress {
	if !show {
		return nil
	}

	// render formats and prints the current state of the counter.
	render := func(s restic.Stat, d time.Duration, ticker bool) {
		line := fmt.Sprintf("[%s] %s %d / %d %s",
			formatDuration(d), formatPercent(s.Blobs, max), s.Blobs, max, description)

		width := stdoutTerminalWidth()
		if width > 0 {
			line = shortenStatus(width, line)
		}

		PrintProgress("%s", line)
	}

	// finish terminates the status line.
	finish := func(restic.Stat, time.Duration, bool) {
		fmt.Printf("\n")
	}

	progress := restic.NewProgress()
	progress.OnUpdate = render
	progress.OnDone = finish
	return progress
}
2016-09-17 10:36:05 +00:00
func runPrune(gopts GlobalOptions) error {
repo, err := OpenRepository(gopts)
if err != nil {
return err
}
lock, err := lockRepoExclusive(repo)
defer unlockRepo(lock)
if err != nil {
return err
}
2017-02-21 09:58:30 +00:00
return pruneRepository(gopts, repo)
}
func pruneRepository(gopts GlobalOptions, repo restic.Repository) error {
2017-06-04 09:16:55 +00:00
ctx := gopts.ctx
err := repo.LoadIndex(ctx)
if err != nil {
return err
}
var stats struct {
blobs int
packs int
snapshots int
2016-08-15 18:13:56 +00:00
bytes int64
}
2016-09-17 10:36:05 +00:00
Verbosef("counting files in repo\n")
2017-06-04 09:16:55 +00:00
for range repo.List(ctx, restic.DataFile) {
2016-08-15 19:10:20 +00:00
stats.packs++
}
2016-09-17 10:36:05 +00:00
Verbosef("building new index for repo\n")
2016-08-15 19:10:20 +00:00
2016-09-17 10:36:05 +00:00
bar := newProgressMax(!gopts.Quiet, uint64(stats.packs), "packs")
idx, invalidFiles, err := index.New(ctx, repo, restic.NewIDSet(), bar)
2016-08-15 19:10:20 +00:00
if err != nil {
return err
}
for _, id := range invalidFiles {
Warnf("incomplete pack file (will be removed): %v\n", id)
}
blobs := 0
2016-08-15 18:13:56 +00:00
for _, pack := range idx.Packs {
stats.bytes += pack.Size
blobs += len(pack.Entries)
}
2017-09-22 08:07:24 +00:00
Verbosef("repository contains %v packs (%v blobs) with %v\n",
len(idx.Packs), blobs, formatBytes(uint64(stats.bytes)))
2016-09-01 14:04:29 +00:00
blobCount := make(map[restic.BlobHandle]int)
duplicateBlobs := 0
duplicateBytes := 0
2016-08-15 18:13:56 +00:00
// find duplicate blobs
for _, p := range idx.Packs {
for _, entry := range p.Entries {
stats.blobs++
2016-09-01 14:04:29 +00:00
h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
2016-08-15 18:13:56 +00:00
blobCount[h]++
2016-08-15 18:13:56 +00:00
if blobCount[h] > 1 {
duplicateBlobs++
2016-08-15 18:13:56 +00:00
duplicateBytes += int(entry.Length)
}
}
}
2016-09-17 10:36:05 +00:00
Verbosef("processed %d blobs: %d duplicate blobs, %v duplicate\n",
2016-08-15 19:13:38 +00:00
stats.blobs, duplicateBlobs, formatBytes(uint64(duplicateBytes)))
2016-09-17 10:36:05 +00:00
Verbosef("load all snapshots\n")
2016-08-15 18:13:56 +00:00
// find referenced blobs
2017-06-04 09:16:55 +00:00
snapshots, err := restic.LoadAllSnapshots(ctx, repo)
if err != nil {
return err
}
stats.snapshots = len(snapshots)
2016-09-17 10:36:05 +00:00
Verbosef("find data that is still in use for %d snapshots\n", stats.snapshots)
2016-09-01 14:04:29 +00:00
usedBlobs := restic.NewBlobSet()
seenBlobs := restic.NewBlobSet()
2016-09-17 10:36:05 +00:00
bar = newProgressMax(!gopts.Quiet, uint64(len(snapshots)), "snapshots")
bar.Start()
for _, sn := range snapshots {
2016-09-27 20:35:08 +00:00
debug.Log("process snapshot %v", sn.ID().Str())
2017-06-04 09:16:55 +00:00
err = restic.FindUsedBlobs(ctx, repo, *sn.Tree, usedBlobs, seenBlobs)
if err != nil {
2017-06-15 11:40:27 +00:00
if repo.Backend().IsNotExist(err) {
return errors.Fatal("unable to load a tree from the repo: " + err.Error())
}
return err
}
2017-06-10 20:16:42 +00:00
debug.Log("processed snapshot %v", sn.ID().Str())
bar.Report(restic.Stat{Blobs: 1})
}
bar.Done()
2017-09-19 08:50:07 +00:00
if len(usedBlobs) > stats.blobs {
return errors.Fatalf("number of used blobs is larger than number of available blobs!\n" +
"Please report this error (along with the output of the 'prune' run) at\n" +
"https://github.com/restic/restic/issues/new")
}
2016-09-17 10:36:05 +00:00
Verbosef("found %d of %d data blobs still in use, removing %d blobs\n",
len(usedBlobs), stats.blobs, stats.blobs-len(usedBlobs))
2016-08-15 18:13:56 +00:00
// find packs that need a rewrite
2016-09-01 14:04:29 +00:00
rewritePacks := restic.NewIDSet()
for _, pack := range idx.Packs {
for _, blob := range pack.Entries {
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
if !usedBlobs.Has(h) {
rewritePacks.Insert(pack.ID)
continue
}
if blobCount[h] > 1 {
rewritePacks.Insert(pack.ID)
}
}
}
removeBytes := duplicateBytes
// find packs that are unneeded
2016-09-01 14:04:29 +00:00
removePacks := restic.NewIDSet()
Verbosef("will remove %d invalid files\n", len(invalidFiles))
for _, id := range invalidFiles {
removePacks.Insert(id)
}
for packID, p := range idx.Packs {
hasActiveBlob := false
for _, blob := range p.Entries {
2016-09-01 14:04:29 +00:00
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
if usedBlobs.Has(h) {
hasActiveBlob = true
continue
}
removeBytes += int(blob.Length)
}
if hasActiveBlob {
continue
}
removePacks.Insert(packID)
if !rewritePacks.Has(packID) {
2016-09-01 20:17:37 +00:00
return errors.Fatalf("pack %v is unneeded, but not contained in rewritePacks", packID.Str())
}
rewritePacks.Delete(packID)
}
2016-09-17 10:36:05 +00:00
Verbosef("will delete %d packs and rewrite %d packs, this frees %s\n",
len(removePacks), len(rewritePacks), formatBytes(uint64(removeBytes)))
var obsoletePacks restic.IDSet
if len(rewritePacks) != 0 {
2017-03-04 16:43:58 +00:00
bar = newProgressMax(!gopts.Quiet, uint64(len(rewritePacks)), "packs rewritten")
bar.Start()
obsoletePacks, err = repository.Repack(ctx, repo, rewritePacks, usedBlobs, bar)
if err != nil {
return err
}
bar.Done()
}
removePacks.Merge(obsoletePacks)
if err = rebuildIndex(ctx, repo, removePacks); err != nil {
return err
}
if len(removePacks) != 0 {
bar = newProgressMax(!gopts.Quiet, uint64(len(removePacks)), "packs deleted")
bar.Start()
for packID := range removePacks {
h := restic.Handle{Type: restic.DataFile, Name: packID.String()}
2017-06-04 09:16:55 +00:00
err = repo.Backend().Remove(ctx, h)
if err != nil {
Warnf("unable to remove file %v from the repository\n", packID.Str())
}
bar.Report(restic.Stat{Blobs: 1})
}
bar.Done()
}
2016-09-17 10:36:05 +00:00
Verbosef("done\n")
return nil
}