2016-08-04 17:42:40 +00:00
package main
import (
2020-11-03 09:53:38 +00:00
"math"
2020-07-19 05:55:14 +00:00
"sort"
"strconv"
2020-11-03 09:53:38 +00:00
"strings"
2020-07-19 05:55:14 +00:00
2017-07-23 12:21:03 +00:00
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
2022-02-13 13:25:38 +00:00
"github.com/restic/restic/internal/pack"
2017-07-23 12:21:03 +00:00
"github.com/restic/restic/internal/repository"
2017-07-24 15:42:25 +00:00
"github.com/restic/restic/internal/restic"
2017-07-23 12:21:03 +00:00
2016-09-17 10:36:05 +00:00
"github.com/spf13/cobra"
2016-08-04 17:42:40 +00:00
)
2020-07-19 05:55:14 +00:00
// Fatal errors returned by prune when the repository is in an
// inconsistent state and pruning must not proceed.
var (
	errorIndexIncomplete = errors.Fatal("index is not complete")
	errorPacksMissing    = errors.Fatal("packs from index missing in repo")
	errorSizeNotMatching = errors.Fatal("pack size does not match calculated size from index")
)
2016-09-17 10:36:05 +00:00
var cmdPrune = & cobra . Command {
Use : "prune [flags]" ,
2017-09-11 16:32:44 +00:00
Short : "Remove unneeded data from the repository" ,
2016-09-17 10:36:05 +00:00
Long : `
The "prune" command checks the repository and removes data that is not
referenced and therefore not needed any more .
2019-11-05 06:03:38 +00:00
EXIT STATUS
== == == == == =
Exit status is 0 if the command was successful , and non - zero if there was any error .
2016-09-17 10:36:05 +00:00
` ,
2017-08-06 19:02:16 +00:00
DisableAutoGenTag : true ,
2016-09-17 10:36:05 +00:00
RunE : func ( cmd * cobra . Command , args [ ] string ) error {
2020-07-19 05:55:14 +00:00
return runPrune ( pruneOptions , globalOptions )
2016-09-17 10:36:05 +00:00
} ,
2016-08-04 17:42:40 +00:00
}
2020-07-19 05:55:14 +00:00
// PruneOptions collects all options for the cleanup command.
type PruneOptions struct {
	// DryRun: do not modify the repository, only report what would be done.
	DryRun bool
	// UnsafeNoSpaceRecovery is the value of --unsafe-recover-no-free-space;
	// it must match the repository ID to enable recovery mode (see runPrune).
	UnsafeNoSpaceRecovery string
	// unsafeRecovery is set once UnsafeNoSpaceRecovery has been validated
	// against the repository ID.
	unsafeRecovery bool

	// MaxUnused is the raw --max-unused flag value: "unlimited", a
	// percentage, or an absolute size with optional suffix.
	MaxUnused      string
	maxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused

	// MaxRepackSize is the raw --max-repack-size flag value;
	// MaxRepackBytes is the parsed byte count derived from it.
	MaxRepackSize  string
	MaxRepackBytes uint64

	// RepackCachableOnly restricts repacking to cacheable packs.
	RepackCachableOnly bool
	// RepackUncompressed requests repacking of all uncompressed data.
	RepackUncompressed bool
}

// pruneOptions holds the flag values for the prune command.
var pruneOptions PruneOptions
2016-08-04 17:42:40 +00:00
// init registers the prune command and its flags with the root command.
func init() {
	cmdRoot.AddCommand(cmdPrune)

	f := cmdPrune.Flags()
	f.BoolVarP(&pruneOptions.DryRun, "dry-run", "n", false, "do not modify the repository, just print what would be done")
	f.StringVarP(&pruneOptions.UnsafeNoSpaceRecovery, "unsafe-recover-no-free-space", "", "", "UNSAFE, READ THE DOCUMENTATION BEFORE USING! Try to recover a repository stuck with no free space. Do not use without trying out 'prune --max-repack-size 0' first.")

	// flags shared with other commands that can trigger pruning
	addPruneOptions(cmdPrune)
}
// addPruneOptions registers the pruning-related flags on c. The parsed
// values are stored in the package-level pruneOptions.
func addPruneOptions(c *cobra.Command) {
	f := c.Flags()
	f.StringVar(&pruneOptions.MaxUnused, "max-unused", "5%", "tolerate given `limit` of unused data (absolute value in bytes with suffixes k/K, m/M, g/G, t/T, a value in % or the word 'unlimited')")
	f.StringVar(&pruneOptions.MaxRepackSize, "max-repack-size", "", "maximum `size` to repack (allowed suffixes: k/K, m/M, g/G, t/T)")
	f.BoolVar(&pruneOptions.RepackCachableOnly, "repack-cacheable-only", false, "only repack packs which are cacheable")
	f.BoolVar(&pruneOptions.RepackUncompressed, "repack-uncompressed", false, "repack all uncompressed data")
}
func verifyPruneOptions ( opts * PruneOptions ) error {
2021-12-07 19:45:36 +00:00
opts . MaxRepackBytes = math . MaxUint64
2020-07-19 05:55:14 +00:00
if len ( opts . MaxRepackSize ) > 0 {
size , err := parseSizeStr ( opts . MaxRepackSize )
if err != nil {
return err
}
opts . MaxRepackBytes = uint64 ( size )
}
2021-08-16 14:02:01 +00:00
if opts . UnsafeNoSpaceRecovery != "" {
// prevent repacking data to make sure users cannot get stuck.
opts . MaxRepackBytes = 0
}
2020-07-19 05:55:14 +00:00
2020-11-03 09:53:38 +00:00
maxUnused := strings . TrimSpace ( opts . MaxUnused )
if maxUnused == "" {
return errors . Fatalf ( "invalid value for --max-unused: %q" , opts . MaxUnused )
2020-07-19 05:55:14 +00:00
}
2020-11-03 09:53:38 +00:00
// parse MaxUnused either as unlimited, a percentage, or an absolute number of bytes
switch {
case maxUnused == "unlimited" :
opts . maxUnusedBytes = func ( used uint64 ) uint64 {
return math . MaxUint64
}
case strings . HasSuffix ( maxUnused , "%" ) :
maxUnused = strings . TrimSuffix ( maxUnused , "%" )
p , err := strconv . ParseFloat ( maxUnused , 64 )
if err != nil {
return errors . Fatalf ( "invalid percentage %q passed for --max-unused: %v" , opts . MaxUnused , err )
}
if p < 0 {
return errors . Fatal ( "percentage for --max-unused must be positive" )
}
2020-07-19 05:55:14 +00:00
2020-11-03 09:53:38 +00:00
if p >= 100 {
return errors . Fatal ( "percentage for --max-unused must be below 100%" )
}
opts . maxUnusedBytes = func ( used uint64 ) uint64 {
return uint64 ( p / ( 100 - p ) * float64 ( used ) )
}
default :
size , err := parseSizeStr ( maxUnused )
if err != nil {
return errors . Fatalf ( "invalid number of bytes %q for --max-unused: %v" , opts . MaxUnused , err )
}
opts . maxUnusedBytes = func ( used uint64 ) uint64 {
return uint64 ( size )
}
2020-07-19 05:55:14 +00:00
}
return nil
2016-08-04 17:42:40 +00:00
}
2020-07-19 05:55:14 +00:00
// runPrune validates the options, opens and exclusively locks the
// repository, and then delegates the actual pruning to runPruneWithRepo.
func runPrune(opts PruneOptions, gopts GlobalOptions) error {
	err := verifyPruneOptions(&opts)
	if err != nil {
		return err
	}

	// --repack-uncompressed makes no sense when compression is disabled
	if opts.RepackUncompressed && gopts.Compression == repository.CompressionOff {
		return errors.Fatal("disabled compression and `--repack-uncompressed` are mutually exclusive")
	}

	repo, err := OpenRepository(gopts)
	if err != nil {
		return err
	}

	if repo.Backend().Connections() < 2 {
		return errors.Fatal("prune requires a backend connection limit of at least two")
	}

	// compression is only available with repository format version >= 2
	if repo.Config().Version < 2 && opts.RepackUncompressed {
		return errors.Fatal("compression requires at least repository format version 2")
	}

	// the recovery flag must carry the exact repository ID as a safeguard
	if opts.UnsafeNoSpaceRecovery != "" {
		repoID := repo.Config().ID
		if opts.UnsafeNoSpaceRecovery != repoID {
			return errors.Fatalf("must pass id '%s' to --unsafe-recover-no-free-space", repoID)
		}
		opts.unsafeRecovery = true
	}

	// prune needs an exclusive lock; unlock even if locking failed partway
	lock, err := lockRepoExclusive(gopts.ctx, repo)
	defer unlockRepo(lock)
	if err != nil {
		return err
	}

	return runPruneWithRepo(opts, gopts, repo, restic.NewIDSet())
}
2020-07-19 05:13:41 +00:00
// runPruneWithRepo performs pruning on an already opened and exclusively
// locked repository. Snapshots in ignoreSnapshots are skipped when
// collecting the set of blobs that are still in use.
func runPruneWithRepo(opts PruneOptions, gopts GlobalOptions, repo *repository.Repository, ignoreSnapshots restic.IDSet) error {
	// we do not need index updates while pruning!
	repo.DisableAutoIndexUpdate()

	if repo.Cache == nil {
		Print("warning: running prune without a cache, this may be very slow!\n")
	}

	Verbosef("loading indexes...\n")
	// loading the index before the snapshots is ok, as we use an exclusive lock here
	err := repo.LoadIndex(gopts.ctx)
	if err != nil {
		return err
	}

	// determine which blobs are referenced by the remaining snapshots
	usedBlobs, err := getUsedBlobs(gopts, repo, ignoreSnapshots)
	if err != nil {
		return err
	}

	return prune(opts, gopts, repo, usedBlobs)
}
2017-06-15 13:03:05 +00:00
2020-07-19 05:55:14 +00:00
// packInfo summarizes the blobs contained in a single pack file; prune uses
// it to decide whether the pack is kept, repacked, or removed.
type packInfo struct {
	usedBlobs      uint   // number of still-referenced blobs in the pack
	unusedBlobs    uint   // number of blobs no longer referenced
	duplicateBlobs uint   // number of blobs that already exist in a kept pack
	usedSize       uint64 // bytes of used blobs (initialized with the pack header size)
	unusedSize     uint64 // bytes of unused blobs
	// tpe is the blob type of the pack's contents; InvalidBlob marks a mixed
	// pack and NumBlobTypes means "not yet determined".
	tpe restic.BlobType
	// uncompressed: pack contains at least one uncompressed blob; later in
	// prune() this field is reused as a "must be compressed" flag.
	uncompressed bool
}
2016-08-04 17:42:40 +00:00
2020-07-19 05:55:14 +00:00
// packInfoWithID couples a packInfo with the ID of the pack it describes.
type packInfoWithID struct {
	ID restic.ID
	packInfo
}
2016-08-04 17:42:40 +00:00
2020-11-03 11:50:33 +00:00
// prune selects which files to rewrite and then does that. The map usedBlobs is
// modified in the process.
func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedBlobs restic.BlobSet) error {
	ctx := gopts.ctx

	// counters for the statistics printed at the end
	var stats struct {
		blobs struct {
			used      uint
			duplicate uint
			unused    uint
			remove    uint
			repack    uint
			repackrm  uint
		}
		size struct {
			used      uint64
			duplicate uint64
			unused    uint64
			remove    uint64
			repack    uint64
			repackrm  uint64
			unref     uint64
		}
		packs struct {
			used       uint
			unused     uint
			partlyUsed uint
			keep       uint
		}
	}

	Verbosef("searching used packs...\n")

	keepBlobs := restic.NewBlobSet()
	duplicateBlobs := restic.NewBlobSet()

	// iterate over all blobs in index to find out which blobs are duplicates
	for blob := range repo.Index().Each(ctx) {
		bh := blob.BlobHandle
		size := uint64(blob.Length)
		switch {
		case usedBlobs.Has(bh): // used blob, move to keepBlobs
			usedBlobs.Delete(bh)
			keepBlobs.Insert(bh)
			stats.size.used += size
			stats.blobs.used++
		case keepBlobs.Has(bh): // duplicate blob
			duplicateBlobs.Insert(bh)
			stats.size.duplicate += size
			stats.blobs.duplicate++
		default:
			stats.size.unused += size
			stats.blobs.unused++
		}
	}

	// Check if all used blobs have been found in index
	if len(usedBlobs) != 0 {
		Warnf("%v not found in the index\n\n"+
			"Integrity check failed: Data seems to be missing.\n"+
			"Will not start prune to prevent (additional) data loss!\n"+
			"Please report this error (along with the output of the 'prune' run) at\n"+
			"https://github.com/restic/restic/issues/new/choose\n", usedBlobs)
		return errorIndexIncomplete
	}

	indexPack := make(map[restic.ID]packInfo)

	// save computed pack header size
	for pid, hdrSize := range pack.Size(ctx, repo.Index(), true) {
		// initialize tpe with NumBlobTypes to indicate it's not set
		indexPack[pid] = packInfo{tpe: restic.NumBlobTypes, usedSize: uint64(hdrSize)}
	}

	// iterate over all blobs in index to generate packInfo
	for blob := range repo.Index().Each(ctx) {
		ip := indexPack[blob.PackID]
		// Set blob type if not yet set
		if ip.tpe == restic.NumBlobTypes {
			ip.tpe = blob.Type
		}

		// mark mixed packs with "Invalid blob type"
		if ip.tpe != blob.Type {
			ip.tpe = restic.InvalidBlob
		}

		bh := blob.BlobHandle
		size := uint64(blob.Length)
		switch {
		case duplicateBlobs.Has(bh): // duplicate blob
			ip.usedSize += size
			ip.duplicateBlobs++
		case keepBlobs.Has(bh): // used blob, not duplicate
			ip.usedSize += size
			ip.usedBlobs++
		default: // unused blob
			ip.unusedSize += size
			ip.unusedBlobs++
		}
		if !blob.IsCompressed() {
			ip.uncompressed = true
		}
		// update indexPack
		indexPack[blob.PackID] = ip
	}

	Verbosef("collecting packs for deletion and repacking\n")
	removePacksFirst := restic.NewIDSet()
	removePacks := restic.NewIDSet()
	repackPacks := restic.NewIDSet()
	var repackCandidates []packInfoWithID
	// repackAllPacksWithDuplicates stays true only if no pack containing a
	// duplicate blob is kept; used below to correct the statistics.
	repackAllPacksWithDuplicates := true
	keep := func(p packInfo) {
		stats.packs.keep++
		if p.duplicateBlobs > 0 {
			repackAllPacksWithDuplicates = false
		}
	}

	repoVersion := repo.Config().Version

	// loop over all packs and decide what to do
	bar := newProgressMax(!gopts.Quiet, uint64(len(indexPack)), "packs processed")
	err := repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error {
		p, ok := indexPack[id]
		if !ok {
			// Pack was not referenced in index and is not used => immediately remove!
			Verboseff("will remove pack %v as it is unused and not indexed\n", id.Str())
			removePacksFirst.Insert(id)
			stats.size.unref += uint64(packSize)
			return nil
		}

		if p.unusedSize+p.usedSize != uint64(packSize) &&
			!(p.usedBlobs == 0 && p.duplicateBlobs == 0) {
			// Pack size does not fit and pack is needed => error
			// If the pack is not needed, this is no error, the pack can
			// and will be simply removed, see below.
			Warnf("pack %s: calculated size %d does not match real size %d\nRun 'restic rebuild-index'.\n",
				id.Str(), p.unusedSize+p.usedSize, packSize)
			return errorSizeNotMatching
		}

		// statistics
		switch {
		case p.usedBlobs == 0 && p.duplicateBlobs == 0:
			stats.packs.unused++
		case p.unusedBlobs == 0:
			stats.packs.used++
		default:
			stats.packs.partlyUsed++
		}

		mustCompress := false
		if repoVersion >= 2 {
			// repo v2: always repack tree blobs if uncompressed
			// compress data blobs if requested
			mustCompress = (p.tpe == restic.TreeBlob || opts.RepackUncompressed) && p.uncompressed
		}
		// use a flag that pack must be compressed
		p.uncompressed = mustCompress

		// decide what to do
		switch {
		case p.usedBlobs == 0 && p.duplicateBlobs == 0:
			// All blobs in pack are no longer used => remove pack!
			removePacks.Insert(id)
			stats.blobs.remove += p.unusedBlobs
			stats.size.remove += p.unusedSize

		case opts.RepackCachableOnly && p.tpe == restic.DataBlob:
			// if this is a data pack and --repack-cacheable-only is set => keep pack!
			keep(p)

		case p.unusedBlobs == 0 && p.duplicateBlobs == 0 && p.tpe != restic.InvalidBlob && !mustCompress:
			// All blobs in pack are used and not duplicates/mixed => keep pack!
			keep(p)

		default:
			// all other packs are candidates for repacking
			repackCandidates = append(repackCandidates, packInfoWithID{ID: id, packInfo: p})
		}

		delete(indexPack, id)
		bar.Add(1)
		return nil
	})
	bar.Done()
	if err != nil {
		return err
	}

	// At this point indexPacks contains only missing packs!
	// missing packs that are not needed can be ignored
	ignorePacks := restic.NewIDSet()
	for id, p := range indexPack {
		if p.usedBlobs == 0 && p.duplicateBlobs == 0 {
			ignorePacks.Insert(id)
			stats.blobs.remove += p.unusedBlobs
			stats.size.remove += p.unusedSize
			delete(indexPack, id)
		}
	}

	// any remaining entry is a needed pack that is missing from the repo
	if len(indexPack) != 0 {
		Warnf("The index references %d needed pack files which are missing from the repository:\n", len(indexPack))
		for id := range indexPack {
			Warnf(" %v\n", id)
		}
		return errorPacksMissing
	}
	if len(ignorePacks) != 0 {
		Warnf("Missing but unneeded pack files are referenced in the index, will be repaired\n")
		for id := range ignorePacks {
			Warnf("will forget missing pack file %v\n", id)
		}
	}

	// calculate limit for number of unused bytes in the repo after repacking
	maxUnusedSizeAfter := opts.maxUnusedBytes(stats.size.used)

	// Sort repackCandidates such that packs with highest ratio unused/used space are picked first.
	// This is equivalent to sorting by unused / total space.
	// Instead of unused[i] / used[i] > unused[j] / used[j] we use
	// unused[i] * used[j] > unused[j] * used[i] as uint32*uint32 < uint64
	// Morover duplicates and packs containing trees are sorted to the beginning
	sort.Slice(repackCandidates, func(i, j int) bool {
		pi := repackCandidates[i].packInfo
		pj := repackCandidates[j].packInfo
		switch {
		case pi.duplicateBlobs > 0 && pj.duplicateBlobs == 0:
			return true
		case pj.duplicateBlobs > 0 && pi.duplicateBlobs == 0:
			return false
		case pi.tpe != restic.DataBlob && pj.tpe == restic.DataBlob:
			return true
		case pj.tpe != restic.DataBlob && pi.tpe == restic.DataBlob:
			return false
		}
		return pi.unusedSize*pj.usedSize > pj.unusedSize*pi.usedSize
	})

	// repack marks a pack for repacking and accounts for the blobs/bytes
	// that the repack will copy and the unused ones it will drop.
	repack := func(id restic.ID, p packInfo) {
		repackPacks.Insert(id)
		stats.blobs.repack += p.unusedBlobs + p.duplicateBlobs + p.usedBlobs
		stats.size.repack += p.unusedSize + p.usedSize
		stats.blobs.repackrm += p.unusedBlobs
		stats.size.repackrm += p.unusedSize
	}

	for _, p := range repackCandidates {
		reachedUnusedSizeAfter := (stats.size.unused-stats.size.remove-stats.size.repackrm < maxUnusedSizeAfter)
		reachedRepackSize := stats.size.repack+p.unusedSize+p.usedSize >= opts.MaxRepackBytes

		switch {
		case reachedRepackSize:
			keep(p.packInfo)

		case p.duplicateBlobs > 0, p.tpe != restic.DataBlob, p.uncompressed:
			// repacking duplicates/non-data/uncompressed-trees is only limited by repackSize
			repack(p.ID, p.packInfo)

		case reachedUnusedSizeAfter:
			// for all other packs stop repacking if tolerated unused size is reached.
			keep(p.packInfo)

		default:
			repack(p.ID, p.packInfo)
		}
	}

	// if all duplicates are repacked, print out correct statistics
	if repackAllPacksWithDuplicates {
		stats.blobs.repackrm += stats.blobs.duplicate
		stats.size.repackrm += stats.size.duplicate
	}

	// print statistics; Verboseff output only appears at higher verbosity
	Verboseff("\nused: %10d blobs / %s\n", stats.blobs.used, formatBytes(stats.size.used))
	if stats.blobs.duplicate > 0 {
		Verboseff("duplicates: %10d blobs / %s\n", stats.blobs.duplicate, formatBytes(stats.size.duplicate))
	}
	Verboseff("unused: %10d blobs / %s\n", stats.blobs.unused, formatBytes(stats.size.unused))
	if stats.size.unref > 0 {
		Verboseff("unreferenced: %s\n", formatBytes(stats.size.unref))
	}
	totalBlobs := stats.blobs.used + stats.blobs.unused + stats.blobs.duplicate
	totalSize := stats.size.used + stats.size.duplicate + stats.size.unused + stats.size.unref
	unusedSize := stats.size.duplicate + stats.size.unused
	Verboseff("total: %10d blobs / %s\n", totalBlobs, formatBytes(totalSize))
	Verboseff("unused size: %s of total size\n", formatPercent(unusedSize, totalSize))

	Verbosef("\nto repack: %10d blobs / %s\n", stats.blobs.repack, formatBytes(stats.size.repack))
	Verbosef("this removes: %10d blobs / %s\n", stats.blobs.repackrm, formatBytes(stats.size.repackrm))
	Verbosef("to delete: %10d blobs / %s\n", stats.blobs.remove, formatBytes(stats.size.remove+stats.size.unref))
	totalPruneSize := stats.size.remove + stats.size.repackrm + stats.size.unref
	Verbosef("total prune: %10d blobs / %s\n", stats.blobs.remove+stats.blobs.repackrm, formatBytes(totalPruneSize))
	Verbosef("remaining: %10d blobs / %s\n", totalBlobs-(stats.blobs.remove+stats.blobs.repackrm), formatBytes(totalSize-totalPruneSize))
	unusedAfter := unusedSize - stats.size.remove - stats.size.repackrm
	Verbosef("unused size after prune: %s (%s of remaining size)\n",
		formatBytes(unusedAfter), formatPercent(unusedAfter, totalSize-totalPruneSize))
	Verbosef("\n")
	Verboseff("totally used packs: %10d\n", stats.packs.used)
	Verboseff("partly used packs: %10d\n", stats.packs.partlyUsed)
	Verboseff("unused packs: %10d\n\n", stats.packs.unused)

	Verboseff("to keep: %10d packs\n", stats.packs.keep)
	Verboseff("to repack: %10d packs\n", len(repackPacks))
	Verboseff("to delete: %10d packs\n", len(removePacks))
	if len(removePacksFirst) > 0 {
		Verboseff("to delete: %10d unreferenced packs\n\n", len(removePacksFirst))
	}

	if opts.DryRun {
		if !gopts.JSON && gopts.verbosity >= 2 {
			if len(removePacksFirst) > 0 {
				Printf("Would have removed the following unreferenced packs:\n%v\n\n", removePacksFirst)
			}
			Printf("Would have repacked and removed the following packs:\n%v\n\n", repackPacks)
			Printf("Would have removed the following no longer used packs:\n%v\n\n", removePacks)
		}
		// Always quit here if DryRun was set!
		return nil
	}

	// unreferenced packs can be safely deleted first
	if len(removePacksFirst) != 0 {
		Verbosef("deleting unreferenced packs\n")
		DeleteFiles(gopts, repo, removePacksFirst, restic.PackFile)
	}

	if len(repackPacks) != 0 {
		Verbosef("repacking packs\n")
		bar := newProgressMax(!gopts.Quiet, uint64(len(repackPacks)), "packs repacked")
		_, err := repository.Repack(ctx, repo, repo, repackPacks, keepBlobs, bar)
		bar.Done()
		if err != nil {
			return errors.Fatalf("%s", err)
		}

		// Also remove repacked packs
		removePacks.Merge(repackPacks)
	}

	// ignorePacks now collects everything the new index must not reference
	if len(ignorePacks) == 0 {
		ignorePacks = removePacks
	} else {
		ignorePacks.Merge(removePacks)
	}

	if opts.unsafeRecovery {
		// recovery mode: drop the old index files first to free space; a
		// fresh index is written after the packs have been removed below
		Verbosef("deleting index files\n")
		indexFiles := repo.Index().(*repository.MasterIndex).IDs()
		err = DeleteFilesChecked(gopts, repo, indexFiles, restic.IndexFile)
		if err != nil {
			return errors.Fatalf("%s", err)
		}
	} else if len(ignorePacks) != 0 {
		err = rebuildIndexFiles(gopts, repo, ignorePacks, nil)
		if err != nil {
			return errors.Fatalf("%s", err)
		}
	}

	if len(removePacks) != 0 {
		Verbosef("removing %d old packs\n", len(removePacks))
		DeleteFiles(gopts, repo, removePacks, restic.PackFile)
	}

	if opts.unsafeRecovery {
		_, err = writeIndexFiles(gopts, repo, ignorePacks, nil)
		if err != nil {
			return errors.Fatalf("%s", err)
		}
	}

	Verbosef("done\n")
	return nil
}
2020-07-19 07:48:53 +00:00
2021-08-16 14:02:01 +00:00
// writeIndexFiles saves new index files that no longer reference the packs
// in removePacks (and marks extraObsolete indexes as obsolete). It returns
// the set of index files that are now obsolete, without deleting them.
func writeIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) (restic.IDSet, error) {
	Verbosef("rebuilding index\n")

	idx := (repo.Index()).(*repository.MasterIndex)
	packcount := uint64(len(idx.Packs(removePacks)))
	bar := newProgressMax(!gopts.Quiet, packcount, "packs processed")
	obsoleteIndexes, err := idx.Save(gopts.ctx, repo, removePacks, extraObsolete, bar)
	bar.Done()
	return obsoleteIndexes, err
}
// rebuildIndexFiles writes a new index that omits removePacks and then
// deletes the index files made obsolete by that rewrite.
func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) error {
	obsoleteIndexes, err := writeIndexFiles(gopts, repo, removePacks, extraObsolete)
	if err != nil {
		return err
	}
	Verbosef("deleting obsolete index files\n")
	return DeleteFilesChecked(gopts, repo, obsoleteIndexes, restic.IndexFile)
}
2020-12-06 04:22:27 +00:00
// getUsedBlobs collects the set of blobs referenced by all snapshots in the
// repository, except those listed in ignoreSnapshots. It loads every
// snapshot, gathers the root tree IDs, and walks the trees to find used
// blobs.
func getUsedBlobs(gopts GlobalOptions, repo restic.Repository, ignoreSnapshots restic.IDSet) (usedBlobs restic.BlobSet, err error) {
	ctx := gopts.ctx

	var snapshotTrees restic.IDs
	Verbosef("loading all snapshots...\n")
	err = restic.ForAllSnapshots(gopts.ctx, repo.Backend(), repo, ignoreSnapshots,
		func(id restic.ID, sn *restic.Snapshot, err error) error {
			if err != nil {
				debug.Log("failed to load snapshot %v (error %v)", id, err)
				return err
			}
			debug.Log("add snapshot %v (tree %v)", id, *sn.Tree)
			snapshotTrees = append(snapshotTrees, *sn.Tree)
			return nil
		})
	if err != nil {
		return nil, errors.Fatalf("failed loading snapshot: %v", err)
	}

	Verbosef("finding data that is still in use for %d snapshots\n", len(snapshotTrees))

	usedBlobs = restic.NewBlobSet()

	bar := newProgressMax(!gopts.Quiet, uint64(len(snapshotTrees)), "snapshots")
	defer bar.Done()

	err = restic.FindUsedBlobs(ctx, repo, snapshotTrees, usedBlobs, bar)
	if err != nil {
		// a missing tree indicates repository corruption => fatal error
		if repo.Backend().IsNotExist(err) {
			return nil, errors.Fatal("unable to load a tree from the repo: " + err.Error())
		}

		return nil, err
	}
	return usedBlobs, nil
}