forked from TrueCloudLab/restic
prune: Add unsafe option to recover from no free space
The new option allows prune to operate with nearly no scratch space by only removing no longer necessary pack files and first deleting the index before rebuilding it. By first deleting the index it becomes safe to just delete no longer necessary pack files. However, as a downside there's now the risk that the repository becomes inaccessible if prune fails. To recover from that problem a user might have to manually delete the repository index and then run (a full) `rebuild-index` again.
This commit is contained in:
parent
cf5cb673fb
commit
dbbeac7174
4 changed files with 92 additions and 11 deletions
9
changelog/unreleased/issue-1153
Normal file
9
changelog/unreleased/issue-1153
Normal file
|
@ -0,0 +1,9 @@
|
|||
Enhancement: Support pruning even after running out of disk space
|
||||
|
||||
When running out of disk space it was no longer possible to add or remove
|
||||
data from a repository. To help with recovering from such a deadlock, the
|
||||
prune command now supports an `--unsafe-recover-no-free-space` option to
|
||||
recover from such situations. Make sure to read the documentation first!
|
||||
|
||||
https://github.com/restic/restic/issues/1153
|
||||
https://github.com/restic/restic/pull/3481
|
|
@ -39,7 +39,10 @@ Exit status is 0 if the command was successful, and non-zero if there was any er
|
|||
|
||||
// PruneOptions collects all options for the cleanup command.
|
||||
type PruneOptions struct {
|
||||
DryRun bool
|
||||
DryRun bool
|
||||
UnsafeNoSpaceRecovery string
|
||||
|
||||
unsafeRecovery bool
|
||||
|
||||
MaxUnused string
|
||||
maxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused
|
||||
|
@ -56,6 +59,7 @@ func init() {
|
|||
cmdRoot.AddCommand(cmdPrune)
|
||||
f := cmdPrune.Flags()
|
||||
f.BoolVarP(&pruneOptions.DryRun, "dry-run", "n", false, "do not modify the repository, just print what would be done")
|
||||
f.StringVarP(&pruneOptions.UnsafeNoSpaceRecovery, "unsafe-recover-no-free-space", "", "", "UNSAFE, READ THE DOCUMENTATION BEFORE USING! Try to recover a repository stuck with no free space. Do not use without trying out 'prune --max-repack-size 0' first.")
|
||||
addPruneOptions(cmdPrune)
|
||||
}
|
||||
|
||||
|
@ -75,6 +79,10 @@ func verifyPruneOptions(opts *PruneOptions) error {
|
|||
}
|
||||
opts.MaxRepackBytes = uint64(size)
|
||||
}
|
||||
if opts.UnsafeNoSpaceRecovery != "" {
|
||||
// prevent repacking data to make sure users cannot get stuck.
|
||||
opts.MaxRepackBytes = 0
|
||||
}
|
||||
|
||||
maxUnused := strings.TrimSpace(opts.MaxUnused)
|
||||
if maxUnused == "" {
|
||||
|
@ -136,6 +144,14 @@ func runPrune(opts PruneOptions, gopts GlobalOptions) error {
|
|||
return errors.Fatal("prune requires a backend connection limit of at least two")
|
||||
}
|
||||
|
||||
if opts.UnsafeNoSpaceRecovery != "" {
|
||||
repoID := repo.Config().ID
|
||||
if opts.UnsafeNoSpaceRecovery != repoID {
|
||||
return errors.Fatalf("must pass id '%s' to --unsafe-recover-no-free-space", repoID)
|
||||
}
|
||||
opts.unsafeRecovery = true
|
||||
}
|
||||
|
||||
lock, err := lockRepoExclusive(gopts.ctx, repo)
|
||||
defer unlockRepo(lock)
|
||||
if err != nil {
|
||||
|
@ -522,7 +538,14 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
|||
ignorePacks.Merge(removePacks)
|
||||
}
|
||||
|
||||
if len(ignorePacks) != 0 {
|
||||
if opts.unsafeRecovery {
|
||||
Verbosef("deleting index files\n")
|
||||
indexFiles := repo.Index().(*repository.MasterIndex).IDs()
|
||||
err = DeleteFilesChecked(gopts, repo, indexFiles, restic.IndexFile)
|
||||
if err != nil {
|
||||
return errors.Fatalf("%s", err)
|
||||
}
|
||||
} else if len(ignorePacks) != 0 {
|
||||
err = rebuildIndexFiles(gopts, repo, ignorePacks, nil)
|
||||
if err != nil {
|
||||
return errors.Fatalf("%s", err)
|
||||
|
@ -534,11 +557,18 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
|||
DeleteFiles(gopts, repo, removePacks, restic.PackFile)
|
||||
}
|
||||
|
||||
if opts.unsafeRecovery {
|
||||
_, err = writeIndexFiles(gopts, repo, ignorePacks, nil)
|
||||
if err != nil {
|
||||
return errors.Fatalf("%s", err)
|
||||
}
|
||||
}
|
||||
|
||||
Verbosef("done\n")
|
||||
return nil
|
||||
}
|
||||
|
||||
func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) error {
|
||||
func writeIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) (restic.IDSet, error) {
|
||||
Verbosef("rebuilding index\n")
|
||||
|
||||
idx := (repo.Index()).(*repository.MasterIndex)
|
||||
|
@ -546,6 +576,11 @@ func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks
|
|||
bar := newProgressMax(!gopts.Quiet, packcount, "packs processed")
|
||||
obsoleteIndexes, err := idx.Save(gopts.ctx, repo, removePacks, extraObsolete, bar)
|
||||
bar.Done()
|
||||
return obsoleteIndexes, err
|
||||
}
|
||||
|
||||
func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) error {
|
||||
obsoleteIndexes, err := writeIndexFiles(gopts, repo, removePacks, extraObsolete)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -1573,26 +1573,35 @@ func TestCheckRestoreNoLock(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestPrune(t *testing.T) {
|
||||
t.Run("0", func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnused: "0%"}
|
||||
testPruneVariants(t, false)
|
||||
testPruneVariants(t, true)
|
||||
}
|
||||
|
||||
func testPruneVariants(t *testing.T, unsafeNoSpaceRecovery bool) {
|
||||
suffix := ""
|
||||
if unsafeNoSpaceRecovery {
|
||||
suffix = "-recovery"
|
||||
}
|
||||
t.Run("0"+suffix, func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnused: "0%", unsafeRecovery: unsafeNoSpaceRecovery}
|
||||
checkOpts := CheckOptions{ReadData: true, CheckUnused: true}
|
||||
testPrune(t, opts, checkOpts)
|
||||
})
|
||||
|
||||
t.Run("50", func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnused: "50%"}
|
||||
t.Run("50"+suffix, func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnused: "50%", unsafeRecovery: unsafeNoSpaceRecovery}
|
||||
checkOpts := CheckOptions{ReadData: true}
|
||||
testPrune(t, opts, checkOpts)
|
||||
})
|
||||
|
||||
t.Run("unlimited", func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnused: "unlimited"}
|
||||
t.Run("unlimited"+suffix, func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnused: "unlimited", unsafeRecovery: unsafeNoSpaceRecovery}
|
||||
checkOpts := CheckOptions{ReadData: true}
|
||||
testPrune(t, opts, checkOpts)
|
||||
})
|
||||
|
||||
t.Run("CachableOnly", func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnused: "5%", RepackCachableOnly: true}
|
||||
t.Run("CachableOnly"+suffix, func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnused: "5%", RepackCachableOnly: true, unsafeRecovery: unsafeNoSpaceRecovery}
|
||||
checkOpts := CheckOptions{ReadData: true}
|
||||
testPrune(t, opts, checkOpts)
|
||||
})
|
||||
|
|
|
@ -444,3 +444,31 @@ The ``prune`` command accepts the following options:
|
|||
- ``--dry-run`` only show what ``prune`` would do.
|
||||
|
||||
- ``--verbose`` increased verbosity shows additional statistics for ``prune``.
|
||||
|
||||
|
||||
Recovering from "no free space" errors
|
||||
**************************************
|
||||
|
||||
In some cases when a repository has grown large enough to fill up all disk space or the
|
||||
allocated quota, then ``prune`` might fail to free space. ``prune`` works in such a way
|
||||
that a repository remains usable no matter at which point the command is interrupted.
|
||||
However, this also means that ``prune`` requires some scratch space to work.
|
||||
|
||||
In most cases it is sufficient to instruct ``prune`` to use as little scratch space as
|
||||
possible by running it as ``prune --max-repack-size 0``. Note that for restic versions
|
||||
before 0.13.0 ``prune --max-repack-size 1`` must be used. Obviously, this can only work
|
||||
if several snapshots have been removed using ``forget`` before. This then allows the
|
||||
``prune`` command to actually remove data from the repository. If the command succeeds,
|
||||
but there is still little free space, then remove a few more snapshots and run ``prune`` again.
|
||||
|
||||
If ``prune`` fails to complete, then ``prune --unsafe-recover-no-free-space SOME-ID``
|
||||
is available as a method of last resort. It allows prune to work with little to no free
|
||||
space. However, a **failed** ``prune`` run can cause the repository to become
|
||||
**temporarily unusable**. Therefore, make sure that you have a stable connection to the
|
||||
repository storage, before running this command. In case the command fails, it may become
|
||||
necessary to manually remove all files from the `index/` folder of the repository and
|
||||
run `rebuild-index` afterwards.
|
||||
|
||||
To prevent accidental usages of the ``--unsafe-recover-no-free-space`` option it is
|
||||
necessary to first run ``prune --unsafe-recover-no-free-space SOME-ID`` and then replace
|
||||
``SOME-ID`` with the requested ID.
|
||||
|
|
Loading…
Reference in a new issue