forked from TrueCloudLab/restic
Merge pull request #3481 from MichaelEischer/recover-enospace
Recover from no free space errors
This commit is contained in:
commit
ffbd48c0c6
6 changed files with 120 additions and 38 deletions
9
changelog/unreleased/issue-1153
Normal file
9
changelog/unreleased/issue-1153
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
Enhancement: Support pruning even after running out of disk space
|
||||||
|
|
||||||
|
When running out of disk space it was no longer possible to add or remove
|
||||||
|
data from a repository. To help with recovering from such a deadlock, the
|
||||||
|
prune command now supports an `--unsafe-recover-no-free-space` option to
|
||||||
|
recover from such situations. Make sure to read the documentation first!
|
||||||
|
|
||||||
|
https://github.com/restic/restic/issues/1153
|
||||||
|
https://github.com/restic/restic/pull/3481
|
|
@ -39,7 +39,10 @@ Exit status is 0 if the command was successful, and non-zero if there was any er
|
||||||
|
|
||||||
// PruneOptions collects all options for the cleanup command.
|
// PruneOptions collects all options for the cleanup command.
|
||||||
type PruneOptions struct {
|
type PruneOptions struct {
|
||||||
DryRun bool
|
DryRun bool
|
||||||
|
UnsafeNoSpaceRecovery string
|
||||||
|
|
||||||
|
unsafeRecovery bool
|
||||||
|
|
||||||
MaxUnused string
|
MaxUnused string
|
||||||
maxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused
|
maxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused
|
||||||
|
@ -56,6 +59,7 @@ func init() {
|
||||||
cmdRoot.AddCommand(cmdPrune)
|
cmdRoot.AddCommand(cmdPrune)
|
||||||
f := cmdPrune.Flags()
|
f := cmdPrune.Flags()
|
||||||
f.BoolVarP(&pruneOptions.DryRun, "dry-run", "n", false, "do not modify the repository, just print what would be done")
|
f.BoolVarP(&pruneOptions.DryRun, "dry-run", "n", false, "do not modify the repository, just print what would be done")
|
||||||
|
f.StringVarP(&pruneOptions.UnsafeNoSpaceRecovery, "unsafe-recover-no-free-space", "", "", "UNSAFE, READ THE DOCUMENTATION BEFORE USING! Try to recover a repository stuck with no free space. Do not use without trying out 'prune --max-repack-size 0' first.")
|
||||||
addPruneOptions(cmdPrune)
|
addPruneOptions(cmdPrune)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -75,6 +79,10 @@ func verifyPruneOptions(opts *PruneOptions) error {
|
||||||
}
|
}
|
||||||
opts.MaxRepackBytes = uint64(size)
|
opts.MaxRepackBytes = uint64(size)
|
||||||
}
|
}
|
||||||
|
if opts.UnsafeNoSpaceRecovery != "" {
|
||||||
|
// prevent repacking data to make sure users cannot get stuck.
|
||||||
|
opts.MaxRepackBytes = 0
|
||||||
|
}
|
||||||
|
|
||||||
maxUnused := strings.TrimSpace(opts.MaxUnused)
|
maxUnused := strings.TrimSpace(opts.MaxUnused)
|
||||||
if maxUnused == "" {
|
if maxUnused == "" {
|
||||||
|
@ -136,6 +144,14 @@ func runPrune(opts PruneOptions, gopts GlobalOptions) error {
|
||||||
return errors.Fatal("prune requires a backend connection limit of at least two")
|
return errors.Fatal("prune requires a backend connection limit of at least two")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if opts.UnsafeNoSpaceRecovery != "" {
|
||||||
|
repoID := repo.Config().ID
|
||||||
|
if opts.UnsafeNoSpaceRecovery != repoID {
|
||||||
|
return errors.Fatalf("must pass id '%s' to --unsafe-recover-no-free-space", repoID)
|
||||||
|
}
|
||||||
|
opts.unsafeRecovery = true
|
||||||
|
}
|
||||||
|
|
||||||
lock, err := lockRepoExclusive(gopts.ctx, repo)
|
lock, err := lockRepoExclusive(gopts.ctx, repo)
|
||||||
defer unlockRepo(lock)
|
defer unlockRepo(lock)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -522,7 +538,14 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
||||||
ignorePacks.Merge(removePacks)
|
ignorePacks.Merge(removePacks)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(ignorePacks) != 0 {
|
if opts.unsafeRecovery {
|
||||||
|
Verbosef("deleting index files\n")
|
||||||
|
indexFiles := repo.Index().(*repository.MasterIndex).IDs()
|
||||||
|
err = DeleteFilesChecked(gopts, repo, indexFiles, restic.IndexFile)
|
||||||
|
if err != nil {
|
||||||
|
return errors.Fatalf("%s", err)
|
||||||
|
}
|
||||||
|
} else if len(ignorePacks) != 0 {
|
||||||
err = rebuildIndexFiles(gopts, repo, ignorePacks, nil)
|
err = rebuildIndexFiles(gopts, repo, ignorePacks, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return errors.Fatalf("%s", err)
|
return errors.Fatalf("%s", err)
|
||||||
|
@ -534,11 +557,18 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
||||||
DeleteFiles(gopts, repo, removePacks, restic.PackFile)
|
DeleteFiles(gopts, repo, removePacks, restic.PackFile)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if opts.unsafeRecovery {
|
||||||
|
_, err = writeIndexFiles(gopts, repo, ignorePacks, nil)
|
||||||
|
if err != nil {
|
||||||
|
return errors.Fatalf("%s", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Verbosef("done\n")
|
Verbosef("done\n")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) error {
|
func writeIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) (restic.IDSet, error) {
|
||||||
Verbosef("rebuilding index\n")
|
Verbosef("rebuilding index\n")
|
||||||
|
|
||||||
idx := (repo.Index()).(*repository.MasterIndex)
|
idx := (repo.Index()).(*repository.MasterIndex)
|
||||||
|
@ -546,6 +576,11 @@ func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks
|
||||||
bar := newProgressMax(!gopts.Quiet, packcount, "packs processed")
|
bar := newProgressMax(!gopts.Quiet, packcount, "packs processed")
|
||||||
obsoleteIndexes, err := idx.Save(gopts.ctx, repo, removePacks, extraObsolete, bar)
|
obsoleteIndexes, err := idx.Save(gopts.ctx, repo, removePacks, extraObsolete, bar)
|
||||||
bar.Done()
|
bar.Done()
|
||||||
|
return obsoleteIndexes, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) error {
|
||||||
|
obsoleteIndexes, err := writeIndexFiles(gopts, repo, removePacks, extraObsolete)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -1573,26 +1573,35 @@ func TestCheckRestoreNoLock(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestPrune(t *testing.T) {
|
func TestPrune(t *testing.T) {
|
||||||
t.Run("0", func(t *testing.T) {
|
testPruneVariants(t, false)
|
||||||
opts := PruneOptions{MaxUnused: "0%"}
|
testPruneVariants(t, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
func testPruneVariants(t *testing.T, unsafeNoSpaceRecovery bool) {
|
||||||
|
suffix := ""
|
||||||
|
if unsafeNoSpaceRecovery {
|
||||||
|
suffix = "-recovery"
|
||||||
|
}
|
||||||
|
t.Run("0"+suffix, func(t *testing.T) {
|
||||||
|
opts := PruneOptions{MaxUnused: "0%", unsafeRecovery: unsafeNoSpaceRecovery}
|
||||||
checkOpts := CheckOptions{ReadData: true, CheckUnused: true}
|
checkOpts := CheckOptions{ReadData: true, CheckUnused: true}
|
||||||
testPrune(t, opts, checkOpts)
|
testPrune(t, opts, checkOpts)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("50", func(t *testing.T) {
|
t.Run("50"+suffix, func(t *testing.T) {
|
||||||
opts := PruneOptions{MaxUnused: "50%"}
|
opts := PruneOptions{MaxUnused: "50%", unsafeRecovery: unsafeNoSpaceRecovery}
|
||||||
checkOpts := CheckOptions{ReadData: true}
|
checkOpts := CheckOptions{ReadData: true}
|
||||||
testPrune(t, opts, checkOpts)
|
testPrune(t, opts, checkOpts)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("unlimited", func(t *testing.T) {
|
t.Run("unlimited"+suffix, func(t *testing.T) {
|
||||||
opts := PruneOptions{MaxUnused: "unlimited"}
|
opts := PruneOptions{MaxUnused: "unlimited", unsafeRecovery: unsafeNoSpaceRecovery}
|
||||||
checkOpts := CheckOptions{ReadData: true}
|
checkOpts := CheckOptions{ReadData: true}
|
||||||
testPrune(t, opts, checkOpts)
|
testPrune(t, opts, checkOpts)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("CachableOnly", func(t *testing.T) {
|
t.Run("CachableOnly"+suffix, func(t *testing.T) {
|
||||||
opts := PruneOptions{MaxUnused: "5%", RepackCachableOnly: true}
|
opts := PruneOptions{MaxUnused: "5%", RepackCachableOnly: true, unsafeRecovery: unsafeNoSpaceRecovery}
|
||||||
checkOpts := CheckOptions{ReadData: true}
|
checkOpts := CheckOptions{ReadData: true}
|
||||||
testPrune(t, opts, checkOpts)
|
testPrune(t, opts, checkOpts)
|
||||||
})
|
})
|
||||||
|
|
|
@ -444,3 +444,31 @@ The ``prune`` command accepts the following options:
|
||||||
- ``--dry-run`` only show what ``prune`` would do.
|
- ``--dry-run`` only show what ``prune`` would do.
|
||||||
|
|
||||||
- ``--verbose`` increased verbosity shows additional statistics for ``prune``.
|
- ``--verbose`` increased verbosity shows additional statistics for ``prune``.
|
||||||
|
|
||||||
|
|
||||||
|
Recovering from "no free space" errors
|
||||||
|
**************************************
|
||||||
|
|
||||||
|
In some cases, when a repository has grown large enough to fill up all disk space or the
|
||||||
|
allocated quota, ``prune`` might fail to free space. ``prune`` works in such a way
|
||||||
|
that a repository remains usable no matter at which point the command is interrupted.
|
||||||
|
However, this also means that ``prune`` requires some scratch space to work.
|
||||||
|
|
||||||
|
In most cases it is sufficient to instruct ``prune`` to use as little scratch space as
|
||||||
|
possible by running it as ``prune --max-repack-size 0``. Note that for restic versions
|
||||||
|
before 0.13.0 ``prune --max-repack-size 1`` must be used. Obviously, this can only work
|
||||||
|
if several snapshots have been removed using ``forget`` before. This then allows the
|
||||||
|
``prune`` command to actually remove data from the repository. If the command succeeds,
|
||||||
|
but there is still little free space, then remove a few more snapshots and run ``prune`` again.
|
||||||
|
|
||||||
|
If ``prune`` fails to complete, then ``prune --unsafe-recover-no-free-space SOME-ID``
|
||||||
|
is available as a method of last resort. It allows prune to work with little to no free
|
||||||
|
space. However, a **failed** ``prune`` run can cause the repository to become
|
||||||
|
**temporarily unusable**. Therefore, make sure that you have a stable connection to the
|
||||||
|
repository storage, before running this command. In case the command fails, it may become
|
||||||
|
necessary to manually remove all files from the ``index/`` folder of the repository and
|
||||||
|
run ``rebuild-index`` afterwards.
|
||||||
|
|
||||||
|
To prevent accidental use of the ``--unsafe-recover-no-free-space`` option it is
|
||||||
|
necessary to first run ``prune --unsafe-recover-no-free-space SOME-ID`` and then replace
|
||||||
|
``SOME-ID`` with the repository ID requested in the resulting error message.
|
||||||
|
|
|
@ -116,6 +116,28 @@ func (mi *MasterIndex) IsMixedPack(packID restic.ID) bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IDs returns the IDs of all indexes contained in the index.
|
||||||
|
func (mi *MasterIndex) IDs() restic.IDSet {
|
||||||
|
mi.idxMutex.RLock()
|
||||||
|
defer mi.idxMutex.RUnlock()
|
||||||
|
|
||||||
|
ids := restic.NewIDSet()
|
||||||
|
for _, idx := range mi.idx {
|
||||||
|
if !idx.Final() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
indexIDs, err := idx.IDs()
|
||||||
|
if err != nil {
|
||||||
|
debug.Log("not using index, ID() returned error %v", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for _, id := range indexIDs {
|
||||||
|
ids.Insert(id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ids
|
||||||
|
}
|
||||||
|
|
||||||
// Packs returns all packs that are covered by the index.
|
// Packs returns all packs that are covered by the index.
|
||||||
// If packBlacklist is given, those packs are only contained in the
|
// If packBlacklist is given, those packs are only contained in the
|
||||||
// resulting IDSet if they are contained in a non-final (newly written) index.
|
// resulting IDSet if they are contained in a non-final (newly written) index.
|
||||||
|
|
|
@ -569,20 +569,7 @@ func (r *Repository) Index() restic.MasterIndex {
|
||||||
// SetIndex instructs the repository to use the given index.
|
// SetIndex instructs the repository to use the given index.
|
||||||
func (r *Repository) SetIndex(i restic.MasterIndex) error {
|
func (r *Repository) SetIndex(i restic.MasterIndex) error {
|
||||||
r.idx = i.(*MasterIndex)
|
r.idx = i.(*MasterIndex)
|
||||||
|
return r.PrepareCache()
|
||||||
ids := restic.NewIDSet()
|
|
||||||
for _, idx := range r.idx.All() {
|
|
||||||
indexIDs, err := idx.IDs()
|
|
||||||
if err != nil {
|
|
||||||
debug.Log("not using index, ID() returned error %v", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
for _, id := range indexIDs {
|
|
||||||
ids.Insert(id)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return r.PrepareCache(ids)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// SaveIndex saves an index in the repository.
|
// SaveIndex saves an index in the repository.
|
||||||
|
@ -628,20 +615,16 @@ func (r *Repository) SaveFullIndex(ctx context.Context) error {
|
||||||
func (r *Repository) LoadIndex(ctx context.Context) error {
|
func (r *Repository) LoadIndex(ctx context.Context) error {
|
||||||
debug.Log("Loading index")
|
debug.Log("Loading index")
|
||||||
|
|
||||||
validIndex := restic.NewIDSet()
|
|
||||||
err := ForAllIndexes(ctx, r, func(id restic.ID, idx *Index, oldFormat bool, err error) error {
|
err := ForAllIndexes(ctx, r, func(id restic.ID, idx *Index, oldFormat bool, err error) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
ids, err := idx.IDs()
|
_, err = idx.IDs()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, id := range ids {
|
|
||||||
validIndex.Insert(id)
|
|
||||||
}
|
|
||||||
r.idx.Insert(idx)
|
r.idx.Insert(idx)
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
|
@ -667,7 +650,7 @@ func (r *Repository) LoadIndex(ctx context.Context) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove index files from the cache which have been removed in the repo
|
// remove index files from the cache which have been removed in the repo
|
||||||
return r.PrepareCache(validIndex)
|
return r.PrepareCache()
|
||||||
}
|
}
|
||||||
|
|
||||||
const listPackParallelism = 10
|
const listPackParallelism = 10
|
||||||
|
@ -739,11 +722,12 @@ func (r *Repository) CreateIndexFromPacks(ctx context.Context, packsize map[rest
|
||||||
|
|
||||||
// PrepareCache initializes the local cache. indexIDs is the list of IDs of
|
// PrepareCache initializes the local cache. The IDs of the index files still
|
||||||
// index files still present in the repo.
|
// present in the repo are taken from the repository's in-memory index.
|
||||||
func (r *Repository) PrepareCache(indexIDs restic.IDSet) error {
|
func (r *Repository) PrepareCache() error {
|
||||||
if r.Cache == nil {
|
if r.Cache == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
indexIDs := r.idx.IDs()
|
||||||
debug.Log("prepare cache with %d index files", len(indexIDs))
|
debug.Log("prepare cache with %d index files", len(indexIDs))
|
||||||
|
|
||||||
// clear old index files
|
// clear old index files
|
||||||
|
@ -752,12 +736,7 @@ func (r *Repository) PrepareCache(indexIDs restic.IDSet) error {
|
||||||
fmt.Fprintf(os.Stderr, "error clearing index files in cache: %v\n", err)
|
fmt.Fprintf(os.Stderr, "error clearing index files in cache: %v\n", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
packs := restic.NewIDSet()
|
packs := r.idx.Packs(restic.NewIDSet())
|
||||||
for _, idx := range r.idx.All() {
|
|
||||||
for id := range idx.Packs() {
|
|
||||||
packs.Insert(id)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// clear old packs
|
// clear old packs
|
||||||
err = r.Cache.Clear(restic.PackFile, packs)
|
err = r.Cache.Clear(restic.PackFile, packs)
|
||||||
|
|
Loading…
Reference in a new issue