Merge pull request #3481 from MichaelEischer/recover-enospace

Recover from no free space errors
This commit is contained in:
Alexander Neumann 2022-04-30 20:22:26 +02:00 committed by GitHub
commit ffbd48c0c6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 120 additions and 38 deletions

View file

@ -0,0 +1,9 @@
Enhancement: Support pruning even after running out of disk space
When running out of disk space, it was no longer possible to add or remove
data from a repository. To help recover from such a deadlock, the prune
command now supports an `--unsafe-recover-no-free-space` option.
Make sure to read the documentation first!
https://github.com/restic/restic/issues/1153
https://github.com/restic/restic/pull/3481

View file

@ -40,6 +40,9 @@ Exit status is 0 if the command was successful, and non-zero if there was any er
// PruneOptions collects all options for the cleanup command. // PruneOptions collects all options for the cleanup command.
type PruneOptions struct { type PruneOptions struct {
DryRun bool DryRun bool
UnsafeNoSpaceRecovery string
unsafeRecovery bool
MaxUnused string MaxUnused string
maxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused maxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused
@ -56,6 +59,7 @@ func init() {
cmdRoot.AddCommand(cmdPrune) cmdRoot.AddCommand(cmdPrune)
f := cmdPrune.Flags() f := cmdPrune.Flags()
f.BoolVarP(&pruneOptions.DryRun, "dry-run", "n", false, "do not modify the repository, just print what would be done") f.BoolVarP(&pruneOptions.DryRun, "dry-run", "n", false, "do not modify the repository, just print what would be done")
f.StringVarP(&pruneOptions.UnsafeNoSpaceRecovery, "unsafe-recover-no-free-space", "", "", "UNSAFE, READ THE DOCUMENTATION BEFORE USING! Try to recover a repository stuck with no free space. Do not use without trying out 'prune --max-repack-size 0' first.")
addPruneOptions(cmdPrune) addPruneOptions(cmdPrune)
} }
@ -75,6 +79,10 @@ func verifyPruneOptions(opts *PruneOptions) error {
} }
opts.MaxRepackBytes = uint64(size) opts.MaxRepackBytes = uint64(size)
} }
if opts.UnsafeNoSpaceRecovery != "" {
// prevent repacking data to make sure users cannot get stuck.
opts.MaxRepackBytes = 0
}
maxUnused := strings.TrimSpace(opts.MaxUnused) maxUnused := strings.TrimSpace(opts.MaxUnused)
if maxUnused == "" { if maxUnused == "" {
@ -136,6 +144,14 @@ func runPrune(opts PruneOptions, gopts GlobalOptions) error {
return errors.Fatal("prune requires a backend connection limit of at least two") return errors.Fatal("prune requires a backend connection limit of at least two")
} }
if opts.UnsafeNoSpaceRecovery != "" {
repoID := repo.Config().ID
if opts.UnsafeNoSpaceRecovery != repoID {
return errors.Fatalf("must pass id '%s' to --unsafe-recover-no-free-space", repoID)
}
opts.unsafeRecovery = true
}
lock, err := lockRepoExclusive(gopts.ctx, repo) lock, err := lockRepoExclusive(gopts.ctx, repo)
defer unlockRepo(lock) defer unlockRepo(lock)
if err != nil { if err != nil {
@ -522,7 +538,14 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
ignorePacks.Merge(removePacks) ignorePacks.Merge(removePacks)
} }
if len(ignorePacks) != 0 { if opts.unsafeRecovery {
Verbosef("deleting index files\n")
indexFiles := repo.Index().(*repository.MasterIndex).IDs()
err = DeleteFilesChecked(gopts, repo, indexFiles, restic.IndexFile)
if err != nil {
return errors.Fatalf("%s", err)
}
} else if len(ignorePacks) != 0 {
err = rebuildIndexFiles(gopts, repo, ignorePacks, nil) err = rebuildIndexFiles(gopts, repo, ignorePacks, nil)
if err != nil { if err != nil {
return errors.Fatalf("%s", err) return errors.Fatalf("%s", err)
@ -534,11 +557,18 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
DeleteFiles(gopts, repo, removePacks, restic.PackFile) DeleteFiles(gopts, repo, removePacks, restic.PackFile)
} }
if opts.unsafeRecovery {
_, err = writeIndexFiles(gopts, repo, ignorePacks, nil)
if err != nil {
return errors.Fatalf("%s", err)
}
}
Verbosef("done\n") Verbosef("done\n")
return nil return nil
} }
func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) error { func writeIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) (restic.IDSet, error) {
Verbosef("rebuilding index\n") Verbosef("rebuilding index\n")
idx := (repo.Index()).(*repository.MasterIndex) idx := (repo.Index()).(*repository.MasterIndex)
@ -546,6 +576,11 @@ func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks
bar := newProgressMax(!gopts.Quiet, packcount, "packs processed") bar := newProgressMax(!gopts.Quiet, packcount, "packs processed")
obsoleteIndexes, err := idx.Save(gopts.ctx, repo, removePacks, extraObsolete, bar) obsoleteIndexes, err := idx.Save(gopts.ctx, repo, removePacks, extraObsolete, bar)
bar.Done() bar.Done()
return obsoleteIndexes, err
}
func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) error {
obsoleteIndexes, err := writeIndexFiles(gopts, repo, removePacks, extraObsolete)
if err != nil { if err != nil {
return err return err
} }

View file

@ -1573,26 +1573,35 @@ func TestCheckRestoreNoLock(t *testing.T) {
} }
func TestPrune(t *testing.T) { func TestPrune(t *testing.T) {
t.Run("0", func(t *testing.T) { testPruneVariants(t, false)
opts := PruneOptions{MaxUnused: "0%"} testPruneVariants(t, true)
}
func testPruneVariants(t *testing.T, unsafeNoSpaceRecovery bool) {
suffix := ""
if unsafeNoSpaceRecovery {
suffix = "-recovery"
}
t.Run("0"+suffix, func(t *testing.T) {
opts := PruneOptions{MaxUnused: "0%", unsafeRecovery: unsafeNoSpaceRecovery}
checkOpts := CheckOptions{ReadData: true, CheckUnused: true} checkOpts := CheckOptions{ReadData: true, CheckUnused: true}
testPrune(t, opts, checkOpts) testPrune(t, opts, checkOpts)
}) })
t.Run("50", func(t *testing.T) { t.Run("50"+suffix, func(t *testing.T) {
opts := PruneOptions{MaxUnused: "50%"} opts := PruneOptions{MaxUnused: "50%", unsafeRecovery: unsafeNoSpaceRecovery}
checkOpts := CheckOptions{ReadData: true} checkOpts := CheckOptions{ReadData: true}
testPrune(t, opts, checkOpts) testPrune(t, opts, checkOpts)
}) })
t.Run("unlimited", func(t *testing.T) { t.Run("unlimited"+suffix, func(t *testing.T) {
opts := PruneOptions{MaxUnused: "unlimited"} opts := PruneOptions{MaxUnused: "unlimited", unsafeRecovery: unsafeNoSpaceRecovery}
checkOpts := CheckOptions{ReadData: true} checkOpts := CheckOptions{ReadData: true}
testPrune(t, opts, checkOpts) testPrune(t, opts, checkOpts)
}) })
t.Run("CachableOnly", func(t *testing.T) { t.Run("CachableOnly"+suffix, func(t *testing.T) {
opts := PruneOptions{MaxUnused: "5%", RepackCachableOnly: true} opts := PruneOptions{MaxUnused: "5%", RepackCachableOnly: true, unsafeRecovery: unsafeNoSpaceRecovery}
checkOpts := CheckOptions{ReadData: true} checkOpts := CheckOptions{ReadData: true}
testPrune(t, opts, checkOpts) testPrune(t, opts, checkOpts)
}) })

View file

@ -444,3 +444,31 @@ The ``prune`` command accepts the following options:
- ``--dry-run`` only show what ``prune`` would do. - ``--dry-run`` only show what ``prune`` would do.
- ``--verbose`` increased verbosity shows additional statistics for ``prune``. - ``--verbose`` increased verbosity shows additional statistics for ``prune``.
Recovering from "no free space" errors
**************************************
When a repository has grown large enough to fill up all disk space or the
allocated quota, ``prune`` might fail to free space. ``prune`` works in such a way
that a repository remains usable no matter at which point the command is interrupted.
However, this also means that ``prune`` requires some scratch space to work.
In most cases it is sufficient to instruct ``prune`` to use as little scratch space as
possible by running it as ``prune --max-repack-size 0``. Note that for restic versions
before 0.13.0 ``prune --max-repack-size 1`` must be used. Obviously, this can only work
if several snapshots have been removed using ``forget`` before. This then allows the
``prune`` command to actually remove data from the repository. If the command succeeds,
but there is still little free space, then remove a few more snapshots and run ``prune`` again.
If ``prune`` fails to complete, then ``prune --unsafe-recover-no-free-space SOME-ID``
is available as a method of last resort. It allows prune to work with little to no free
space. However, a **failed** ``prune`` run can cause the repository to become
**temporarily unusable**. Therefore, make sure that you have a stable connection to the
repository storage before running this command. In case the command fails, it may become
necessary to manually remove all files from the ``index/`` folder of the repository and
run ``rebuild-index`` afterwards.
To prevent accidental use of the ``--unsafe-recover-no-free-space`` option, it is
necessary to first run ``prune --unsafe-recover-no-free-space SOME-ID``. The command then
aborts with an error message that states the required ID. Run the command again with
``SOME-ID`` replaced by that ID.

View file

@ -116,6 +116,28 @@ func (mi *MasterIndex) IsMixedPack(packID restic.ID) bool {
return false return false
} }
// IDs gathers the IDs of all finalized indexes held by the master index.
// Indexes that are not final are skipped. An index whose IDs() call fails is
// skipped as well; the error is only written to the debug log.
func (mi *MasterIndex) IDs() restic.IDSet {
	mi.idxMutex.RLock()
	defer mi.idxMutex.RUnlock()

	collected := restic.NewIDSet()
	for _, index := range mi.idx {
		if index.Final() {
			finalIDs, err := index.IDs()
			if err != nil {
				debug.Log("not using index, ID() returned error %v", err)
				continue
			}
			for _, indexID := range finalIDs {
				collected.Insert(indexID)
			}
		}
	}

	return collected
}
// Packs returns all packs that are covered by the index. // Packs returns all packs that are covered by the index.
// If packBlacklist is given, those packs are only contained in the // If packBlacklist is given, those packs are only contained in the
// resulting IDSet if they are contained in a non-final (newly written) index. // resulting IDSet if they are contained in a non-final (newly written) index.

View file

@ -569,20 +569,7 @@ func (r *Repository) Index() restic.MasterIndex {
// SetIndex instructs the repository to use the given index. // SetIndex instructs the repository to use the given index.
func (r *Repository) SetIndex(i restic.MasterIndex) error { func (r *Repository) SetIndex(i restic.MasterIndex) error {
r.idx = i.(*MasterIndex) r.idx = i.(*MasterIndex)
return r.PrepareCache()
ids := restic.NewIDSet()
for _, idx := range r.idx.All() {
indexIDs, err := idx.IDs()
if err != nil {
debug.Log("not using index, ID() returned error %v", err)
continue
}
for _, id := range indexIDs {
ids.Insert(id)
}
}
return r.PrepareCache(ids)
} }
// SaveIndex saves an index in the repository. // SaveIndex saves an index in the repository.
@ -628,20 +615,16 @@ func (r *Repository) SaveFullIndex(ctx context.Context) error {
func (r *Repository) LoadIndex(ctx context.Context) error { func (r *Repository) LoadIndex(ctx context.Context) error {
debug.Log("Loading index") debug.Log("Loading index")
validIndex := restic.NewIDSet()
err := ForAllIndexes(ctx, r, func(id restic.ID, idx *Index, oldFormat bool, err error) error { err := ForAllIndexes(ctx, r, func(id restic.ID, idx *Index, oldFormat bool, err error) error {
if err != nil { if err != nil {
return err return err
} }
ids, err := idx.IDs() _, err = idx.IDs()
if err != nil { if err != nil {
return err return err
} }
for _, id := range ids {
validIndex.Insert(id)
}
r.idx.Insert(idx) r.idx.Insert(idx)
return nil return nil
}) })
@ -667,7 +650,7 @@ func (r *Repository) LoadIndex(ctx context.Context) error {
} }
// remove index files from the cache which have been removed in the repo // remove index files from the cache which have been removed in the repo
return r.PrepareCache(validIndex) return r.PrepareCache()
} }
const listPackParallelism = 10 const listPackParallelism = 10
@ -739,11 +722,12 @@ func (r *Repository) CreateIndexFromPacks(ctx context.Context, packsize map[rest
// PrepareCache initializes the local cache. indexIDs is the list of IDs of // PrepareCache initializes the local cache. indexIDs is the list of IDs of
// index files still present in the repo. // index files still present in the repo.
func (r *Repository) PrepareCache(indexIDs restic.IDSet) error { func (r *Repository) PrepareCache() error {
if r.Cache == nil { if r.Cache == nil {
return nil return nil
} }
indexIDs := r.idx.IDs()
debug.Log("prepare cache with %d index files", len(indexIDs)) debug.Log("prepare cache with %d index files", len(indexIDs))
// clear old index files // clear old index files
@ -752,12 +736,7 @@ func (r *Repository) PrepareCache(indexIDs restic.IDSet) error {
fmt.Fprintf(os.Stderr, "error clearing index files in cache: %v\n", err) fmt.Fprintf(os.Stderr, "error clearing index files in cache: %v\n", err)
} }
packs := restic.NewIDSet() packs := r.idx.Packs(restic.NewIDSet())
for _, idx := range r.idx.All() {
for id := range idx.Packs() {
packs.Insert(id)
}
}
// clear old packs // clear old packs
err = r.Cache.Clear(restic.PackFile, packs) err = r.Cache.Clear(restic.PackFile, packs)