forked from TrueCloudLab/restic
Merge pull request #3481 from MichaelEischer/recover-enospace
Recover from no free space errors
This commit is contained in:
commit
ffbd48c0c6
6 changed files with 120 additions and 38 deletions
9
changelog/unreleased/issue-1153
Normal file
9
changelog/unreleased/issue-1153
Normal file
|
@ -0,0 +1,9 @@
|
|||
Enhancement: Support pruning even after running out of disk space
|
||||
|
||||
When running out of disk space it was no longer possible to add or remove
|
||||
data from a repository. To help with recovering from such a deadlock, the
|
||||
prune command now supports an `--unsafe-recover-no-free-space` option to
|
||||
recover from such situations. Make sure to read the documentation first!
|
||||
|
||||
https://github.com/restic/restic/issues/1153
|
||||
https://github.com/restic/restic/pull/3481
|
|
@ -39,7 +39,10 @@ Exit status is 0 if the command was successful, and non-zero if there was any er
|
|||
|
||||
// PruneOptions collects all options for the cleanup command.
|
||||
type PruneOptions struct {
|
||||
DryRun bool
|
||||
DryRun bool
|
||||
UnsafeNoSpaceRecovery string
|
||||
|
||||
unsafeRecovery bool
|
||||
|
||||
MaxUnused string
|
||||
maxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused
|
||||
|
@ -56,6 +59,7 @@ func init() {
|
|||
cmdRoot.AddCommand(cmdPrune)
|
||||
f := cmdPrune.Flags()
|
||||
f.BoolVarP(&pruneOptions.DryRun, "dry-run", "n", false, "do not modify the repository, just print what would be done")
|
||||
f.StringVarP(&pruneOptions.UnsafeNoSpaceRecovery, "unsafe-recover-no-free-space", "", "", "UNSAFE, READ THE DOCUMENTATION BEFORE USING! Try to recover a repository stuck with no free space. Do not use without trying out 'prune --max-repack-size 0' first.")
|
||||
addPruneOptions(cmdPrune)
|
||||
}
|
||||
|
||||
|
@ -75,6 +79,10 @@ func verifyPruneOptions(opts *PruneOptions) error {
|
|||
}
|
||||
opts.MaxRepackBytes = uint64(size)
|
||||
}
|
||||
if opts.UnsafeNoSpaceRecovery != "" {
|
||||
// prevent repacking data to make sure users cannot get stuck.
|
||||
opts.MaxRepackBytes = 0
|
||||
}
|
||||
|
||||
maxUnused := strings.TrimSpace(opts.MaxUnused)
|
||||
if maxUnused == "" {
|
||||
|
@ -136,6 +144,14 @@ func runPrune(opts PruneOptions, gopts GlobalOptions) error {
|
|||
return errors.Fatal("prune requires a backend connection limit of at least two")
|
||||
}
|
||||
|
||||
if opts.UnsafeNoSpaceRecovery != "" {
|
||||
repoID := repo.Config().ID
|
||||
if opts.UnsafeNoSpaceRecovery != repoID {
|
||||
return errors.Fatalf("must pass id '%s' to --unsafe-recover-no-free-space", repoID)
|
||||
}
|
||||
opts.unsafeRecovery = true
|
||||
}
|
||||
|
||||
lock, err := lockRepoExclusive(gopts.ctx, repo)
|
||||
defer unlockRepo(lock)
|
||||
if err != nil {
|
||||
|
@ -522,7 +538,14 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
|||
ignorePacks.Merge(removePacks)
|
||||
}
|
||||
|
||||
if len(ignorePacks) != 0 {
|
||||
if opts.unsafeRecovery {
|
||||
Verbosef("deleting index files\n")
|
||||
indexFiles := repo.Index().(*repository.MasterIndex).IDs()
|
||||
err = DeleteFilesChecked(gopts, repo, indexFiles, restic.IndexFile)
|
||||
if err != nil {
|
||||
return errors.Fatalf("%s", err)
|
||||
}
|
||||
} else if len(ignorePacks) != 0 {
|
||||
err = rebuildIndexFiles(gopts, repo, ignorePacks, nil)
|
||||
if err != nil {
|
||||
return errors.Fatalf("%s", err)
|
||||
|
@ -534,11 +557,18 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
|||
DeleteFiles(gopts, repo, removePacks, restic.PackFile)
|
||||
}
|
||||
|
||||
if opts.unsafeRecovery {
|
||||
_, err = writeIndexFiles(gopts, repo, ignorePacks, nil)
|
||||
if err != nil {
|
||||
return errors.Fatalf("%s", err)
|
||||
}
|
||||
}
|
||||
|
||||
Verbosef("done\n")
|
||||
return nil
|
||||
}
|
||||
|
||||
func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) error {
|
||||
func writeIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) (restic.IDSet, error) {
|
||||
Verbosef("rebuilding index\n")
|
||||
|
||||
idx := (repo.Index()).(*repository.MasterIndex)
|
||||
|
@ -546,6 +576,11 @@ func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks
|
|||
bar := newProgressMax(!gopts.Quiet, packcount, "packs processed")
|
||||
obsoleteIndexes, err := idx.Save(gopts.ctx, repo, removePacks, extraObsolete, bar)
|
||||
bar.Done()
|
||||
return obsoleteIndexes, err
|
||||
}
|
||||
|
||||
func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) error {
|
||||
obsoleteIndexes, err := writeIndexFiles(gopts, repo, removePacks, extraObsolete)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -1573,26 +1573,35 @@ func TestCheckRestoreNoLock(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestPrune(t *testing.T) {
|
||||
t.Run("0", func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnused: "0%"}
|
||||
testPruneVariants(t, false)
|
||||
testPruneVariants(t, true)
|
||||
}
|
||||
|
||||
func testPruneVariants(t *testing.T, unsafeNoSpaceRecovery bool) {
|
||||
suffix := ""
|
||||
if unsafeNoSpaceRecovery {
|
||||
suffix = "-recovery"
|
||||
}
|
||||
t.Run("0"+suffix, func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnused: "0%", unsafeRecovery: unsafeNoSpaceRecovery}
|
||||
checkOpts := CheckOptions{ReadData: true, CheckUnused: true}
|
||||
testPrune(t, opts, checkOpts)
|
||||
})
|
||||
|
||||
t.Run("50", func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnused: "50%"}
|
||||
t.Run("50"+suffix, func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnused: "50%", unsafeRecovery: unsafeNoSpaceRecovery}
|
||||
checkOpts := CheckOptions{ReadData: true}
|
||||
testPrune(t, opts, checkOpts)
|
||||
})
|
||||
|
||||
t.Run("unlimited", func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnused: "unlimited"}
|
||||
t.Run("unlimited"+suffix, func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnused: "unlimited", unsafeRecovery: unsafeNoSpaceRecovery}
|
||||
checkOpts := CheckOptions{ReadData: true}
|
||||
testPrune(t, opts, checkOpts)
|
||||
})
|
||||
|
||||
t.Run("CachableOnly", func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnused: "5%", RepackCachableOnly: true}
|
||||
t.Run("CachableOnly"+suffix, func(t *testing.T) {
|
||||
opts := PruneOptions{MaxUnused: "5%", RepackCachableOnly: true, unsafeRecovery: unsafeNoSpaceRecovery}
|
||||
checkOpts := CheckOptions{ReadData: true}
|
||||
testPrune(t, opts, checkOpts)
|
||||
})
|
||||
|
|
|
@ -444,3 +444,31 @@ The ``prune`` command accepts the following options:
|
|||
- ``--dry-run`` only show what ``prune`` would do.
|
||||
|
||||
- ``--verbose`` increased verbosity shows additional statistics for ``prune``.
|
||||
|
||||
|
||||
Recovering from "no free space" errors
|
||||
**************************************
|
||||
|
||||
In some cases, when a repository has grown large enough to fill up all disk space or the
|
||||
allocated quota, ``prune`` might fail to free space. ``prune`` works in such a way
|
||||
that a repository remains usable no matter at which point the command is interrupted.
|
||||
However, this also means that ``prune`` requires some scratch space to work.
|
||||
|
||||
In most cases it is sufficient to instruct ``prune`` to use as little scratch space as
|
||||
possible by running it as ``prune --max-repack-size 0``. Note that for restic versions
|
||||
before 0.13.0 ``prune --max-repack-size 1`` must be used. Obviously, this can only work
|
||||
if several snapshots have been removed using ``forget`` before. This then allows the
|
||||
``prune`` command to actually remove data from the repository. If the command succeeds,
|
||||
but there is still little free space, then remove a few more snapshots and run ``prune`` again.
|
||||
|
||||
If ``prune`` fails to complete, then ``prune --unsafe-recover-no-free-space SOME-ID``
|
||||
is available as a method of last resort. It allows prune to work with little to no free
|
||||
space. However, a **failed** ``prune`` run can cause the repository to become
|
||||
**temporarily unusable**. Therefore, make sure that you have a stable connection to the
|
||||
repository storage, before running this command. In case the command fails, it may become
|
||||
necessary to manually remove all files from the ``index/`` folder of the repository and
|
||||
run ``rebuild-index`` afterwards.
|
||||
|
||||
To prevent accidental use of the ``--unsafe-recover-no-free-space`` option, it is
|
||||
necessary to first run ``prune --unsafe-recover-no-free-space SOME-ID`` and then replace
|
||||
``SOME-ID`` with the requested ID.
|
||||
|
|
|
@ -116,6 +116,28 @@ func (mi *MasterIndex) IsMixedPack(packID restic.ID) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// IDs returns the IDs of all final indexes in the master index; non-final indexes are skipped.
|
||||
func (mi *MasterIndex) IDs() restic.IDSet {
|
||||
mi.idxMutex.RLock()
|
||||
defer mi.idxMutex.RUnlock()
|
||||
|
||||
ids := restic.NewIDSet()
|
||||
for _, idx := range mi.idx {
|
||||
if !idx.Final() {
|
||||
continue
|
||||
}
|
||||
indexIDs, err := idx.IDs()
|
||||
if err != nil {
|
||||
debug.Log("not using index, ID() returned error %v", err)
|
||||
continue
|
||||
}
|
||||
for _, id := range indexIDs {
|
||||
ids.Insert(id)
|
||||
}
|
||||
}
|
||||
return ids
|
||||
}
|
||||
|
||||
// Packs returns all packs that are covered by the index.
|
||||
// If packBlacklist is given, those packs are only contained in the
|
||||
// resulting IDSet if they are contained in a non-final (newly written) index.
|
||||
|
|
|
@ -569,20 +569,7 @@ func (r *Repository) Index() restic.MasterIndex {
|
|||
// SetIndex instructs the repository to use the given index.
|
||||
func (r *Repository) SetIndex(i restic.MasterIndex) error {
|
||||
r.idx = i.(*MasterIndex)
|
||||
|
||||
ids := restic.NewIDSet()
|
||||
for _, idx := range r.idx.All() {
|
||||
indexIDs, err := idx.IDs()
|
||||
if err != nil {
|
||||
debug.Log("not using index, ID() returned error %v", err)
|
||||
continue
|
||||
}
|
||||
for _, id := range indexIDs {
|
||||
ids.Insert(id)
|
||||
}
|
||||
}
|
||||
|
||||
return r.PrepareCache(ids)
|
||||
return r.PrepareCache()
|
||||
}
|
||||
|
||||
// SaveIndex saves an index in the repository.
|
||||
|
@ -628,20 +615,16 @@ func (r *Repository) SaveFullIndex(ctx context.Context) error {
|
|||
func (r *Repository) LoadIndex(ctx context.Context) error {
|
||||
debug.Log("Loading index")
|
||||
|
||||
validIndex := restic.NewIDSet()
|
||||
err := ForAllIndexes(ctx, r, func(id restic.ID, idx *Index, oldFormat bool, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ids, err := idx.IDs()
|
||||
_, err = idx.IDs()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, id := range ids {
|
||||
validIndex.Insert(id)
|
||||
}
|
||||
r.idx.Insert(idx)
|
||||
return nil
|
||||
})
|
||||
|
@ -667,7 +650,7 @@ func (r *Repository) LoadIndex(ctx context.Context) error {
|
|||
}
|
||||
|
||||
// remove index files from the cache which have been removed in the repo
|
||||
return r.PrepareCache(validIndex)
|
||||
return r.PrepareCache()
|
||||
}
|
||||
|
||||
const listPackParallelism = 10
|
||||
|
@ -739,11 +722,12 @@ func (r *Repository) CreateIndexFromPacks(ctx context.Context, packsize map[rest
|
|||
|
||||
// PrepareCache initializes the local cache. The IDs of the index files still
|
||||
// present in the repo are taken from the repository's master index.
|
||||
func (r *Repository) PrepareCache(indexIDs restic.IDSet) error {
|
||||
func (r *Repository) PrepareCache() error {
|
||||
if r.Cache == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
indexIDs := r.idx.IDs()
|
||||
debug.Log("prepare cache with %d index files", len(indexIDs))
|
||||
|
||||
// clear old index files
|
||||
|
@ -752,12 +736,7 @@ func (r *Repository) PrepareCache(indexIDs restic.IDSet) error {
|
|||
fmt.Fprintf(os.Stderr, "error clearing index files in cache: %v\n", err)
|
||||
}
|
||||
|
||||
packs := restic.NewIDSet()
|
||||
for _, idx := range r.idx.All() {
|
||||
for id := range idx.Packs() {
|
||||
packs.Insert(id)
|
||||
}
|
||||
}
|
||||
packs := r.idx.Packs(restic.NewIDSet())
|
||||
|
||||
// clear old packs
|
||||
err = r.Cache.Clear(restic.PackFile, packs)
|
||||
|
|
Loading…
Reference in a new issue