forked from TrueCloudLab/restic
Merge pull request #1696 from restic/fix-check-cache
check: Improve cache handling
This commit is contained in:
commit
8919125b0b
6 changed files with 156 additions and 51 deletions
27
changelog/unreleased/issue-1665
Normal file
27
changelog/unreleased/issue-1665
Normal file
|
@ -0,0 +1,27 @@
|
|||
Enhancement: Improve cache handling for `restic check`
|
||||
|
||||
For safety reasons, restic does not use a local metadata cache for the `restic
|
||||
check` command, so that data is loaded from the repository and restic can check
|
||||
it's in good condition. When the cache is disabled, restic will fetch each tiny
|
||||
blob needed for checking the integrity using a separate backend request. For
|
||||
non-local backends, that will take a long time, and depending on the backend
|
||||
(e.g. B2) may also be much more expensive.
|
||||
|
||||
This PR adds a few commits which will change the behavior as follows:
|
||||
|
||||
* When `restic check` is called without any additional parameters, it will
|
||||
build a new cache in a temporary directory, which is removed at the end of
|
||||
the check. This way, we'll get readahead for metadata files (so restic will
|
||||
fetch the whole file when the first blob from the file is requested), but
|
||||
all data is freshly fetched from the storage backend. This is the default
|
||||
behavior and will work for almost all users.
|
||||
|
||||
* When `restic check` is called with `--with-cache`, the default on-disk cache
|
||||
is used. This behavior hasn't changed since the cache was introduced.
|
||||
|
||||
* When `--no-cache` is specified, restic falls back to the old behavior, and
|
||||
reads all tiny blobs in separate requests.
|
||||
|
||||
https://github.com/restic/restic/issues/1665
|
||||
https://github.com/restic/restic/issues/1694
|
||||
https://github.com/restic/restic/pull/1696
|
|
@ -2,6 +2,7 @@ package main
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
@ -11,6 +12,7 @@ import (
|
|||
|
||||
"github.com/restic/restic/internal/checker"
|
||||
"github.com/restic/restic/internal/errors"
|
||||
"github.com/restic/restic/internal/fs"
|
||||
"github.com/restic/restic/internal/restic"
|
||||
)
|
||||
|
||||
|
@ -117,15 +119,52 @@ func newReadProgress(gopts GlobalOptions, todo restic.Stat) *restic.Progress {
|
|||
return readProgress
|
||||
}
|
||||
|
||||
// prepareCheckCache configures a special cache directory for check.
|
||||
//
|
||||
// * if --with-cache is specified, the default cache is used
|
||||
// * if the user explicitely requested --no-cache, we don't use any cache
|
||||
// * by default, we use a cache in a temporary directory that is deleted after the check
|
||||
func prepareCheckCache(opts CheckOptions, gopts *GlobalOptions) (cleanup func()) {
|
||||
cleanup = func() {}
|
||||
if opts.WithCache {
|
||||
// use the default cache, no setup needed
|
||||
return cleanup
|
||||
}
|
||||
|
||||
if gopts.NoCache {
|
||||
// don't use any cache, no setup needed
|
||||
return cleanup
|
||||
}
|
||||
|
||||
// use a cache in a temporary directory
|
||||
tempdir, err := ioutil.TempDir("", "restic-check-cache-")
|
||||
if err != nil {
|
||||
// if an error occurs, don't use any cache
|
||||
Warnf("unable to create temporary directory for cache during check, disabling cache: %v\n", err)
|
||||
gopts.NoCache = true
|
||||
return cleanup
|
||||
}
|
||||
|
||||
gopts.CacheDir = tempdir
|
||||
Verbosef("using temporary cache in %v\n", tempdir)
|
||||
|
||||
cleanup = func() {
|
||||
err := fs.RemoveAll(tempdir)
|
||||
if err != nil {
|
||||
Warnf("error removing temporary cache directory: %v\n", err)
|
||||
}
|
||||
}
|
||||
|
||||
return cleanup
|
||||
}
|
||||
|
||||
func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
|
||||
if len(args) != 0 {
|
||||
return errors.Fatal("check has no arguments")
|
||||
}
|
||||
|
||||
if !opts.WithCache {
|
||||
// do not use a cache for the checker
|
||||
gopts.NoCache = true
|
||||
}
|
||||
cleanup := prepareCheckCache(opts, &gopts)
|
||||
defer cleanup()
|
||||
|
||||
repo, err := OpenRepository(gopts)
|
||||
if err != nil {
|
||||
|
|
|
@ -164,7 +164,11 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
|
|||
}
|
||||
}
|
||||
|
||||
c.repo.SetIndex(c.masterIndex)
|
||||
err := c.repo.SetIndex(c.masterIndex)
|
||||
if err != nil {
|
||||
debug.Log("SetIndex returned error: %v", err)
|
||||
errs = append(errs, err)
|
||||
}
|
||||
|
||||
return hints, errs
|
||||
}
|
||||
|
|
|
@ -11,7 +11,7 @@ type Repository struct {
|
|||
|
||||
KeyFn func() *crypto.Key
|
||||
|
||||
SetIndexFn func(restic.Index)
|
||||
SetIndexFn func(restic.Index) error
|
||||
|
||||
IndexFn func() restic.Index
|
||||
SaveFullIndexFn func() error
|
||||
|
@ -51,8 +51,8 @@ func (repo Repository) Key() *crypto.Key {
|
|||
}
|
||||
|
||||
// SetIndex is a stub method.
|
||||
func (repo Repository) SetIndex(idx restic.Index) {
|
||||
repo.SetIndexFn(idx)
|
||||
func (repo Repository) SetIndex(idx restic.Index) error {
|
||||
return repo.SetIndexFn(idx)
|
||||
}
|
||||
|
||||
// Index is a stub method.
|
||||
|
|
|
@ -331,8 +331,20 @@ func (r *Repository) Index() restic.Index {
|
|||
}
|
||||
|
||||
// SetIndex instructs the repository to use the given index.
|
||||
func (r *Repository) SetIndex(i restic.Index) {
|
||||
func (r *Repository) SetIndex(i restic.Index) error {
|
||||
r.idx = i.(*MasterIndex)
|
||||
|
||||
ids := restic.NewIDSet()
|
||||
for _, idx := range r.idx.All() {
|
||||
id, err := idx.ID()
|
||||
if err != nil {
|
||||
debug.Log("not using index, ID() returned error %v", err)
|
||||
continue
|
||||
}
|
||||
ids.Insert(id)
|
||||
}
|
||||
|
||||
return r.PrepareCache(ids)
|
||||
}
|
||||
|
||||
// SaveIndex saves an index in the repository.
|
||||
|
@ -413,52 +425,75 @@ func (r *Repository) LoadIndex(ctx context.Context) error {
|
|||
r.idx.Insert(idx)
|
||||
}
|
||||
|
||||
if r.Cache != nil {
|
||||
// clear old index files
|
||||
err := r.Cache.Clear(restic.IndexFile, validIndex)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error clearing index files in cache: %v\n", err)
|
||||
}
|
||||
|
||||
packs := restic.NewIDSet()
|
||||
for _, idx := range r.idx.All() {
|
||||
for id := range idx.Packs() {
|
||||
packs.Insert(id)
|
||||
}
|
||||
}
|
||||
|
||||
// clear old data files
|
||||
err = r.Cache.Clear(restic.DataFile, packs)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error clearing data files in cache: %v\n", err)
|
||||
}
|
||||
|
||||
treePacks := restic.NewIDSet()
|
||||
for _, idx := range r.idx.All() {
|
||||
for _, id := range idx.TreePacks() {
|
||||
treePacks.Insert(id)
|
||||
}
|
||||
}
|
||||
|
||||
// use readahead
|
||||
cache := r.Cache.(*cache.Cache)
|
||||
cache.PerformReadahead = func(h restic.Handle) bool {
|
||||
if h.Type != restic.DataFile {
|
||||
return false
|
||||
}
|
||||
|
||||
id, err := restic.ParseID(h.Name)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return treePacks.Has(id)
|
||||
}
|
||||
err := r.PrepareCache(validIndex)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return <-errCh
|
||||
}
|
||||
|
||||
// PrepareCache initializes the local cache. indexIDs is the list of IDs of
|
||||
// index files still present in the repo.
|
||||
func (r *Repository) PrepareCache(indexIDs restic.IDSet) error {
|
||||
if r.Cache == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
debug.Log("prepare cache with %d index files", len(indexIDs))
|
||||
|
||||
// clear old index files
|
||||
err := r.Cache.Clear(restic.IndexFile, indexIDs)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error clearing index files in cache: %v\n", err)
|
||||
}
|
||||
|
||||
packs := restic.NewIDSet()
|
||||
for _, idx := range r.idx.All() {
|
||||
for id := range idx.Packs() {
|
||||
packs.Insert(id)
|
||||
}
|
||||
}
|
||||
|
||||
// clear old data files
|
||||
err = r.Cache.Clear(restic.DataFile, packs)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error clearing data files in cache: %v\n", err)
|
||||
}
|
||||
|
||||
treePacks := restic.NewIDSet()
|
||||
for _, idx := range r.idx.All() {
|
||||
for _, id := range idx.TreePacks() {
|
||||
treePacks.Insert(id)
|
||||
}
|
||||
}
|
||||
|
||||
// use readahead
|
||||
debug.Log("using readahead")
|
||||
cache := r.Cache.(*cache.Cache)
|
||||
cache.PerformReadahead = func(h restic.Handle) bool {
|
||||
if h.Type != restic.DataFile {
|
||||
debug.Log("no readahead for %v, is not data file", h)
|
||||
return false
|
||||
}
|
||||
|
||||
id, err := restic.ParseID(h.Name)
|
||||
if err != nil {
|
||||
debug.Log("no readahead for %v, invalid ID", h)
|
||||
return false
|
||||
}
|
||||
|
||||
if treePacks.Has(id) {
|
||||
debug.Log("perform readahead for %v", h)
|
||||
return true
|
||||
}
|
||||
debug.Log("no readahead for %v, not tree file", h)
|
||||
return false
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// LoadIndex loads the index id from backend and returns it.
|
||||
func LoadIndex(ctx context.Context, repo restic.Repository, id restic.ID) (*Index, error) {
|
||||
idx, err := LoadIndexWithDecoder(ctx, repo, id, DecodeIndex)
|
||||
|
|
|
@ -15,7 +15,7 @@ type Repository interface {
|
|||
|
||||
Key() *crypto.Key
|
||||
|
||||
SetIndex(Index)
|
||||
SetIndex(Index) error
|
||||
|
||||
Index() Index
|
||||
SaveFullIndex(context.Context) error
|
||||
|
|
Loading…
Reference in a new issue