Merge pull request #1696 from restic/fix-check-cache

check: Improve cache handling
This commit is contained in:
Alexander Neumann 2018-04-01 14:33:00 +02:00
commit 8919125b0b
6 changed files with 156 additions and 51 deletions

View file

@ -0,0 +1,27 @@
Enhancement: Improve cache handling for `restic check`
For safety reasons, restic does not use a local metadata cache for the `restic
check` command, so that data is loaded from the repository and restic can check
it's in good condition. When the cache is disabled, restic will fetch each tiny
blob needed for checking the integrity using a separate backend request. For
non-local backends, that will take a long time, and depending on the backend
(e.g. B2) may also be much more expensive.
This PR adds a few commits which will change the behavior as follows:
* When `restic check` is called without any additional parameters, it will
build a new cache in a temporary directory, which is removed at the end of
the check. This way, we'll get readahead for metadata files (so restic will
fetch the whole file when the first blob from the file is requested), but
all data is freshly fetched from the storage backend. This is the default
behavior and will work for almost all users.
* When `restic check` is called with `--with-cache`, the default on-disk cache
is used. This behavior hasn't changed since the cache was introduced.
* When `--no-cache` is specified, restic falls back to the old behavior, and
reads all tiny blobs in separate requests.
https://github.com/restic/restic/issues/1665
https://github.com/restic/restic/issues/1694
https://github.com/restic/restic/pull/1696

View file

@ -2,6 +2,7 @@ package main
import (
"fmt"
"io/ioutil"
"os"
"strconv"
"strings"
@ -11,6 +12,7 @@ import (
"github.com/restic/restic/internal/checker"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/restic"
)
@ -117,15 +119,52 @@ func newReadProgress(gopts GlobalOptions, todo restic.Stat) *restic.Progress {
return readProgress
}
// prepareCheckCache selects the cache setup used while running check and
// returns a function the caller must run once the check is done.
//
//  * with --with-cache, gopts is left unchanged so the default cache applies
//  * with an explicit --no-cache, gopts is left unchanged as well
//  * otherwise a fresh temporary directory is created and stored in
//    gopts.CacheDir; the returned function removes that directory again
//
// When the temporary directory cannot be created, a warning is printed and
// gopts.NoCache is set to true. In every case where no directory was created
// the returned function does nothing.
func prepareCheckCache(opts CheckOptions, gopts *GlobalOptions) (cleanup func()) {
	noop := func() {}

	// Neither the default cache nor "no cache at all" needs any preparation.
	if opts.WithCache || gopts.NoCache {
		return noop
	}

	dir, err := ioutil.TempDir("", "restic-check-cache-")
	if err != nil {
		// Without a usable directory, carry on with the cache switched off.
		Warnf("unable to create temporary directory for cache during check, disabling cache: %v\n", err)
		gopts.NoCache = true
		return noop
	}

	gopts.CacheDir = dir
	Verbosef("using temporary cache in %v\n", dir)

	return func() {
		if rmErr := fs.RemoveAll(dir); rmErr != nil {
			Warnf("error removing temporary cache directory: %v\n", rmErr)
		}
	}
}
func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
if len(args) != 0 {
return errors.Fatal("check has no arguments")
}
if !opts.WithCache {
// do not use a cache for the checker
gopts.NoCache = true
}
cleanup := prepareCheckCache(opts, &gopts)
defer cleanup()
repo, err := OpenRepository(gopts)
if err != nil {

View file

@ -164,7 +164,11 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
}
}
c.repo.SetIndex(c.masterIndex)
err := c.repo.SetIndex(c.masterIndex)
if err != nil {
debug.Log("SetIndex returned error: %v", err)
errs = append(errs, err)
}
return hints, errs
}

View file

@ -11,7 +11,7 @@ type Repository struct {
KeyFn func() *crypto.Key
SetIndexFn func(restic.Index)
SetIndexFn func(restic.Index) error
IndexFn func() restic.Index
SaveFullIndexFn func() error
@ -51,8 +51,8 @@ func (repo Repository) Key() *crypto.Key {
}
// SetIndex is a stub method.
func (repo Repository) SetIndex(idx restic.Index) {
repo.SetIndexFn(idx)
func (repo Repository) SetIndex(idx restic.Index) error {
// Forward the call to the SetIndexFn hook and hand its error back unchanged.
return repo.SetIndexFn(idx)
}
// Index is a stub method.

View file

@ -331,8 +331,20 @@ func (r *Repository) Index() restic.Index {
}
// SetIndex instructs the repository to use the given index.
func (r *Repository) SetIndex(i restic.Index) {
func (r *Repository) SetIndex(i restic.Index) error {
	// The repository works on a *MasterIndex; the assertion panics for any
	// other implementation of restic.Index.
	master := i.(*MasterIndex)
	r.idx = master

	// Gather the IDs of all indexes that can report one. Indexes whose ID()
	// call fails are logged and left out of the set.
	known := restic.NewIDSet()
	for _, index := range master.All() {
		if id, err := index.ID(); err == nil {
			known.Insert(id)
		} else {
			debug.Log("not using index, ID() returned error %v", err)
		}
	}

	// Let the cache (if any) adapt to the new set of index files.
	return r.PrepareCache(known)
}
// SaveIndex saves an index in the repository.
@ -413,52 +425,75 @@ func (r *Repository) LoadIndex(ctx context.Context) error {
r.idx.Insert(idx)
}
if r.Cache != nil {
// clear old index files
err := r.Cache.Clear(restic.IndexFile, validIndex)
if err != nil {
fmt.Fprintf(os.Stderr, "error clearing index files in cache: %v\n", err)
}
packs := restic.NewIDSet()
for _, idx := range r.idx.All() {
for id := range idx.Packs() {
packs.Insert(id)
}
}
// clear old data files
err = r.Cache.Clear(restic.DataFile, packs)
if err != nil {
fmt.Fprintf(os.Stderr, "error clearing data files in cache: %v\n", err)
}
treePacks := restic.NewIDSet()
for _, idx := range r.idx.All() {
for _, id := range idx.TreePacks() {
treePacks.Insert(id)
}
}
// use readahead
cache := r.Cache.(*cache.Cache)
cache.PerformReadahead = func(h restic.Handle) bool {
if h.Type != restic.DataFile {
return false
}
id, err := restic.ParseID(h.Name)
if err != nil {
return false
}
return treePacks.Has(id)
}
err := r.PrepareCache(validIndex)
if err != nil {
return err
}
return <-errCh
}
// PrepareCache sets up the local cache to match the repository's current
// index. indexIDs holds the IDs of the index files still present in the repo.
//
// It does nothing when the repository has no cache. Otherwise it asks the
// cache to clear index files not in indexIDs and data files not referenced as
// packs by any loaded index, then installs a readahead predicate that accepts
// only data files listed by an index as tree packs. Errors from clearing are
// written to stderr and not returned; the function always returns nil.
func (r *Repository) PrepareCache(indexIDs restic.IDSet) error {
	if r.Cache == nil {
		return nil
	}

	debug.Log("prepare cache with %d index files", len(indexIDs))

	// clear old index files
	if err := r.Cache.Clear(restic.IndexFile, indexIDs); err != nil {
		fmt.Fprintf(os.Stderr, "error clearing index files in cache: %v\n", err)
	}

	// collect every pack known to any of the loaded indexes
	allPacks := restic.NewIDSet()
	for _, index := range r.idx.All() {
		for packID := range index.Packs() {
			allPacks.Insert(packID)
		}
	}

	// clear old data files
	if err := r.Cache.Clear(restic.DataFile, allPacks); err != nil {
		fmt.Fprintf(os.Stderr, "error clearing data files in cache: %v\n", err)
	}

	// collect the packs the indexes report as tree packs
	treePackIDs := restic.NewIDSet()
	for _, index := range r.idx.All() {
		for _, packID := range index.TreePacks() {
			treePackIDs.Insert(packID)
		}
	}

	// use readahead
	debug.Log("using readahead")
	c := r.Cache.(*cache.Cache)
	c.PerformReadahead = func(h restic.Handle) bool {
		if h.Type != restic.DataFile {
			debug.Log("no readahead for %v, is not data file", h)
			return false
		}

		packID, parseErr := restic.ParseID(h.Name)
		if parseErr != nil {
			debug.Log("no readahead for %v, invalid ID", h)
			return false
		}

		if !treePackIDs.Has(packID) {
			debug.Log("no readahead for %v, not tree file", h)
			return false
		}

		debug.Log("perform readahead for %v", h)
		return true
	}

	return nil
}
// LoadIndex loads the index id from backend and returns it.
func LoadIndex(ctx context.Context, repo restic.Repository, id restic.ID) (*Index, error) {
idx, err := LoadIndexWithDecoder(ctx, repo, id, DecodeIndex)

View file

@ -15,7 +15,7 @@ type Repository interface {
Key() *crypto.Key
SetIndex(Index)
SetIndex(Index) error
Index() Index
SaveFullIndex(context.Context) error