From 2ace242f366e65cf3a4fcfa138e4d7eb4e4d13f3 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Thu, 9 May 2024 15:26:41 +0200 Subject: [PATCH] repository: make reloading broken files explicit --- internal/cache/backend.go | 11 ++--- internal/cache/file.go | 10 +++-- internal/repository/raw.go | 61 ++++++++++++-------------- internal/repository/repository_test.go | 14 +++--- 4 files changed, 45 insertions(+), 51 deletions(-) diff --git a/internal/cache/backend.go b/internal/cache/backend.go index 7a7e5cd9f..27b37e9d9 100644 --- a/internal/cache/backend.go +++ b/internal/cache/backend.go @@ -161,14 +161,10 @@ func (b *Backend) Load(ctx context.Context, h backend.Handle, length int, offset // try loading from cache without checking that the handle is actually cached inCache, err := b.loadFromCache(h, length, offset, consumer) if inCache { - if err == nil { - return nil - } - - // drop from cache and retry once - _ = b.Cache.remove(h) + debug.Log("error loading %v from cache: %v", h, err) + // the caller must explicitly use cache.Forget() to remove the cache entry + return err } - debug.Log("error loading %v from cache: %v", h, err) // if we don't automatically cache this file type, fall back to the backend if !autoCacheTypes(h) { @@ -184,6 +180,7 @@ func (b *Backend) Load(ctx context.Context, h backend.Handle, length int, offset inCache, err = b.loadFromCache(h, length, offset, consumer) if inCache { + debug.Log("error loading %v from cache: %v", h, err) return err } diff --git a/internal/cache/file.go b/internal/cache/file.go index b2e9ec618..921add24c 100644 --- a/internal/cache/file.go +++ b/internal/cache/file.go @@ -55,14 +55,12 @@ func (c *Cache) load(h backend.Handle, length int, offset int64) (io.ReadCloser, size := fi.Size() if size <= int64(crypto.CiphertextLength(0)) { _ = f.Close() - _ = c.remove(h) - return nil, errors.Errorf("cached file %v is truncated, removing", h) + return nil, errors.Errorf("cached file %v is truncated", h) } if size < offset+int64(length) { _ = f.Close() - _ = c.remove(h) - return nil, errors.Errorf("cached file %v is too short, removing", h) + return nil, errors.Errorf("cached file %v is too short", h) } if offset > 0 { @@ -139,6 +137,10 @@ func (c *Cache) save(h backend.Handle, rd io.Reader) error { return errors.WithStack(err) } +func (c *Cache) Forget(h backend.Handle) error { + return c.remove(h) +} + // remove deletes a file. When the file is not cached, no error is returned. func (c *Cache) remove(h backend.Handle) error { if !c.canBeCached(h.Type) { diff --git a/internal/repository/raw.go b/internal/repository/raw.go index d173908d4..31443b010 100644 --- a/internal/repository/raw.go +++ b/internal/repository/raw.go @@ -7,7 +7,6 @@ import ( "io" "github.com/restic/restic/internal/backend" - "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/restic" ) @@ -17,47 +16,41 @@ import ( func (r *Repository) LoadRaw(ctx context.Context, t restic.FileType, id restic.ID) (buf []byte, err error) { h := backend.Handle{Type: t, Name: id.String()} - ctx, cancel := context.WithCancel(ctx) + buf, err = loadRaw(ctx, r.be, h) - var dataErr error - retriedInvalidData := false - err = r.be.Load(ctx, h, 0, 0, func(rd io.Reader) error { - // make sure this is idempotent, in case an error occurs this function may be called multiple times! - wr := bytes.NewBuffer(buf[:0]) - _, cerr := io.Copy(wr, rd) - if cerr != nil { - return cerr + // retry loading damaged data only once. If a file fails to download correctly + // the second time, then it is likely corrupted at the backend. + if h.Type != backend.ConfigFile && id != restic.Hash(buf) { + if r.Cache != nil { + // Cleanup cache to make sure it's not the cached copy that is broken. + // Ignore error as there's not much we can do in that case. + _ = r.Cache.Forget(h) } - buf = wr.Bytes() - // retry loading damaged data only once. If a file fails to download correctly - // the second time, then it is likely corrupted at the backend. - if h.Type != backend.ConfigFile { - if id != restic.Hash(buf) { - if !retriedInvalidData { - debug.Log("retry loading broken blob %v", h) - retriedInvalidData = true - } else { - // with a canceled context there is not guarantee which error will - // be returned by `be.Load`. - dataErr = fmt.Errorf("loadAll(%v): %w", h, restic.ErrInvalidData) - cancel() - } - return restic.ErrInvalidData - } + buf, err = loadRaw(ctx, r.be, h) + + if err == nil && id != restic.Hash(buf) { + // Return corrupted data to the caller if it is still broken the second time to + // let the caller decide what to do with the data. + return buf, fmt.Errorf("LoadRaw(%v): %w", h, restic.ErrInvalidData) } - return nil - }) - - // Return corrupted data to the caller if it is still broken the second time to - // let the caller decide what to do with the data. - if dataErr != nil { - return buf, dataErr } if err != nil { return nil, err } - return buf, nil } + +func loadRaw(ctx context.Context, be backend.Backend, h backend.Handle) (buf []byte, err error) { + err = be.Load(ctx, h, 0, 0, func(rd io.Reader) error { + wr := new(bytes.Buffer) + _, cerr := io.Copy(wr, rd) + if cerr != nil { + return cerr + } + buf = wr.Bytes() + return cerr + }) + return buf, err +} diff --git a/internal/repository/repository_test.go b/internal/repository/repository_test.go index d7481117a..28829e4cf 100644 --- a/internal/repository/repository_test.go +++ b/internal/repository/repository_test.go @@ -9,6 +9,7 @@ import ( "math/rand" "os" "path/filepath" + "sync" "testing" "time" @@ -264,6 +265,7 @@ func TestRepositoryLoadUnpackedBroken(t *testing.T) { type damageOnceBackend struct { backend.Backend + m sync.Map } func (be *damageOnceBackend) Load(ctx context.Context, h backend.Handle, length int, offset int64, fn func(rd io.Reader) error) error { @@ -271,13 +273,13 @@ func (be *damageOnceBackend) Load(ctx context.Context, h backend.Handle, length if h.Type == restic.ConfigFile { return be.Backend.Load(ctx, h, length, offset, fn) } - // return broken data on the first try - err := be.Backend.Load(ctx, h, length+1, offset, fn) - if err != nil { - // retry - err = be.Backend.Load(ctx, h, length, offset, fn) + + _, retry := be.m.Swap(h, true) + if !retry { + // return broken data on the first try + length++ } - return err + return be.Backend.Load(ctx, h, length, offset, fn) } func TestRepositoryLoadUnpackedRetryBroken(t *testing.T) {