repository: make reloading broken files explicit

This commit is contained in:
Michael Eischer 2024-05-09 15:26:41 +02:00
parent e9390352a7
commit 2ace242f36
4 changed files with 45 additions and 51 deletions

View file

@ -161,14 +161,10 @@ func (b *Backend) Load(ctx context.Context, h backend.Handle, length int, offset
// try loading from cache without checking that the handle is actually cached // try loading from cache without checking that the handle is actually cached
inCache, err := b.loadFromCache(h, length, offset, consumer) inCache, err := b.loadFromCache(h, length, offset, consumer)
if inCache { if inCache {
if err == nil { debug.Log("error loading %v from cache: %v", h, err)
return nil // the caller must explicitly use cache.Forget() to remove the cache entry
} return err
// drop from cache and retry once
_ = b.Cache.remove(h)
} }
debug.Log("error loading %v from cache: %v", h, err)
// if we don't automatically cache this file type, fall back to the backend // if we don't automatically cache this file type, fall back to the backend
if !autoCacheTypes(h) { if !autoCacheTypes(h) {
@ -184,6 +180,7 @@ func (b *Backend) Load(ctx context.Context, h backend.Handle, length int, offset
inCache, err = b.loadFromCache(h, length, offset, consumer) inCache, err = b.loadFromCache(h, length, offset, consumer)
if inCache { if inCache {
debug.Log("error loading %v from cache: %v", h, err)
return err return err
} }

View file

@ -55,14 +55,12 @@ func (c *Cache) load(h backend.Handle, length int, offset int64) (io.ReadCloser,
size := fi.Size() size := fi.Size()
if size <= int64(crypto.CiphertextLength(0)) { if size <= int64(crypto.CiphertextLength(0)) {
_ = f.Close() _ = f.Close()
_ = c.remove(h) return nil, errors.Errorf("cached file %v is truncated", h)
return nil, errors.Errorf("cached file %v is truncated, removing", h)
} }
if size < offset+int64(length) { if size < offset+int64(length) {
_ = f.Close() _ = f.Close()
_ = c.remove(h) return nil, errors.Errorf("cached file %v is too short", h)
return nil, errors.Errorf("cached file %v is too short, removing", h)
} }
if offset > 0 { if offset > 0 {
@ -139,6 +137,10 @@ func (c *Cache) save(h backend.Handle, rd io.Reader) error {
return errors.WithStack(err) return errors.WithStack(err)
} }
func (c *Cache) Forget(h backend.Handle) error {
return c.remove(h)
}
// remove deletes a file. When the file is not cached, no error is returned. // remove deletes a file. When the file is not cached, no error is returned.
func (c *Cache) remove(h backend.Handle) error { func (c *Cache) remove(h backend.Handle) error {
if !c.canBeCached(h.Type) { if !c.canBeCached(h.Type) {

View file

@ -7,7 +7,6 @@ import (
"io" "io"
"github.com/restic/restic/internal/backend" "github.com/restic/restic/internal/backend"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/restic"
) )
@ -17,47 +16,41 @@ import (
func (r *Repository) LoadRaw(ctx context.Context, t restic.FileType, id restic.ID) (buf []byte, err error) { func (r *Repository) LoadRaw(ctx context.Context, t restic.FileType, id restic.ID) (buf []byte, err error) {
h := backend.Handle{Type: t, Name: id.String()} h := backend.Handle{Type: t, Name: id.String()}
ctx, cancel := context.WithCancel(ctx) buf, err = loadRaw(ctx, r.be, h)
var dataErr error // retry loading damaged data only once. If a file fails to download correctly
retriedInvalidData := false // the second time, then it is likely corrupted at the backend.
err = r.be.Load(ctx, h, 0, 0, func(rd io.Reader) error { if h.Type != backend.ConfigFile && id != restic.Hash(buf) {
// make sure this is idempotent, in case an error occurs this function may be called multiple times! if r.Cache != nil {
wr := bytes.NewBuffer(buf[:0]) // Cleanup cache to make sure it's not the cached copy that is broken.
_, cerr := io.Copy(wr, rd) // Ignore error as there's not much we can do in that case.
if cerr != nil { _ = r.Cache.Forget(h)
return cerr
} }
buf = wr.Bytes()
// retry loading damaged data only once. If a file fails to download correctly buf, err = loadRaw(ctx, r.be, h)
// the second time, then it is likely corrupted at the backend.
if h.Type != backend.ConfigFile { if err == nil && id != restic.Hash(buf) {
if id != restic.Hash(buf) { // Return corrupted data to the caller if it is still broken the second time to
if !retriedInvalidData { // let the caller decide what to do with the data.
debug.Log("retry loading broken blob %v", h) return buf, fmt.Errorf("LoadRaw(%v): %w", h, restic.ErrInvalidData)
retriedInvalidData = true
} else {
// with a canceled context there is not guarantee which error will
// be returned by `be.Load`.
dataErr = fmt.Errorf("loadAll(%v): %w", h, restic.ErrInvalidData)
cancel()
}
return restic.ErrInvalidData
}
} }
return nil
})
// Return corrupted data to the caller if it is still broken the second time to
// let the caller decide what to do with the data.
if dataErr != nil {
return buf, dataErr
} }
if err != nil { if err != nil {
return nil, err return nil, err
} }
return buf, nil return buf, nil
} }
func loadRaw(ctx context.Context, be backend.Backend, h backend.Handle) (buf []byte, err error) {
err = be.Load(ctx, h, 0, 0, func(rd io.Reader) error {
wr := new(bytes.Buffer)
_, cerr := io.Copy(wr, rd)
if cerr != nil {
return cerr
}
buf = wr.Bytes()
return cerr
})
return buf, err
}

View file

@ -9,6 +9,7 @@ import (
"math/rand" "math/rand"
"os" "os"
"path/filepath" "path/filepath"
"sync"
"testing" "testing"
"time" "time"
@ -264,6 +265,7 @@ func TestRepositoryLoadUnpackedBroken(t *testing.T) {
type damageOnceBackend struct { type damageOnceBackend struct {
backend.Backend backend.Backend
m sync.Map
} }
func (be *damageOnceBackend) Load(ctx context.Context, h backend.Handle, length int, offset int64, fn func(rd io.Reader) error) error { func (be *damageOnceBackend) Load(ctx context.Context, h backend.Handle, length int, offset int64, fn func(rd io.Reader) error) error {
@ -271,13 +273,13 @@ func (be *damageOnceBackend) Load(ctx context.Context, h backend.Handle, length
if h.Type == restic.ConfigFile { if h.Type == restic.ConfigFile {
return be.Backend.Load(ctx, h, length, offset, fn) return be.Backend.Load(ctx, h, length, offset, fn)
} }
// return broken data on the first try
err := be.Backend.Load(ctx, h, length+1, offset, fn) _, retry := be.m.Swap(h, true)
if err != nil { if !retry {
// retry // return broken data on the first try
err = be.Backend.Load(ctx, h, length, offset, fn) length++
} }
return err return be.Backend.Load(ctx, h, length, offset, fn)
} }
func TestRepositoryLoadUnpackedRetryBroken(t *testing.T) { func TestRepositoryLoadUnpackedRetryBroken(t *testing.T) {