Merge pull request #3682 from MichaelEischer/refactor-code

Pack size calculation cleanup and misc other changes
This commit is contained in:
Alexander Neumann 2022-03-29 11:06:04 +02:00 committed by GitHub
commit 724ace0e99
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
21 changed files with 106 additions and 82 deletions

View file

@ -79,7 +79,7 @@ func runCat(gopts GlobalOptions, args []string) error {
Println(string(buf))
return nil
case "index":
buf, err := repo.LoadAndDecrypt(gopts.ctx, nil, restic.IndexFile, id)
buf, err := repo.LoadUnpacked(gopts.ctx, nil, restic.IndexFile, id)
if err != nil {
return err
}

View file

@ -8,6 +8,7 @@ import (
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/pack"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
@ -245,7 +246,7 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
indexPack := make(map[restic.ID]packInfo)
// save computed pack header size
for pid, hdrSize := range repo.Index().PackSize(ctx, true) {
for pid, hdrSize := range pack.Size(ctx, repo.Index(), true) {
// initialize tpe with NumBlobTypes to indicate it's not set
indexPack[pid] = packInfo{tpe: restic.NumBlobTypes, usedSize: uint64(hdrSize)}
}

View file

@ -1,6 +1,7 @@
package main
import (
"github.com/restic/restic/internal/pack"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
@ -97,7 +98,7 @@ func rebuildIndex(opts RebuildIndexOptions, gopts GlobalOptions, repo *repositor
if err != nil {
return err
}
packSizeFromIndex = repo.Index().PackSize(ctx, false)
packSizeFromIndex = pack.Size(ctx, repo.Index(), false)
}
Verbosef("getting pack files to read...\n")

View file

@ -102,7 +102,7 @@ func main() {
fmt.Fprintf(os.Stderr, "%v\nthe `unlock` command can be used to remove stale locks\n", err)
case err == ErrInvalidSourceData:
fmt.Fprintf(os.Stderr, "Warning: %v\n", err)
case errors.IsFatal(errors.Cause(err)):
case errors.IsFatal(err):
fmt.Fprintf(os.Stderr, "%v\n", err)
case err != nil:
fmt.Fprintf(os.Stderr, "%+v\n", err)

View file

@ -234,6 +234,7 @@ func TestEnsureFileContent(ctx context.Context, t testing.TB, repo restic.Reposi
return
}
copy(content[pos:pos+len(part)], part)
pos += len(part)
}

View file

@ -8,7 +8,6 @@ import (
"runtime"
"github.com/pkg/errors"
"github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/restic"
@ -59,7 +58,7 @@ func (c *Cache) load(h restic.Handle, length int, offset int64) (io.ReadCloser,
return nil, errors.WithStack(err)
}
if fi.Size() <= crypto.Extension {
if fi.Size() <= int64(restic.CiphertextLength(0)) {
_ = f.Close()
_ = c.remove(h)
return nil, errors.Errorf("cached file %v is truncated, removing", h)
@ -117,7 +116,7 @@ func (c *Cache) Save(h restic.Handle, rd io.Reader) error {
return errors.Wrap(err, "Copy")
}
if n <= crypto.Extension {
if n <= int64(restic.CiphertextLength(0)) {
_ = f.Close()
_ = fs.Remove(f.Name())
debug.Log("trying to cache truncated file %v, removing", h)

View file

@ -123,7 +123,7 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
}
// compute pack size using index entries
c.packs = c.masterIndex.PackSize(ctx, false)
c.packs = pack.Size(ctx, c.masterIndex, false)
debug.Log("checking for duplicate packs")
for packID := range c.packs {
@ -452,7 +452,7 @@ func checkPack(ctx context.Context, r restic.Repository, id restic.ID, blobs []r
sort.Slice(blobs, func(i, j int) bool {
return blobs[i].Offset < blobs[j].Offset
})
idxHdrSize := pack.HeaderSize + len(blobs)*int(pack.EntrySize)
idxHdrSize := pack.CalculateHeaderSize(blobs)
lastBlobEnd := 0
nonContinuousPack := false
for _, blob := range blobs {
@ -542,7 +542,7 @@ func checkPack(ctx context.Context, r restic.Repository, id restic.ID, blobs []r
// Check if blob is contained in index and position is correct
idxHas := false
for _, pb := range idx.Lookup(blob.BlobHandle) {
if pb.PackID == id && pb.Offset == blob.Offset && pb.Length == blob.Length {
if pb.PackID == id && pb.Blob == blob {
idxHas = true
break
}

View file

@ -23,6 +23,8 @@ type Fataler interface {
// IsFatal reports whether err is a fatal error, i.e. one whose message should
// be printed to the user, after which the program should exit. The check is
// applied to the result of Cause(err), so callers do not need to call Cause
// themselves before passing the error in.
func IsFatal(err error) bool {
	if f, ok := Cause(err).(Fataler); ok {
		return f.Fatal()
	}
	return false
}

View file

@ -0,0 +1,22 @@
package errors_test
import (
"testing"
"github.com/restic/restic/internal/errors"
)
// TestFatal checks that IsFatal returns true for errors created with Fatal
// and Fatalf, and false for an error created with New.
func TestFatal(t *testing.T) {
	cases := []struct {
		err      error
		expected bool
	}{
		{err: errors.Fatal("broken"), expected: true},
		{err: errors.Fatalf("broken %d", 42), expected: true},
		{err: errors.New("error"), expected: false},
	}

	for _, tc := range cases {
		got := errors.IsFatal(tc.err)
		if got == tc.expected {
			continue
		}
		t.Fatalf("IsFatal for %q, expected: %v, got: %v", tc.err, tc.expected, got)
	}
}

View file

@ -1,6 +1,7 @@
package pack
import (
"context"
"encoding/binary"
"fmt"
"io"
@ -46,7 +47,7 @@ func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte) (int, error)
return n, errors.Wrap(err, "Write")
}
var EntrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{}))
var entrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{}))
// headerEntry describes the format of header entries. It serves only as
// documentation.
@ -88,7 +89,7 @@ func (p *Packer) Finalize() (uint, error) {
bytesWritten += uint(hdrBytes)
// write length
err = binary.Write(p.wr, binary.LittleEndian, uint32(restic.CiphertextLength(len(p.blobs)*int(EntrySize))))
err = binary.Write(p.wr, binary.LittleEndian, uint32(hdrBytes))
if err != nil {
return 0, errors.Wrap(err, "binary.Write")
}
@ -100,7 +101,7 @@ func (p *Packer) Finalize() (uint, error) {
// makeHeader constructs the header for p.
func (p *Packer) makeHeader() ([]byte, error) {
buf := make([]byte, 0, len(p.blobs)*int(EntrySize))
buf := make([]byte, 0, len(p.blobs)*int(entrySize))
for _, b := range p.blobs {
switch b.Type {
@ -151,14 +152,14 @@ func (p *Packer) String() string {
var (
// we require at least one entry in the header, and one blob for a pack file
minFileSize = EntrySize + crypto.Extension + uint(headerLengthSize)
minFileSize = entrySize + crypto.Extension + uint(headerLengthSize)
)
const (
// size of the header-length field at the end of the file; it is a uint32
headerLengthSize = 4
// HeaderSize is the header's constant overhead (independent of #entries)
HeaderSize = headerLengthSize + crypto.Extension
// headerSize is the header's constant overhead (independent of #entries)
headerSize = headerLengthSize + crypto.Extension
// MaxHeaderSize is the max size of header including header-length field
MaxHeaderSize = 16*1024*1024 + headerLengthSize
@ -172,7 +173,7 @@ const (
// the appropriate size.
func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
var bufsize int
bufsize += max * int(EntrySize)
bufsize += max * int(entrySize)
bufsize += crypto.Extension
bufsize += headerLengthSize
@ -196,7 +197,7 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
err = InvalidFileError{Message: "header length is zero"}
case hlen < crypto.Extension:
err = InvalidFileError{Message: "header length is too small"}
case (hlen-crypto.Extension)%uint32(EntrySize) != 0:
case (hlen-crypto.Extension)%uint32(entrySize) != 0:
err = InvalidFileError{Message: "header length is invalid"}
case int64(hlen) > size-int64(headerLengthSize):
err = InvalidFileError{Message: "header is larger than file"}
@ -207,7 +208,7 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
return nil, 0, errors.Wrap(err, "readHeader")
}
total := (int(hlen) - crypto.Extension) / int(EntrySize)
total := (int(hlen) - crypto.Extension) / int(entrySize)
if total < max {
// truncate to the beginning of the pack header
b = b[len(b)-int(hlen):]
@ -273,7 +274,7 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr
return nil, 0, err
}
entries = make([]restic.Blob, 0, uint(len(buf))/EntrySize)
entries = make([]restic.Blob, 0, uint(len(buf))/entrySize)
pos := uint(0)
for len(buf) > 0 {
@ -285,23 +286,18 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr
entries = append(entries, entry)
pos += entry.Length
buf = buf[EntrySize:]
buf = buf[entrySize:]
}
return entries, hdrSize, nil
}
// PackedSizeOfBlob returns the number of bytes a blob of length blobLength
// uses when saved in a pack: the blob length plus EntrySize.
func PackedSizeOfBlob(blobLength uint) uint {
	return EntrySize + blobLength
}
func parseHeaderEntry(p []byte) (b restic.Blob, err error) {
if uint(len(p)) < EntrySize {
if uint(len(p)) < entrySize {
err = errors.Errorf("parseHeaderEntry: buffer of size %d too short", len(p))
return b, err
}
p = p[:EntrySize]
p = p[:entrySize]
switch p[0] {
case 0:
@ -317,3 +313,28 @@ func parseHeaderEntry(p []byte) (b restic.Blob, err error) {
return b, nil
}
// CalculateHeaderSize returns the size in bytes of the pack header for the
// given blobs: the constant overhead headerSize plus entrySize for each blob.
func CalculateHeaderSize(blobs []restic.Blob) int {
	entries := len(blobs) * int(entrySize)
	return headerSize + entries
}
// Size computes the size of every pack from the blobs yielded by mi.Each and
// returns the result keyed by pack ID.
//
// Each pack starts at headerSize and grows by entrySize per blob. Unless
// onlyHdr is true, the length of each blob is added as well; with onlyHdr set,
// the result therefore contains only the header size of each pack.
//
// The sizes are only correct if the index contains no duplicates, because
// every yielded blob is counted.
func Size(ctx context.Context, mi restic.MasterIndex, onlyHdr bool) map[restic.ID]int64 {
	sizes := make(map[restic.ID]int64)

	for pb := range mi.Each(ctx) {
		total, seen := sizes[pb.PackID]
		if !seen {
			// first blob of this pack: account for the constant header overhead
			total = headerSize
		}

		total += int64(entrySize)
		if !onlyHdr {
			total += int64(pb.Length)
		}

		sizes[pb.PackID] = total
	}

	return sizes
}

View file

@ -41,7 +41,7 @@ func TestParseHeaderEntry(t *testing.T) {
buf.Reset()
_ = binary.Write(buf, binary.LittleEndian, &h)
b, err = parseHeaderEntry(buf.Bytes()[:EntrySize-1])
b, err = parseHeaderEntry(buf.Bytes()[:entrySize-1])
rtest.Assert(t, err != nil, "no error for short input")
}
@ -58,7 +58,7 @@ func (rd *countingReaderAt) ReadAt(p []byte, off int64) (n int, err error) {
func TestReadHeaderEagerLoad(t *testing.T) {
testReadHeader := func(dataSize, entryCount, expectedReadInvocationCount int) {
expectedHeader := rtest.Random(0, entryCount*int(EntrySize)+crypto.Extension)
expectedHeader := rtest.Random(0, entryCount*int(entrySize)+crypto.Extension)
buf := &bytes.Buffer{}
buf.Write(rtest.Random(0, dataSize)) // pack blobs data
@ -83,8 +83,8 @@ func TestReadHeaderEagerLoad(t *testing.T) {
testReadHeader(100, eagerEntries+1, 2)
// file size == eager header load size
eagerLoadSize := int((eagerEntries * EntrySize) + crypto.Extension)
headerSize := int(1*EntrySize) + crypto.Extension
eagerLoadSize := int((eagerEntries * entrySize) + crypto.Extension)
headerSize := int(1*entrySize) + crypto.Extension
dataSize := eagerLoadSize - headerSize - binary.Size(uint32(0))
testReadHeader(dataSize-1, 1, 1)
testReadHeader(dataSize, 1, 1)
@ -96,8 +96,8 @@ func TestReadHeaderEagerLoad(t *testing.T) {
func TestReadRecords(t *testing.T) {
testReadRecords := func(dataSize, entryCount, totalRecords int) {
totalHeader := rtest.Random(0, totalRecords*int(EntrySize)+crypto.Extension)
off := len(totalHeader) - (entryCount*int(EntrySize) + crypto.Extension)
totalHeader := rtest.Random(0, totalRecords*int(entrySize)+crypto.Extension)
off := len(totalHeader) - (entryCount*int(entrySize) + crypto.Extension)
if off < 0 {
off = 0
}
@ -127,8 +127,8 @@ func TestReadRecords(t *testing.T) {
testReadRecords(100, eagerEntries, eagerEntries+1)
// file size == eager header load size
eagerLoadSize := int((eagerEntries * EntrySize) + crypto.Extension)
headerSize := int(1*EntrySize) + crypto.Extension
eagerLoadSize := int((eagerEntries * entrySize) + crypto.Extension)
headerSize := int(1*entrySize) + crypto.Extension
dataSize := eagerLoadSize - headerSize - binary.Size(uint32(0))
testReadRecords(dataSize-1, 1, 1)
testReadRecords(dataSize, 1, 1)

View file

@ -5,7 +5,6 @@ import (
"context"
"crypto/rand"
"crypto/sha256"
"encoding/binary"
"encoding/json"
"io"
"testing"
@ -54,17 +53,18 @@ func verifyBlobs(t testing.TB, bufs []Buf, k *crypto.Key, rd io.ReaderAt, packSi
for _, buf := range bufs {
written += len(buf.data)
}
// header length + header + header crypto
headerSize := binary.Size(uint32(0)) + restic.CiphertextLength(len(bufs)*int(pack.EntrySize))
written += headerSize
// check length
rtest.Equals(t, uint(written), packSize)
// read and parse it again
entries, hdrSize, err := pack.List(k, rd, int64(packSize))
rtest.OK(t, err)
rtest.Equals(t, len(entries), len(bufs))
// check the header size calculation for consistency
headerSize := pack.CalculateHeaderSize(entries)
written += headerSize
// check length
rtest.Equals(t, uint(written), packSize)
rtest.Equals(t, headerSize, int(hdrSize))
var buf []byte

View file

@ -543,7 +543,7 @@ func (idx *Index) merge(idx2 *Index) error {
m.foreachWithID(e2.id, func(e *indexEntry) {
b := idx.toPackedBlob(e, restic.BlobType(typ))
b2 := idx2.toPackedBlob(e2, restic.BlobType(typ))
if b.Length == b2.Length && b.Offset == b2.Offset && b.PackID == b2.PackID {
if b == b2 {
found = true
}
})

View file

@ -53,7 +53,7 @@ func ForAllIndexes(ctx context.Context, repo restic.Repository,
var idx *Index
oldFormat := false
buf, err = repo.LoadAndDecrypt(ctx, buf[:0], restic.IndexFile, fi.ID)
buf, err = repo.LoadUnpacked(ctx, buf[:0], restic.IndexFile, fi.ID)
if err == nil {
idx, oldFormat, err = DecodeIndex(buf, fi.ID)
}

View file

@ -262,7 +262,7 @@ func AddKey(ctx context.Context, s *Repository, password, username, hostname str
}
nonce := crypto.NewRandomNonce()
ciphertext := make([]byte, 0, len(buf)+newkey.user.Overhead()+newkey.user.NonceSize())
ciphertext := make([]byte, 0, restic.CiphertextLength(len(buf)))
ciphertext = append(ciphertext, nonce...)
ciphertext = newkey.user.Seal(ciphertext, nonce, buf, nil)
newkey.Data = ciphertext

View file

@ -6,7 +6,6 @@ import (
"sync"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/pack"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/ui/progress"
"golang.org/x/sync/errgroup"
@ -131,27 +130,6 @@ func (mi *MasterIndex) Packs(packBlacklist restic.IDSet) restic.IDSet {
return packs
}
// PackSize computes the size of every pack from the blobs yielded by mi.Each
// and returns the result keyed by pack ID.
//
// Each pack starts at pack.HeaderSize and grows by pack.EntrySize per blob.
// Unless onlyHdr is true, the length of each blob is added as well; with
// onlyHdr set, the result contains only the header size of each pack.
//
// The sizes are only correct if the index contains no duplicates, because
// every yielded blob is counted.
func (mi *MasterIndex) PackSize(ctx context.Context, onlyHdr bool) map[restic.ID]int64 {
	sizes := make(map[restic.ID]int64)

	for pb := range mi.Each(ctx) {
		total, seen := sizes[pb.PackID]
		if !seen {
			// first blob of this pack: account for the constant header overhead
			total = pack.HeaderSize
		}

		total += int64(pack.EntrySize)
		if !onlyHdr {
			total += int64(pb.Length)
		}

		sizes[pb.PackID] = total
	}

	return sizes
}
// Count returns the number of blobs of type t in the index.
func (mi *MasterIndex) Count(t restic.BlobType) (n uint) {
mi.idxMutex.RLock()

View file

@ -86,10 +86,10 @@ func (r *Repository) PrefixLength(ctx context.Context, t restic.FileType) (int,
return restic.PrefixLength(ctx, r.be, t)
}
// LoadAndDecrypt loads and decrypts the file with the given type and ID, using
// LoadUnpacked loads and decrypts the file with the given type and ID, using
// the supplied buffer (which must be empty). If the buffer is nil, a new
// buffer will be allocated and returned.
func (r *Repository) LoadAndDecrypt(ctx context.Context, buf []byte, t restic.FileType, id restic.ID) ([]byte, error) {
func (r *Repository) LoadUnpacked(ctx context.Context, buf []byte, t restic.FileType, id restic.ID) ([]byte, error) {
if len(buf) != 0 {
panic("buf is not empty")
}
@ -239,7 +239,7 @@ func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.
// LoadJSONUnpacked decrypts the data and afterwards calls json.Unmarshal on
// the item.
func (r *Repository) LoadJSONUnpacked(ctx context.Context, t restic.FileType, id restic.ID, item interface{}) (err error) {
buf, err := r.LoadAndDecrypt(ctx, nil, t, id)
buf, err := r.LoadUnpacked(ctx, nil, t, id)
if err != nil {
return err
}
@ -252,10 +252,10 @@ func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, bo
return r.idx.LookupSize(restic.BlobHandle{ID: id, Type: tpe})
}
// SaveAndEncrypt encrypts data and stores it to the backend as type t. If data
// saveAndEncrypt encrypts data and stores it to the backend as type t. If data
// is small enough, it will be packed together with other small blobs.
// The caller must ensure that the id matches the data.
func (r *Repository) SaveAndEncrypt(ctx context.Context, t restic.BlobType, data []byte, id restic.ID) error {
func (r *Repository) saveAndEncrypt(ctx context.Context, t restic.BlobType, data []byte, id restic.ID) error {
debug.Log("save id %v (%v, %d bytes)", id, t, len(data))
nonce := crypto.NewRandomNonce()
@ -698,7 +698,7 @@ func (r *Repository) SaveBlob(ctx context.Context, t restic.BlobType, buf []byte
// only save when needed or explicitly told
if !known || storeDuplicate {
err = r.SaveAndEncrypt(ctx, t, buf, newID)
err = r.saveAndEncrypt(ctx, t, buf, newID)
}
return newID, known, err

View file

@ -218,7 +218,7 @@ func BenchmarkLoadBlob(b *testing.B) {
}
}
func BenchmarkLoadAndDecrypt(b *testing.B) {
func BenchmarkLoadUnpacked(b *testing.B) {
repo, cleanup := repository.TestRepository(b)
defer cleanup()
@ -237,7 +237,7 @@ func BenchmarkLoadAndDecrypt(b *testing.B) {
b.SetBytes(int64(length))
for i := 0; i < b.N; i++ {
data, err := repo.LoadAndDecrypt(context.TODO(), nil, restic.PackFile, storageID)
data, err := repo.LoadUnpacked(context.TODO(), nil, restic.PackFile, storageID)
rtest.OK(b, err)
// See comment in BenchmarkLoadBlob.
@ -300,7 +300,7 @@ func TestRepositoryLoadIndex(t *testing.T) {
// loadIndex loads the index id from backend and returns it.
func loadIndex(ctx context.Context, repo restic.Repository, id restic.ID) (*repository.Index, error) {
buf, err := repo.LoadAndDecrypt(ctx, nil, restic.IndexFile, id)
buf, err := repo.LoadUnpacked(ctx, nil, restic.IndexFile, id)
if err != nil {
return nil, err
}

View file

@ -78,7 +78,7 @@ func LoadConfig(ctx context.Context, r JSONUnpackedLoader) (Config, error) {
}
if cfg.Version != RepoVersion {
return Config{}, errors.New("unsupported repository version")
return Config{}, errors.Errorf("unsupported repository version %v", cfg.Version)
}
if checkPolynomial {

View file

@ -43,10 +43,10 @@ type Repository interface {
SaveJSONUnpacked(context.Context, FileType, interface{}) (ID, error)
LoadJSONUnpacked(ctx context.Context, t FileType, id ID, dest interface{}) error
// LoadAndDecrypt loads and decrypts the file with the given type and ID,
// LoadUnpacked loads and decrypts the file with the given type and ID,
// using the supplied buffer (which must be empty). If the buffer is nil, a
// new buffer will be allocated and returned.
LoadAndDecrypt(ctx context.Context, buf []byte, t FileType, id ID) (data []byte, err error)
LoadUnpacked(ctx context.Context, buf []byte, t FileType, id ID) (data []byte, err error)
LoadBlob(context.Context, BlobType, ID, []byte) ([]byte, error)
SaveBlob(context.Context, BlobType, []byte, ID, bool) (ID, bool, error)
@ -70,7 +70,6 @@ type MasterIndex interface {
Has(BlobHandle) bool
Lookup(BlobHandle) []PackedBlob
Count(BlobType) uint
PackSize(ctx context.Context, onlyHdr bool) map[ID]int64
// Each returns a channel that yields all blobs known to the index. When
// the context is cancelled, the background goroutine terminates. This

View file

@ -195,7 +195,7 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
if packID.Equal(pack.id) {
addBlob(blob, fileOffset)
}
fileOffset += int64(blob.Length) - crypto.Extension
fileOffset += int64(restic.PlaintextLength(int(blob.Length)))
})
if err != nil {
// restoreFiles should have caught this error before