Merge pull request #3101 from aawsome/packsizes

Compute packsizes in MasterIndex
commit c844580e0f
Alexander Neumann, 2020-11-22 15:49:19 +01:00 (committed via GitHub)
10 changed files with 86 additions and 72 deletions

cmd/restic/cmd_prune.go

@@ -8,7 +8,6 @@ import (
 	"github.com/restic/restic/internal/debug"
 	"github.com/restic/restic/internal/errors"
-	"github.com/restic/restic/internal/pack"
 	"github.com/restic/restic/internal/repository"
 	"github.com/restic/restic/internal/restic"
@@ -233,7 +232,7 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedBlobs restic.BlobSet) error {
 	// iterate over all blobs in index to find out which blobs are duplicates
 	for blob := range repo.Index().Each(ctx) {
 		bh := blob.Handle()
-		size := uint64(pack.PackedSizeOfBlob(blob.Length))
+		size := uint64(blob.Length)
 		switch {
 		case usedBlobs.Has(bh): // used blob, move to keepBlobs
 			usedBlobs.Delete(bh)
@@ -261,19 +260,28 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedBlobs restic.BlobSet) error {
 	indexPack := make(map[restic.ID]packInfo)
 
+	// save computed pack header size
+	for pid, hdrSize := range repo.Index().PackSize(ctx, true) {
+		// initialize tpe with NumBlobTypes to indicate it's not set
+		indexPack[pid] = packInfo{tpe: restic.NumBlobTypes, usedSize: uint64(hdrSize)}
+	}
+
 	// iterate over all blobs in index to generate packInfo
 	for blob := range repo.Index().Each(ctx) {
-		ip, ok := indexPack[blob.PackID]
-		if !ok {
-			ip = packInfo{tpe: blob.Type, usedSize: pack.HeaderSize}
+		ip := indexPack[blob.PackID]
+		// Set blob type if not yet set
+		if ip.tpe == restic.NumBlobTypes {
+			ip.tpe = blob.Type
 		}
+
 		// mark mixed packs with "Invalid blob type"
 		if ip.tpe != blob.Type {
 			ip.tpe = restic.InvalidBlob
 		}
+
 		bh := blob.Handle()
-		size := uint64(pack.PackedSizeOfBlob(blob.Length))
+		size := uint64(blob.Length)
 		switch {
 		case duplicateBlobs.Has(bh): // duplicate blob
 			ip.usedSize += size
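Taken together, the two prune hunks switch the bookkeeping from "each blob pays for its own header entry" to "each pack pays for its whole header up front". A condensed sketch of that flow, ignoring the used/duplicate bookkeeping; `seedPackInfo` is an illustrative name, and `packInfo` is the type from cmd_prune.go above:

```go
// seedPackInfo seeds per-pack sizes with the exact header size from the
// index; onlyHdr=true makes PackSize return only header bytes per pack.
func seedPackInfo(ctx context.Context, idx restic.MasterIndex) map[restic.ID]packInfo {
	indexPack := make(map[restic.ID]packInfo)
	for pid, hdrSize := range idx.PackSize(ctx, true) {
		indexPack[pid] = packInfo{tpe: restic.NumBlobTypes, usedSize: uint64(hdrSize)}
	}
	// Each blob then adds only its data length; its header entry is
	// already covered by the seeded header size.
	for blob := range idx.Each(ctx) {
		ip := indexPack[blob.PackID]
		ip.usedSize += uint64(blob.Length)
		indexPack[blob.PackID] = ip
	}
	return indexPack
}
```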

cmd/restic/cmd_rebuild_index.go

@@ -1,7 +1,6 @@
 package main
 
 import (
-	"github.com/restic/restic/internal/pack"
 	"github.com/restic/restic/internal/repository"
 	"github.com/restic/restic/internal/restic"
@@ -91,17 +90,7 @@ func rebuildIndex(opts RebuildIndexOptions, gopts GlobalOptions, repo *repository.Repository) error {
 	}
 
 	Verbosef("getting pack files to read...\n")
 
-	// Compute size of each pack from index entries
-	packSizeFromIndex := make(map[restic.ID]int64)
-	for blob := range repo.Index().Each(ctx) {
-		size, ok := packSizeFromIndex[blob.PackID]
-		if !ok {
-			size = pack.HeaderSize
-		}
-		// update packSizeFromIndex
-		packSizeFromIndex[blob.PackID] = size + int64(pack.PackedSizeOfBlob(blob.Length))
-	}
+	packSizeFromIndex := repo.Index().PackSize(ctx, false)
 
 	err = repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error {
 		size, ok := packSizeFromIndex[id]
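The removed loop is exactly what `MasterIndex.PackSize` now encapsulates; what rebuildIndex does with the map is unchanged. Roughly, as a sketch (the `rereadPacks` name is chosen here for illustration):

```go
// Packs whose real size disagrees with the index-derived size, and packs
// missing from the map entirely, need their headers read again.
packSizeFromIndex := repo.Index().PackSize(ctx, false)
rereadPacks := restic.NewIDSet()
err = repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error {
	size, ok := packSizeFromIndex[id]
	if !ok || size != packSize {
		// unknown pack or size mismatch: the index entries are stale
		rereadPacks.Insert(id)
	}
	return nil
})
```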

internal/checker/checker.go

@@ -178,13 +178,7 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
 	c.masterIndex.MergeFinalIndexes()
 
 	// compute pack size using index entries
-	for blob := range c.masterIndex.Each(ctx) {
-		size, ok := c.packs[blob.PackID]
-		if !ok {
-			size = pack.HeaderSize
-		}
-		c.packs[blob.PackID] = size + int64(pack.PackedSizeOfBlob(blob.Length))
-	}
+	c.packs = c.masterIndex.PackSize(ctx, false)
 
 	debug.Log("checking for duplicate packs")
 	for packID := range c.packs {
@@ -749,17 +743,17 @@ func checkPack(ctx context.Context, r restic.Repository, id restic.ID, size int64) error {
 		return errors.Errorf("Pack size does not match, want %v, got %v", size, realSize)
 	}
 
-	blobs, err := pack.List(r.Key(), packfile, size)
+	blobs, hdrSize, err := pack.List(r.Key(), packfile, size)
 	if err != nil {
 		return err
 	}
 
 	var errs []error
 	var buf []byte
-	sizeFromBlobs := int64(pack.HeaderSize) // pack size computed only from blob information
+	sizeFromBlobs := uint(hdrSize)
 	idx := r.Index()
 	for i, blob := range blobs {
-		sizeFromBlobs += int64(pack.PackedSizeOfBlob(blob.Length))
+		sizeFromBlobs += blob.Length
 		debug.Log("  check blob %d: %v", i, blob)
 
 		buf = buf[:cap(buf)]
@@ -809,7 +803,7 @@ func checkPack(ctx context.Context, r restic.Repository, id restic.ID, size int64) error {
 		}
 	}
 
-	if sizeFromBlobs != size {
+	if int64(sizeFromBlobs) != size {
 		debug.Log("Pack size does not match, want %v, got %v", size, sizeFromBlobs)
 		errs = append(errs, errors.Errorf("Pack size does not match, want %v, got %v", size, sizeFromBlobs))
 	}
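The check now verifies the exact identity that holds for an intact pack file: its size equals the blob bytes plus the full header, with `hdrSize` supplied directly by `pack.List`. Note also the type change: `sizeFromBlobs` becomes `uint` because `blob.Length` is `uint`, so the final comparison casts back to `int64`. A sketch of the invariant, assembled from the fragments above:

```go
// For an intact pack file:
//   fileSize == sum(blob.Length) + hdrSize
// where hdrSize covers the uint32 length field plus the encrypted header.
sizeFromBlobs := uint(hdrSize)
for _, blob := range blobs {
	sizeFromBlobs += blob.Length
}
if int64(sizeFromBlobs) != size { // size is the backend-reported file size
	// the pack is truncated or padded, or its header misdescribes the blobs
}
```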

internal/pack/pack.go

@@ -46,7 +46,7 @@ func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte) (int, error)
 	return n, errors.Wrap(err, "Write")
 }
 
-var entrySize = uint(binary.Size(restic.BlobType(0)) + binary.Size(uint32(0)) + len(restic.ID{}))
+var EntrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{}))
 
 // headerEntry describes the format of header entries. It serves only as
 // documentation.
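For orientation, the constants behind the now-exported `EntrySize`: a header entry stores a one-byte blob type, a uint32 length, and a 32-byte SHA-256 ID. A back-of-the-envelope sketch, assuming restic's on-disk format at this commit:

```go
// Sizes of the three fields of a header entry, as assumed here:
const (
	typeBytes   = 1  // binary.Size(restic.BlobType(0)), a uint8
	lengthBytes = 4  // headerLengthSize == binary.Size(uint32(0))
	idBytes     = 32 // len(restic.ID{}), a SHA-256 hash
)

// entryBytes mirrors EntrySize: 37 bytes per blob in the header.
const entryBytes = typeBytes + lengthBytes + idBytes

// headerBytes is what a pack header occupies on disk for n blobs: the
// uint32 length field, the crypto overhead (IV plus MAC), and one entry
// per blob. This is what MasterIndex.PackSize(ctx, true) reports per pack.
func headerBytes(n, cryptoExtension int) int {
	return lengthBytes + cryptoExtension + n*entryBytes
}
```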
@@ -88,7 +88,7 @@ func (p *Packer) Finalize() (uint, error) {
 	bytesWritten += uint(hdrBytes)
 
 	// write length
-	err = binary.Write(p.wr, binary.LittleEndian, uint32(restic.CiphertextLength(len(p.blobs)*int(entrySize))))
+	err = binary.Write(p.wr, binary.LittleEndian, uint32(restic.CiphertextLength(len(p.blobs)*int(EntrySize))))
 	if err != nil {
 		return 0, errors.Wrap(err, "binary.Write")
 	}
@@ -100,7 +100,7 @@ func (p *Packer) Finalize() (uint, error) {
 // makeHeader constructs the header for p.
 func (p *Packer) makeHeader() ([]byte, error) {
-	buf := make([]byte, 0, len(p.blobs)*int(entrySize))
+	buf := make([]byte, 0, len(p.blobs)*int(EntrySize))
 
 	for _, b := range p.blobs {
 		switch b.Type {
@@ -151,7 +151,7 @@ func (p *Packer) String() string {
 var (
 	// we require at least one entry in the header, and one blob for a pack file
-	minFileSize = entrySize + crypto.Extension + uint(headerLengthSize)
+	minFileSize = EntrySize + crypto.Extension + uint(headerLengthSize)
 )
 
 const (
@@ -171,7 +171,7 @@
 // the appropriate size.
 func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
 	var bufsize int
-	bufsize += max * int(entrySize)
+	bufsize += max * int(EntrySize)
 	bufsize += crypto.Extension
 	bufsize += headerLengthSize
@@ -195,7 +195,7 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
 		err = InvalidFileError{Message: "header length is zero"}
 	case hlen < crypto.Extension:
 		err = InvalidFileError{Message: "header length is too small"}
-	case (hlen-crypto.Extension)%uint32(entrySize) != 0:
+	case (hlen-crypto.Extension)%uint32(EntrySize) != 0:
 		err = InvalidFileError{Message: "header length is invalid"}
 	case int64(hlen) > size-int64(headerLengthSize):
 		err = InvalidFileError{Message: "header is larger than file"}
@@ -206,7 +206,7 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
 		return nil, 0, errors.Wrap(err, "readHeader")
 	}
 
-	total := (int(hlen) - crypto.Extension) / int(entrySize)
+	total := (int(hlen) - crypto.Extension) / int(EntrySize)
 	if total < max {
 		// truncate to the beginning of the pack header
 		b = b[len(b)-int(hlen):]
@@ -252,52 +252,55 @@ func (e InvalidFileError) Error() string {
 	return e.Message
 }
 
-// List returns the list of entries found in a pack file.
-func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, err error) {
+// List returns the list of entries found in a pack file and the length of the
+// header (including the length field and crypto overhead)
+func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdrSize uint32, err error) {
 	buf, err := readHeader(rd, size)
 	if err != nil {
-		return nil, err
+		return nil, 0, err
 	}
 
 	if len(buf) < k.NonceSize()+k.Overhead() {
-		return nil, errors.New("invalid header, too small")
+		return nil, 0, errors.New("invalid header, too small")
 	}
 
+	hdrSize = headerLengthSize + uint32(len(buf))
+
 	nonce, buf := buf[:k.NonceSize()], buf[k.NonceSize():]
 	buf, err = k.Open(buf[:0], nonce, buf, nil)
 	if err != nil {
-		return nil, err
+		return nil, 0, err
	}
 
-	entries = make([]restic.Blob, 0, uint(len(buf))/entrySize)
+	entries = make([]restic.Blob, 0, uint(len(buf))/EntrySize)
 
 	pos := uint(0)
 	for len(buf) > 0 {
 		entry, err := parseHeaderEntry(buf)
 		if err != nil {
-			return nil, err
+			return nil, 0, err
 		}
 		entry.Offset = pos
 
 		entries = append(entries, entry)
 		pos += entry.Length
-		buf = buf[entrySize:]
+		buf = buf[EntrySize:]
 	}
 
-	return entries, nil
+	return entries, hdrSize, nil
 }
 
 // PackedSizeOfBlob returns the size a blob actually uses when saved in a pack
 func PackedSizeOfBlob(blobLength uint) uint {
-	return blobLength + entrySize
+	return blobLength + EntrySize
 }
 
 func parseHeaderEntry(p []byte) (b restic.Blob, err error) {
-	if uint(len(p)) < entrySize {
+	if uint(len(p)) < EntrySize {
 		err = errors.Errorf("parseHeaderEntry: buffer of size %d too short", len(p))
 		return b, err
 	}
-	p = p[:entrySize]
+	p = p[:EntrySize]
 
 	switch p[0] {
 	case 0:
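The old and new accounting agree; what changed is where the header bytes are tallied. Previously every blob carried its header entry via `PackedSizeOfBlob` and the fixed `pack.HeaderSize` covered the rest; now `hdrSize` reports the whole header at once. A sketch of that identity, written as if inside package pack; the helper names are illustrative, and `pack.HeaderSize` is assumed to equal `headerLengthSize + crypto.Extension`:

```go
// Old accounting: fixed header overhead, plus (length + entry) per blob.
func oldTotal(blobLengths []uint) uint {
	total := uint(headerLengthSize + crypto.Extension) // pack.HeaderSize
	for _, l := range blobLengths {
		total += l + EntrySize // PackedSizeOfBlob(l)
	}
	return total
}

// New accounting: hdrSize from List already contains headerLengthSize,
// n*EntrySize and crypto.Extension, so blobs add only their raw length.
func newTotal(blobLengths []uint, hdrSize uint32) uint {
	total := uint(hdrSize)
	for _, l := range blobLengths {
		total += l
	}
	return total
}

// Both functions yield the same number for the same intact pack file.
```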

internal/pack/pack_internal_test.go

@@ -41,7 +41,7 @@ func TestParseHeaderEntry(t *testing.T) {
 	buf.Reset()
 	_ = binary.Write(buf, binary.LittleEndian, &h)
 
-	b, err = parseHeaderEntry(buf.Bytes()[:entrySize-1])
+	b, err = parseHeaderEntry(buf.Bytes()[:EntrySize-1])
 	rtest.Assert(t, err != nil, "no error for short input")
 }
@@ -58,7 +58,7 @@ func (rd *countingReaderAt) ReadAt(p []byte, off int64) (n int, err error) {
 func TestReadHeaderEagerLoad(t *testing.T) {
 
 	testReadHeader := func(dataSize, entryCount, expectedReadInvocationCount int) {
-		expectedHeader := rtest.Random(0, entryCount*int(entrySize)+crypto.Extension)
+		expectedHeader := rtest.Random(0, entryCount*int(EntrySize)+crypto.Extension)
 
 		buf := &bytes.Buffer{}
 		buf.Write(rtest.Random(0, dataSize)) // pack blobs data
@@ -83,8 +83,8 @@ func TestReadHeaderEagerLoad(t *testing.T) {
 	testReadHeader(100, eagerEntries+1, 2)
 
 	// file size == eager header load size
-	eagerLoadSize := int((eagerEntries * entrySize) + crypto.Extension)
-	headerSize := int(1*entrySize) + crypto.Extension
+	eagerLoadSize := int((eagerEntries * EntrySize) + crypto.Extension)
+	headerSize := int(1*EntrySize) + crypto.Extension
 	dataSize := eagerLoadSize - headerSize - binary.Size(uint32(0))
 	testReadHeader(dataSize-1, 1, 1)
 	testReadHeader(dataSize, 1, 1)
@@ -96,8 +96,8 @@
 func TestReadRecords(t *testing.T) {
 	testReadRecords := func(dataSize, entryCount, totalRecords int) {
-		totalHeader := rtest.Random(0, totalRecords*int(entrySize)+crypto.Extension)
-		off := len(totalHeader) - (entryCount*int(entrySize) + crypto.Extension)
+		totalHeader := rtest.Random(0, totalRecords*int(EntrySize)+crypto.Extension)
+		off := len(totalHeader) - (entryCount*int(EntrySize) + crypto.Extension)
 		if off < 0 {
 			off = 0
 		}
@@ -127,8 +127,8 @@ func TestReadRecords(t *testing.T) {
 	testReadRecords(100, eagerEntries, eagerEntries+1)
 
 	// file size == eager header load size
-	eagerLoadSize := int((eagerEntries * entrySize) + crypto.Extension)
-	headerSize := int(1*entrySize) + crypto.Extension
+	eagerLoadSize := int((eagerEntries * EntrySize) + crypto.Extension)
+	headerSize := int(1*EntrySize) + crypto.Extension
 	dataSize := eagerLoadSize - headerSize - binary.Size(uint32(0))
 	testReadRecords(dataSize-1, 1, 1)
 	testReadRecords(dataSize, 1, 1)
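The arithmetic in these tests pins down the eager-load boundary. A worked example under the same assumptions as the earlier sketch (37-byte entries, 32-byte crypto.Extension) plus one more: that eagerEntries is 15, as in the pack package around this commit:

```go
// eagerLoadNumbers spells out the test constants, under the stated
// assumptions (EntrySize = 37, crypto.Extension = 32, eagerEntries = 15).
func eagerLoadNumbers() (eagerLoadSize, headerSize, dataSize int) {
	eagerLoadSize = 15*37 + 32 // 587: bytes read speculatively from the file end
	headerSize = 1*37 + 32     // 69: an actual one-entry header
	// largest blob payload for which the eager read still covers the
	// whole header including its 4-byte uint32 length field
	dataSize = eagerLoadSize - headerSize - 4 // 514
	return eagerLoadSize, headerSize, dataSize
}
```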

internal/pack/pack_test.go

@@ -53,19 +53,18 @@ func verifyBlobs(t testing.TB, bufs []Buf, k *crypto.Key, rd io.ReaderAt, packSize uint) {
 	for _, buf := range bufs {
 		written += len(buf.data)
 	}
-	// header length
-	written += binary.Size(uint32(0))
-	// header + header crypto
-	headerSize := len(bufs) * (binary.Size(restic.BlobType(0)) + binary.Size(uint32(0)) + len(restic.ID{}))
-	written += restic.CiphertextLength(headerSize)
+	// header length + header + header crypto
+	headerSize := binary.Size(uint32(0)) + restic.CiphertextLength(len(bufs)*int(pack.EntrySize))
+	written += headerSize
 
 	// check length
 	rtest.Equals(t, uint(written), packSize)
 
 	// read and parse it again
-	entries, err := pack.List(k, rd, int64(packSize))
+	entries, hdrSize, err := pack.List(k, rd, int64(packSize))
 	rtest.OK(t, err)
 	rtest.Equals(t, len(entries), len(bufs))
+	rtest.Equals(t, headerSize, int(hdrSize))
 
 	var buf []byte
 	for i, b := range bufs {
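The rewritten expectation folds the three old terms into a single `headerSize` that matches exactly what `pack.List` now returns as `hdrSize`. With the field sizes assumed earlier, a pack holding n blobs spends 4 + (37n + 32) bytes on its header, so 110 bytes for two blobs. A self-contained sketch of the same expectation; `expectedPackSize` is an illustrative name:

```go
// expectedPackSize mirrors the test's size expectation, assuming
// restic.CiphertextLength(x) == x + 32 and 37-byte header entries.
func expectedPackSize(blobLens []int) int {
	written := 0
	for _, l := range blobLens {
		written += l // blob bytes as stored in the pack
	}
	// uint32 length field + encrypted header (entries + crypto overhead)
	headerSize := 4 + (len(blobLens)*37 + 32)
	return written + headerSize
}
```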

internal/repository/master_index.go

@@ -5,6 +5,7 @@ import (
 	"sync"
 
 	"github.com/restic/restic/internal/debug"
+	"github.com/restic/restic/internal/pack"
 	"github.com/restic/restic/internal/restic"
 	"github.com/restic/restic/internal/ui/progress"
 
 	"golang.org/x/sync/errgroup"
@@ -111,6 +112,27 @@ func (mi *MasterIndex) Packs() restic.IDSet {
 	return packs
 }
 
+// PackSize returns the size of all packs computed by index information.
+// If onlyHdr is set to true, only the size of the header is returned.
+// Note that this function only gives correct sizes if there are no
+// duplicates in the index.
+func (mi *MasterIndex) PackSize(ctx context.Context, onlyHdr bool) map[restic.ID]int64 {
+	packSize := make(map[restic.ID]int64)
+
+	for blob := range mi.Each(ctx) {
+		size, ok := packSize[blob.PackID]
+		if !ok {
+			size = pack.HeaderSize
+		}
+		if !onlyHdr {
+			size += int64(blob.Length)
+		}
+		packSize[blob.PackID] = size + int64(pack.EntrySize)
+	}
+
+	return packSize
+}
+
 // Count returns the number of blobs of type t in the index.
 func (mi *MasterIndex) Count(t restic.BlobType) (n uint) {
 	mi.idxMutex.RLock()
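A usage sketch for the new method, given a populated MasterIndex `idx` (an illustrative variable). The caveat in the doc comment matters: a duplicate index entry would add a second EntrySize, and possibly a second blob length, for the same physical blob:

```go
// Full sizes (header + blob data) and header-only sizes per pack.
sizes := idx.PackSize(ctx, false)
hdrOnly := idx.PackSize(ctx, true)
for pid, size := range sizes {
	blobBytes := size - hdrOnly[pid] // raw blob payload in this pack
	debug.Log("pack %v: %d bytes total, %d blob bytes", pid.Str(), size, blobBytes)
}
```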

internal/repository/repack.go

@@ -92,7 +92,7 @@ func Repack(ctx context.Context, repo restic.Repository, packs restic.IDSet, keepBlobs restic.BlobSet,
 		for job := range processQueue {
 			tempfile, packID, packLength := job.tempfile, job.hash, job.packLength
 
-			blobs, err := pack.List(repo.Key(), tempfile, packLength)
+			blobs, _, err := pack.List(repo.Key(), tempfile, packLength)
 			if err != nil {
 				return err
 			}

internal/repository/repository.go

@@ -740,16 +740,11 @@ func (r *Repository) List(ctx context.Context, t restic.FileType, fn func(restic.ID, int64) error) error {
 }
 
 // ListPack returns the list of blobs saved in the pack id and the length of
-// the file as stored in the backend.
-func (r *Repository) ListPack(ctx context.Context, id restic.ID, size int64) ([]restic.Blob, int64, error) {
+// the pack header.
+func (r *Repository) ListPack(ctx context.Context, id restic.ID, size int64) ([]restic.Blob, uint32, error) {
 	h := restic.Handle{Type: restic.PackFile, Name: id.String()}
 
-	blobs, err := pack.List(r.Key(), restic.ReaderAt(ctx, r.Backend(), h), size)
-	if err != nil {
-		return nil, 0, err
-	}
-
-	return blobs, size, nil
+	return pack.List(r.Key(), restic.ReaderAt(ctx, r.Backend(), h), size)
 }
 
 // Delete calls backend.Delete() if implemented, and returns an error
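Callers of `Repository.ListPack` see a behavioural change, not just a type change: the second result used to echo the `size` argument and now reports the header length. A hedged caller sketch:

```go
// blobs: header entries of the pack; hdrSize: bytes occupied by the pack
// header (uint32 length field + encrypted header), not the file size.
blobs, hdrSize, err := repo.ListPack(ctx, id, size)
if err != nil {
	return err
}
var blobBytes uint
for _, b := range blobs {
	blobBytes += b.Length
}
debug.Log("pack %v: %d blobs, %d blob bytes, %d header bytes",
	id.Str(), len(blobs), blobBytes, hdrSize)
```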

internal/restic/repository.go

@@ -32,7 +32,10 @@ type Repository interface {
 	//
 	// The function fn is called in the same Goroutine List() was called from.
 	List(ctx context.Context, t FileType, fn func(ID, int64) error) error
-	ListPack(context.Context, ID, int64) ([]Blob, int64, error)
+
+	// ListPack returns the list of blobs saved in the pack id and the length of
+	// the pack header.
+	ListPack(context.Context, ID, int64) ([]Blob, uint32, error)
 
 	Flush(context.Context) error
@@ -63,6 +66,7 @@ type MasterIndex interface {
 	Lookup(ID, BlobType) []PackedBlob
 	Count(BlobType) uint
 	Packs() IDSet
+	PackSize(ctx context.Context, onlyHdr bool) map[ID]int64
 
 	// Each returns a channel that yields all blobs known to the index. When
 	// the context is cancelled, the background goroutine terminates. This