forked from TrueCloudLab/restic
Merge pull request #3101 from aawsome/packsizes
Compute packsizes in MasterIndex
This commit is contained in:
commit
c844580e0f
10 changed files with 86 additions and 72 deletions
|
@ -8,7 +8,6 @@ import (
|
|||
|
||||
"github.com/restic/restic/internal/debug"
|
||||
"github.com/restic/restic/internal/errors"
|
||||
"github.com/restic/restic/internal/pack"
|
||||
"github.com/restic/restic/internal/repository"
|
||||
"github.com/restic/restic/internal/restic"
|
||||
|
||||
|
@ -233,7 +232,7 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
|||
// iterate over all blobs in index to find out which blobs are duplicates
|
||||
for blob := range repo.Index().Each(ctx) {
|
||||
bh := blob.Handle()
|
||||
size := uint64(pack.PackedSizeOfBlob(blob.Length))
|
||||
size := uint64(blob.Length)
|
||||
switch {
|
||||
case usedBlobs.Has(bh): // used blob, move to keepBlobs
|
||||
usedBlobs.Delete(bh)
|
||||
|
@ -261,19 +260,28 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
|||
|
||||
indexPack := make(map[restic.ID]packInfo)
|
||||
|
||||
// save computed pack header size
|
||||
for pid, hdrSize := range repo.Index().PackSize(ctx, true) {
|
||||
// initialize tpe with NumBlobTypes to indicate it's not set
|
||||
indexPack[pid] = packInfo{tpe: restic.NumBlobTypes, usedSize: uint64(hdrSize)}
|
||||
}
|
||||
|
||||
// iterate over all blobs in index to generate packInfo
|
||||
for blob := range repo.Index().Each(ctx) {
|
||||
ip, ok := indexPack[blob.PackID]
|
||||
if !ok {
|
||||
ip = packInfo{tpe: blob.Type, usedSize: pack.HeaderSize}
|
||||
ip := indexPack[blob.PackID]
|
||||
|
||||
// Set blob type if not yet set
|
||||
if ip.tpe == restic.NumBlobTypes {
|
||||
ip.tpe = blob.Type
|
||||
}
|
||||
|
||||
// mark mixed packs with "Invalid blob type"
|
||||
if ip.tpe != blob.Type {
|
||||
ip.tpe = restic.InvalidBlob
|
||||
}
|
||||
|
||||
bh := blob.Handle()
|
||||
size := uint64(pack.PackedSizeOfBlob(blob.Length))
|
||||
size := uint64(blob.Length)
|
||||
switch {
|
||||
case duplicateBlobs.Has(bh): // duplicate blob
|
||||
ip.usedSize += size
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"github.com/restic/restic/internal/pack"
|
||||
"github.com/restic/restic/internal/repository"
|
||||
"github.com/restic/restic/internal/restic"
|
||||
|
||||
|
@ -91,17 +90,7 @@ func rebuildIndex(opts RebuildIndexOptions, gopts GlobalOptions, repo *repositor
|
|||
}
|
||||
|
||||
Verbosef("getting pack files to read...\n")
|
||||
|
||||
// Compute size of each pack from index entries
|
||||
packSizeFromIndex := make(map[restic.ID]int64)
|
||||
for blob := range repo.Index().Each(ctx) {
|
||||
size, ok := packSizeFromIndex[blob.PackID]
|
||||
if !ok {
|
||||
size = pack.HeaderSize
|
||||
}
|
||||
// update packSizeFromIndex
|
||||
packSizeFromIndex[blob.PackID] = size + int64(pack.PackedSizeOfBlob(blob.Length))
|
||||
}
|
||||
packSizeFromIndex := repo.Index().PackSize(ctx, false)
|
||||
|
||||
err = repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error {
|
||||
size, ok := packSizeFromIndex[id]
|
||||
|
|
|
@ -178,13 +178,7 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
|
|||
c.masterIndex.MergeFinalIndexes()
|
||||
|
||||
// compute pack size using index entries
|
||||
for blob := range c.masterIndex.Each(ctx) {
|
||||
size, ok := c.packs[blob.PackID]
|
||||
if !ok {
|
||||
size = pack.HeaderSize
|
||||
}
|
||||
c.packs[blob.PackID] = size + int64(pack.PackedSizeOfBlob(blob.Length))
|
||||
}
|
||||
c.packs = c.masterIndex.PackSize(ctx, false)
|
||||
|
||||
debug.Log("checking for duplicate packs")
|
||||
for packID := range c.packs {
|
||||
|
@ -749,17 +743,17 @@ func checkPack(ctx context.Context, r restic.Repository, id restic.ID, size int6
|
|||
return errors.Errorf("Pack size does not match, want %v, got %v", size, realSize)
|
||||
}
|
||||
|
||||
blobs, err := pack.List(r.Key(), packfile, size)
|
||||
blobs, hdrSize, err := pack.List(r.Key(), packfile, size)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var errs []error
|
||||
var buf []byte
|
||||
sizeFromBlobs := int64(pack.HeaderSize) // pack size computed only from blob information
|
||||
sizeFromBlobs := uint(hdrSize)
|
||||
idx := r.Index()
|
||||
for i, blob := range blobs {
|
||||
sizeFromBlobs += int64(pack.PackedSizeOfBlob(blob.Length))
|
||||
sizeFromBlobs += blob.Length
|
||||
debug.Log(" check blob %d: %v", i, blob)
|
||||
|
||||
buf = buf[:cap(buf)]
|
||||
|
@ -809,7 +803,7 @@ func checkPack(ctx context.Context, r restic.Repository, id restic.ID, size int6
|
|||
}
|
||||
}
|
||||
|
||||
if sizeFromBlobs != size {
|
||||
if int64(sizeFromBlobs) != size {
|
||||
debug.Log("Pack size does not match, want %v, got %v", size, sizeFromBlobs)
|
||||
errs = append(errs, errors.Errorf("Pack size does not match, want %v, got %v", size, sizeFromBlobs))
|
||||
}
|
||||
|
|
|
@ -46,7 +46,7 @@ func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte) (int, error)
|
|||
return n, errors.Wrap(err, "Write")
|
||||
}
|
||||
|
||||
var entrySize = uint(binary.Size(restic.BlobType(0)) + binary.Size(uint32(0)) + len(restic.ID{}))
|
||||
var EntrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{}))
|
||||
|
||||
// headerEntry describes the format of header entries. It serves only as
|
||||
// documentation.
|
||||
|
@ -88,7 +88,7 @@ func (p *Packer) Finalize() (uint, error) {
|
|||
bytesWritten += uint(hdrBytes)
|
||||
|
||||
// write length
|
||||
err = binary.Write(p.wr, binary.LittleEndian, uint32(restic.CiphertextLength(len(p.blobs)*int(entrySize))))
|
||||
err = binary.Write(p.wr, binary.LittleEndian, uint32(restic.CiphertextLength(len(p.blobs)*int(EntrySize))))
|
||||
if err != nil {
|
||||
return 0, errors.Wrap(err, "binary.Write")
|
||||
}
|
||||
|
@ -100,7 +100,7 @@ func (p *Packer) Finalize() (uint, error) {
|
|||
|
||||
// makeHeader constructs the header for p.
|
||||
func (p *Packer) makeHeader() ([]byte, error) {
|
||||
buf := make([]byte, 0, len(p.blobs)*int(entrySize))
|
||||
buf := make([]byte, 0, len(p.blobs)*int(EntrySize))
|
||||
|
||||
for _, b := range p.blobs {
|
||||
switch b.Type {
|
||||
|
@ -151,7 +151,7 @@ func (p *Packer) String() string {
|
|||
|
||||
var (
|
||||
// we require at least one entry in the header, and one blob for a pack file
|
||||
minFileSize = entrySize + crypto.Extension + uint(headerLengthSize)
|
||||
minFileSize = EntrySize + crypto.Extension + uint(headerLengthSize)
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -171,7 +171,7 @@ const (
|
|||
// the appropriate size.
|
||||
func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
|
||||
var bufsize int
|
||||
bufsize += max * int(entrySize)
|
||||
bufsize += max * int(EntrySize)
|
||||
bufsize += crypto.Extension
|
||||
bufsize += headerLengthSize
|
||||
|
||||
|
@ -195,7 +195,7 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
|
|||
err = InvalidFileError{Message: "header length is zero"}
|
||||
case hlen < crypto.Extension:
|
||||
err = InvalidFileError{Message: "header length is too small"}
|
||||
case (hlen-crypto.Extension)%uint32(entrySize) != 0:
|
||||
case (hlen-crypto.Extension)%uint32(EntrySize) != 0:
|
||||
err = InvalidFileError{Message: "header length is invalid"}
|
||||
case int64(hlen) > size-int64(headerLengthSize):
|
||||
err = InvalidFileError{Message: "header is larger than file"}
|
||||
|
@ -206,7 +206,7 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
|
|||
return nil, 0, errors.Wrap(err, "readHeader")
|
||||
}
|
||||
|
||||
total := (int(hlen) - crypto.Extension) / int(entrySize)
|
||||
total := (int(hlen) - crypto.Extension) / int(EntrySize)
|
||||
if total < max {
|
||||
// truncate to the beginning of the pack header
|
||||
b = b[len(b)-int(hlen):]
|
||||
|
@ -252,52 +252,55 @@ func (e InvalidFileError) Error() string {
|
|||
return e.Message
|
||||
}
|
||||
|
||||
// List returns the list of entries found in a pack file.
|
||||
func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, err error) {
|
||||
// List returns the list of entries found in a pack file and the length of the
|
||||
// header (including the header length field and crypto overhead).
|
||||
func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdrSize uint32, err error) {
|
||||
buf, err := readHeader(rd, size)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
if len(buf) < k.NonceSize()+k.Overhead() {
|
||||
return nil, errors.New("invalid header, too small")
|
||||
return nil, 0, errors.New("invalid header, too small")
|
||||
}
|
||||
|
||||
hdrSize = headerLengthSize + uint32(len(buf))
|
||||
|
||||
nonce, buf := buf[:k.NonceSize()], buf[k.NonceSize():]
|
||||
buf, err = k.Open(buf[:0], nonce, buf, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
entries = make([]restic.Blob, 0, uint(len(buf))/entrySize)
|
||||
entries = make([]restic.Blob, 0, uint(len(buf))/EntrySize)
|
||||
|
||||
pos := uint(0)
|
||||
for len(buf) > 0 {
|
||||
entry, err := parseHeaderEntry(buf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, 0, err
|
||||
}
|
||||
entry.Offset = pos
|
||||
|
||||
entries = append(entries, entry)
|
||||
pos += entry.Length
|
||||
buf = buf[entrySize:]
|
||||
buf = buf[EntrySize:]
|
||||
}
|
||||
|
||||
return entries, nil
|
||||
return entries, hdrSize, nil
|
||||
}
|
||||
|
||||
// PackedSizeOfBlob returns the size a blob actually uses when saved in a pack
|
||||
func PackedSizeOfBlob(blobLength uint) uint {
|
||||
return blobLength + entrySize
|
||||
return blobLength + EntrySize
|
||||
}
|
||||
|
||||
func parseHeaderEntry(p []byte) (b restic.Blob, err error) {
|
||||
if uint(len(p)) < entrySize {
|
||||
if uint(len(p)) < EntrySize {
|
||||
err = errors.Errorf("parseHeaderEntry: buffer of size %d too short", len(p))
|
||||
return b, err
|
||||
}
|
||||
p = p[:entrySize]
|
||||
p = p[:EntrySize]
|
||||
|
||||
switch p[0] {
|
||||
case 0:
|
||||
|
|
|
@ -41,7 +41,7 @@ func TestParseHeaderEntry(t *testing.T) {
|
|||
buf.Reset()
|
||||
_ = binary.Write(buf, binary.LittleEndian, &h)
|
||||
|
||||
b, err = parseHeaderEntry(buf.Bytes()[:entrySize-1])
|
||||
b, err = parseHeaderEntry(buf.Bytes()[:EntrySize-1])
|
||||
rtest.Assert(t, err != nil, "no error for short input")
|
||||
}
|
||||
|
||||
|
@ -58,7 +58,7 @@ func (rd *countingReaderAt) ReadAt(p []byte, off int64) (n int, err error) {
|
|||
func TestReadHeaderEagerLoad(t *testing.T) {
|
||||
|
||||
testReadHeader := func(dataSize, entryCount, expectedReadInvocationCount int) {
|
||||
expectedHeader := rtest.Random(0, entryCount*int(entrySize)+crypto.Extension)
|
||||
expectedHeader := rtest.Random(0, entryCount*int(EntrySize)+crypto.Extension)
|
||||
|
||||
buf := &bytes.Buffer{}
|
||||
buf.Write(rtest.Random(0, dataSize)) // pack blobs data
|
||||
|
@ -83,8 +83,8 @@ func TestReadHeaderEagerLoad(t *testing.T) {
|
|||
testReadHeader(100, eagerEntries+1, 2)
|
||||
|
||||
// file size == eager header load size
|
||||
eagerLoadSize := int((eagerEntries * entrySize) + crypto.Extension)
|
||||
headerSize := int(1*entrySize) + crypto.Extension
|
||||
eagerLoadSize := int((eagerEntries * EntrySize) + crypto.Extension)
|
||||
headerSize := int(1*EntrySize) + crypto.Extension
|
||||
dataSize := eagerLoadSize - headerSize - binary.Size(uint32(0))
|
||||
testReadHeader(dataSize-1, 1, 1)
|
||||
testReadHeader(dataSize, 1, 1)
|
||||
|
@ -96,8 +96,8 @@ func TestReadHeaderEagerLoad(t *testing.T) {
|
|||
|
||||
func TestReadRecords(t *testing.T) {
|
||||
testReadRecords := func(dataSize, entryCount, totalRecords int) {
|
||||
totalHeader := rtest.Random(0, totalRecords*int(entrySize)+crypto.Extension)
|
||||
off := len(totalHeader) - (entryCount*int(entrySize) + crypto.Extension)
|
||||
totalHeader := rtest.Random(0, totalRecords*int(EntrySize)+crypto.Extension)
|
||||
off := len(totalHeader) - (entryCount*int(EntrySize) + crypto.Extension)
|
||||
if off < 0 {
|
||||
off = 0
|
||||
}
|
||||
|
@ -127,8 +127,8 @@ func TestReadRecords(t *testing.T) {
|
|||
testReadRecords(100, eagerEntries, eagerEntries+1)
|
||||
|
||||
// file size == eager header load size
|
||||
eagerLoadSize := int((eagerEntries * entrySize) + crypto.Extension)
|
||||
headerSize := int(1*entrySize) + crypto.Extension
|
||||
eagerLoadSize := int((eagerEntries * EntrySize) + crypto.Extension)
|
||||
headerSize := int(1*EntrySize) + crypto.Extension
|
||||
dataSize := eagerLoadSize - headerSize - binary.Size(uint32(0))
|
||||
testReadRecords(dataSize-1, 1, 1)
|
||||
testReadRecords(dataSize, 1, 1)
|
||||
|
|
|
@ -53,19 +53,18 @@ func verifyBlobs(t testing.TB, bufs []Buf, k *crypto.Key, rd io.ReaderAt, packSi
|
|||
for _, buf := range bufs {
|
||||
written += len(buf.data)
|
||||
}
|
||||
// header length
|
||||
written += binary.Size(uint32(0))
|
||||
// header + header crypto
|
||||
headerSize := len(bufs) * (binary.Size(restic.BlobType(0)) + binary.Size(uint32(0)) + len(restic.ID{}))
|
||||
written += restic.CiphertextLength(headerSize)
|
||||
// header length + header + header crypto
|
||||
headerSize := binary.Size(uint32(0)) + restic.CiphertextLength(len(bufs)*int(pack.EntrySize))
|
||||
written += headerSize
|
||||
|
||||
// check length
|
||||
rtest.Equals(t, uint(written), packSize)
|
||||
|
||||
// read and parse it again
|
||||
entries, err := pack.List(k, rd, int64(packSize))
|
||||
entries, hdrSize, err := pack.List(k, rd, int64(packSize))
|
||||
rtest.OK(t, err)
|
||||
rtest.Equals(t, len(entries), len(bufs))
|
||||
rtest.Equals(t, headerSize, int(hdrSize))
|
||||
|
||||
var buf []byte
|
||||
for i, b := range bufs {
|
||||
|
|
|
@ -5,6 +5,7 @@ import (
|
|||
"sync"
|
||||
|
||||
"github.com/restic/restic/internal/debug"
|
||||
"github.com/restic/restic/internal/pack"
|
||||
"github.com/restic/restic/internal/restic"
|
||||
"github.com/restic/restic/internal/ui/progress"
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
@ -111,6 +112,27 @@ func (mi *MasterIndex) Packs() restic.IDSet {
|
|||
return packs
|
||||
}
|
||||
|
||||
// PackSize returns the size of all packs computed by index information.
|
||||
// If onlyHdr is set to true, only the size of the header is returned.
|
||||
// Note that this function only gives correct sizes if there are no
|
||||
// duplicates in the index.
|
||||
func (mi *MasterIndex) PackSize(ctx context.Context, onlyHdr bool) map[restic.ID]int64 {
|
||||
packSize := make(map[restic.ID]int64)
|
||||
|
||||
for blob := range mi.Each(ctx) {
|
||||
size, ok := packSize[blob.PackID]
|
||||
if !ok {
|
||||
size = pack.HeaderSize
|
||||
}
|
||||
if !onlyHdr {
|
||||
size += int64(blob.Length)
|
||||
}
|
||||
packSize[blob.PackID] = size + int64(pack.EntrySize)
|
||||
}
|
||||
|
||||
return packSize
|
||||
}
|
||||
|
||||
// Count returns the number of blobs of type t in the index.
|
||||
func (mi *MasterIndex) Count(t restic.BlobType) (n uint) {
|
||||
mi.idxMutex.RLock()
|
||||
|
|
|
@ -92,7 +92,7 @@ func Repack(ctx context.Context, repo restic.Repository, packs restic.IDSet, kee
|
|||
for job := range processQueue {
|
||||
tempfile, packID, packLength := job.tempfile, job.hash, job.packLength
|
||||
|
||||
blobs, err := pack.List(repo.Key(), tempfile, packLength)
|
||||
blobs, _, err := pack.List(repo.Key(), tempfile, packLength)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -740,16 +740,11 @@ func (r *Repository) List(ctx context.Context, t restic.FileType, fn func(restic
|
|||
}
|
||||
|
||||
// ListPack returns the list of blobs saved in the pack id and the length of
|
||||
// the file as stored in the backend.
|
||||
func (r *Repository) ListPack(ctx context.Context, id restic.ID, size int64) ([]restic.Blob, int64, error) {
|
||||
// the pack header.
|
||||
func (r *Repository) ListPack(ctx context.Context, id restic.ID, size int64) ([]restic.Blob, uint32, error) {
|
||||
h := restic.Handle{Type: restic.PackFile, Name: id.String()}
|
||||
|
||||
blobs, err := pack.List(r.Key(), restic.ReaderAt(ctx, r.Backend(), h), size)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
return blobs, size, nil
|
||||
return pack.List(r.Key(), restic.ReaderAt(ctx, r.Backend(), h), size)
|
||||
}
|
||||
|
||||
// Delete calls backend.Delete() if implemented, and returns an error
|
||||
|
|
|
@ -32,7 +32,10 @@ type Repository interface {
|
|||
//
|
||||
// The function fn is called in the same Goroutine List() was called from.
|
||||
List(ctx context.Context, t FileType, fn func(ID, int64) error) error
|
||||
ListPack(context.Context, ID, int64) ([]Blob, int64, error)
|
||||
|
||||
// ListPack returns the list of blobs saved in the pack id and the length of
|
||||
// the pack header.
|
||||
ListPack(context.Context, ID, int64) ([]Blob, uint32, error)
|
||||
|
||||
Flush(context.Context) error
|
||||
|
||||
|
@ -63,6 +66,7 @@ type MasterIndex interface {
|
|||
Lookup(ID, BlobType) []PackedBlob
|
||||
Count(BlobType) uint
|
||||
Packs() IDSet
|
||||
PackSize(ctx context.Context, onlyHdr bool) map[ID]int64
|
||||
|
||||
// Each returns a channel that yields all blobs known to the index. When
|
||||
// the context is cancelled, the background goroutine terminates. This
|
||||
|
|
Loading…
Reference in a new issue