Merge pull request #3786 from greatroar/prune

restic prune: Merge three loops over the index

commit 19581dbc18
4 changed files with 42 additions and 59 deletions
cmd/restic/cmd_prune.go

@@ -242,11 +242,26 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
 
 	Verbosef("searching used packs...\n")
 
+	indexPack := make(map[restic.ID]packInfo)
 	keepBlobs := restic.NewBlobSet()
-	duplicateBlobs := restic.NewBlobSet()
 
-	// iterate over all blobs in index to find out which blobs are duplicates
+	// iterate over all blobs in index to generate packInfo and find duplicates
 	for blob := range repo.Index().Each(ctx) {
+		ip, seen := indexPack[blob.PackID]
+
+		if seen {
+			// mark mixed packs with "Invalid blob type"
+			if ip.tpe != blob.Type {
+				ip.tpe = restic.InvalidBlob
+			}
+		} else {
+			ip = packInfo{
+				tpe:      blob.Type,
+				usedSize: pack.HeaderSize,
+			}
+		}
+		ip.usedSize += uint64(pack.CalculateEntrySize(blob.Blob))
+
 		bh := blob.BlobHandle
 		size := uint64(blob.Length)
 		switch {
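The block added in the hunk above records one blob type per pack and demotes it to restic.InvalidBlob as soon as a blob of another type shows up, which is how mixed packs get flagged during the single pass. A minimal standalone sketch of that pattern follows; blobType and packSummary are hypothetical stand-ins for illustration, not restic's actual types.

package main

import "fmt"

// Hypothetical stand-ins for restic's blob types and per-pack summary.
type blobType int

const (
	dataBlob blobType = iota
	treeBlob
	invalidBlob // plays the role of restic.InvalidBlob: pack holds mixed types
)

type packSummary struct{ tpe blobType }

func main() {
	// (packID, blob type) pairs standing in for one walk over the index
	idx := []struct {
		pack string
		tpe  blobType
	}{
		{"p1", dataBlob}, {"p1", dataBlob},
		{"p2", treeBlob}, {"p2", dataBlob}, // p2 mixes tree and data blobs
	}

	packs := map[string]packSummary{}
	for _, b := range idx {
		ip, seen := packs[b.pack]
		if seen {
			if ip.tpe != b.tpe { // a second type means the pack is mixed
				ip.tpe = invalidBlob
			}
		} else {
			ip = packSummary{tpe: b.tpe}
		}
		packs[b.pack] = ip
	}

	fmt.Println(packs["p1"].tpe == invalidBlob) // false
	fmt.Println(packs["p2"].tpe == invalidBlob) // true
}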
@@ -255,14 +270,27 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
 			keepBlobs.Insert(bh)
 			stats.size.used += size
 			stats.blobs.used++
-		case keepBlobs.Has(bh): // duplicate blob
-			duplicateBlobs.Insert(bh)
+			ip.usedSize += size
+			ip.usedBlobs++
+
+		case keepBlobs.Has(bh): // duplicate of a blob that we want to keep
 			stats.size.duplicate += size
 			stats.blobs.duplicate++
-		default:
+			ip.usedSize += size
+			ip.duplicateBlobs++
+
+		default: // unused, don't care if it's a duplicate
 			stats.size.unused += size
 			stats.blobs.unused++
+			ip.unusedSize += size
+			ip.unusedBlobs++
 		}
+
+		if !blob.IsCompressed() {
+			ip.uncompressed = true
+		}
+		// update indexPack
+		indexPack[blob.PackID] = ip
 	}
 
 	// Check if all used blobs have been found in index
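With the loops merged, keepBlobs does double duty as the duplicate detector: the first copy of a used blob moves from usedBlobs into keepBlobs, and any later copy of the same handle falls into the keepBlobs.Has case, so the separate duplicateBlobs set is dropped. A rough sketch of that classification, using plain string handles and map-backed sets instead of restic.BlobSet:

package main

import "fmt"

type set map[string]struct{}

func (s set) has(h string) bool { _, ok := s[h]; return ok }

func main() {
	// blob handles still referenced by snapshots (stand-in for usedBlobs)
	used := set{"a": {}, "b": {}}
	// every blob occurrence found in the index, duplicates included
	index := []string{"a", "c", "a", "b", "c"}

	keep := set{}
	var usedN, dupN, unusedN int

	for _, h := range index {
		switch {
		case used.has(h): // first copy of a used blob: keep it
			delete(used, h)
			keep[h] = struct{}{}
			usedN++
		case keep.has(h): // another copy of a blob we already keep
			dupN++
		default: // not referenced at all
			unusedN++
		}
	}

	fmt.Println(usedN, dupN, unusedN) // 2 1 2
}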
@@ -275,48 +303,6 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
 		return errorIndexIncomplete
 	}
 
-	indexPack := make(map[restic.ID]packInfo)
-
-	// save computed pack header size
-	for pid, hdrSize := range pack.Size(ctx, repo.Index(), true) {
-		// initialize tpe with NumBlobTypes to indicate it's not set
-		indexPack[pid] = packInfo{tpe: restic.NumBlobTypes, usedSize: uint64(hdrSize)}
-	}
-
-	// iterate over all blobs in index to generate packInfo
-	for blob := range repo.Index().Each(ctx) {
-		ip := indexPack[blob.PackID]
-
-		// Set blob type if not yet set
-		if ip.tpe == restic.NumBlobTypes {
-			ip.tpe = blob.Type
-		}
-
-		// mark mixed packs with "Invalid blob type"
-		if ip.tpe != blob.Type {
-			ip.tpe = restic.InvalidBlob
-		}
-
-		bh := blob.BlobHandle
-		size := uint64(blob.Length)
-		switch {
-		case duplicateBlobs.Has(bh): // duplicate blob
-			ip.usedSize += size
-			ip.duplicateBlobs++
-		case keepBlobs.Has(bh): // used blob, not duplicate
-			ip.usedSize += size
-			ip.usedBlobs++
-		default: // unused blob
-			ip.unusedSize += size
-			ip.unusedBlobs++
-		}
-		if !blob.IsCompressed() {
-			ip.uncompressed = true
-		}
-		// update indexPack
-		indexPack[blob.PackID] = ip
-	}
-
 	Verbosef("collecting packs for deletion and repacking\n")
 	removePacksFirst := restic.NewIDSet()
 	removePacks := restic.NewIDSet()
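The block deleted above held the second and third passes: pack.Size(..., true) seeded per-pack header sizes, and another full index walk filled in the packInfo counters. In the merged loop those numbers are built inline instead: usedSize starts at the fixed header overhead, each blob adds its header-entry overhead, and the blob length is then booked as used or unused. A compact sketch of that accounting; headerOverhead and entryOverhead are made-up constants, not restic's real values.

package main

import "fmt"

const (
	headerOverhead = 36 // hypothetical fixed per-pack header cost
	entryOverhead  = 38 // hypothetical per-blob header entry cost
)

type packInfo struct {
	usedSize, unusedSize   uint64
	usedBlobs, unusedBlobs int
}

type blobRec struct {
	pack   string
	length uint64
	used   bool
}

func main() {
	index := []blobRec{
		{"p1", 100, true}, {"p1", 50, false},
		{"p2", 200, true},
	}

	packs := map[string]packInfo{}
	for _, b := range index {
		ip, seen := packs[b.pack]
		if !seen {
			ip.usedSize = headerOverhead // the header is always kept, so count it as used
		}
		ip.usedSize += entryOverhead // every blob costs one header entry
		if b.used {
			ip.usedSize += b.length
			ip.usedBlobs++
		} else {
			ip.unusedSize += b.length
			ip.unusedBlobs++
		}
		packs[b.pack] = ip
	}

	fmt.Printf("%+v\n", packs["p1"]) // {usedSize:212 unusedSize:50 usedBlobs:1 unusedBlobs:1}
}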
cmd/restic/cmd_rebuild_index.go

@@ -98,7 +98,7 @@ func rebuildIndex(opts RebuildIndexOptions, gopts GlobalOptions, repo *repositor
 		if err != nil {
 			return err
 		}
-		packSizeFromIndex = pack.Size(ctx, repo.Index(), false)
+		packSizeFromIndex = pack.Size(ctx, repo.Index())
 	}
 
 	Verbosef("getting pack files to read...\n")
internal/checker/checker.go

@@ -131,7 +131,7 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
 	}
 
 	// compute pack size using index entries
-	c.packs = pack.Size(ctx, c.masterIndex, false)
+	c.packs = pack.Size(ctx, c.masterIndex)
 
 	debug.Log("checking for duplicate packs")
 	for packID := range c.packs {
internal/pack/pack.go

@@ -177,8 +177,8 @@ var (
 const (
 	// size of the header-length field at the end of the file; it is a uint32
 	headerLengthSize = 4
-	// headerSize is the header's constant overhead (independent of #entries)
-	headerSize = headerLengthSize + crypto.Extension
+	// HeaderSize is the header's constant overhead (independent of #entries)
+	HeaderSize = headerLengthSize + crypto.Extension
 
 	// MaxHeaderSize is the max size of header including header-length field
 	MaxHeaderSize = 16*1024*1024 + headerLengthSize
@@ -242,7 +242,7 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
 	// eagerly download eagerEntries header entries as part of header-length request.
 	// only make second request if actual number of entries is greater than eagerEntries
 
-	eagerSize := eagerEntries*int(entrySize) + headerSize
+	eagerSize := eagerEntries*int(entrySize) + HeaderSize
 	b, c, err := readRecords(rd, size, eagerSize)
 	if err != nil {
 		return nil, err
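Only the constant name changes in this hunk, but the surrounding comments describe the read strategy: the header lives at the end of the pack file, so the reader fetches the length field, the crypto overhead and a guessed number of entries in one request, and issues a second request only if the pack turns out to have more entries than guessed. A standalone sketch of that sizing decision; eagerEntries, entrySize and headerOverhead are assumed values for illustration, not restic's.

package main

import "fmt"

const (
	headerOverhead = 36 // assumed: length field plus crypto overhead
	entrySize      = 38 // assumed: size of one header entry on disk
	eagerEntries   = 15 // assumed: how many entries to fetch speculatively
)

// eagerPlan returns how many bytes to request from the end of the file
// right away, and whether a pack with actualEntries entries would force
// a second, larger request.
func eagerPlan(actualEntries int) (eagerBytes int, needSecondRead bool) {
	eagerBytes = eagerEntries*entrySize + headerOverhead
	needSecondRead = actualEntries > eagerEntries
	return eagerBytes, needSecondRead
}

func main() {
	for _, n := range []int{3, 15, 40} {
		bytes, again := eagerPlan(n)
		fmt.Printf("entries=%d eagerBytes=%d secondRead=%v\n", n, bytes, again)
	}
}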
@@ -349,7 +349,7 @@ func CalculateEntrySize(blob restic.Blob) int {
 }
 
 func CalculateHeaderSize(blobs []restic.Blob) int {
-	size := headerSize
+	size := HeaderSize
 	for _, blob := range blobs {
 		size += CalculateEntrySize(blob)
 	}
@@ -357,20 +357,17 @@ func CalculateHeaderSize(blobs []restic.Blob) int {
 }
 
 // Size returns the size of all packs computed by index information.
-// If onlyHdr is set to true, only the size of the header is returned
 // Note that this function only gives correct sizes, if there are no
 // duplicates in the index.
-func Size(ctx context.Context, mi restic.MasterIndex, onlyHdr bool) map[restic.ID]int64 {
+func Size(ctx context.Context, mi restic.MasterIndex) map[restic.ID]int64 {
 	packSize := make(map[restic.ID]int64)
 
 	for blob := range mi.Each(ctx) {
 		size, ok := packSize[blob.PackID]
 		if !ok {
-			size = headerSize
-		}
-		if !onlyHdr {
-			size += int64(blob.Length)
+			size = HeaderSize
 		}
+		size += int64(blob.Length)
 		packSize[blob.PackID] = size + int64(CalculateEntrySize(blob.Blob))
 	}
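After this change pack.Size charges each pack the fixed header overhead once and then, per blob, the blob length plus its header-entry size, roughly packSize = HeaderSize + sum(blob.Length + CalculateEntrySize(blob)); the old onlyHdr mode that skipped the blob length existed only for prune and is gone. A small sketch of that computation over a toy index, with a flat assumed entry size in place of restic's per-blob calculation:

package main

import "fmt"

const (
	headerOverhead = 36 // assumed fixed per-pack header cost
	entryOverhead  = 38 // assumed flat per-blob header entry cost
)

type indexBlob struct {
	packID string
	length int64
}

// packSizes mirrors the shape of the new pack.Size: one map entry per pack,
// seeded with the header overhead and grown by length + entry size per blob.
func packSizes(blobs []indexBlob) map[string]int64 {
	sizes := make(map[string]int64)
	for _, b := range blobs {
		size, ok := sizes[b.packID]
		if !ok {
			size = headerOverhead
		}
		size += b.length
		sizes[b.packID] = size + entryOverhead
	}
	return sizes
}

func main() {
	blobs := []indexBlob{{"p1", 100}, {"p1", 50}, {"p2", 200}}
	fmt.Println(packSizes(blobs)) // map[p1:262 p2:274]
}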