Index: Use slices instead of maps, reduce data

This commit is contained in:
Alexander Neumann 2017-01-15 15:45:52 +01:00
parent cd9b526203
commit d40f566e41
3 changed files with 33 additions and 74 deletions

View file

@@ -103,11 +103,13 @@ func runPrune(gopts GlobalOptions) error {
return err return err
} }
blobs := 0
for _, pack := range idx.Packs { for _, pack := range idx.Packs {
stats.bytes += pack.Size stats.bytes += pack.Size
blobs += len(pack.Entries)
} }
Verbosef("repository contains %v packs (%v blobs) with %v bytes\n", Verbosef("repository contains %v packs (%v blobs) with %v bytes\n",
len(idx.Packs), len(idx.Blobs), formatBytes(uint64(stats.bytes))) len(idx.Packs), blobs, formatBytes(uint64(stats.bytes)))
blobCount := make(map[restic.BlobHandle]int) blobCount := make(map[restic.BlobHandle]int)
duplicateBlobs := 0 duplicateBlobs := 0
@@ -164,14 +166,17 @@ func runPrune(gopts GlobalOptions) error {
// find packs that need a rewrite // find packs that need a rewrite
rewritePacks := restic.NewIDSet() rewritePacks := restic.NewIDSet()
for h, blob := range idx.Blobs { for _, pack := range idx.Packs {
for _, blob := range pack.Entries {
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
if !usedBlobs.Has(h) { if !usedBlobs.Has(h) {
rewritePacks.Merge(blob.Packs) rewritePacks.Insert(pack.ID)
continue continue
} }
if blobCount[h] > 1 { if blobCount[h] > 1 {
rewritePacks.Merge(blob.Packs) rewritePacks.Insert(pack.ID)
}
} }
} }

View file

@@ -14,27 +14,20 @@ import (
// Pack contains information about the contents of a pack. // Pack contains information about the contents of a pack.
type Pack struct { type Pack struct {
ID restic.ID
Size int64 Size int64
Entries []restic.Blob Entries []restic.Blob
} }
// Blob contains information about a blob.
type Blob struct {
Size int64
Packs restic.IDSet
}
// Index contains information about blobs and packs stored in a repo. // Index contains information about blobs and packs stored in a repo.
type Index struct { type Index struct {
Packs map[restic.ID]Pack Packs map[restic.ID]Pack
Blobs map[restic.BlobHandle]Blob
IndexIDs restic.IDSet IndexIDs restic.IDSet
} }
func newIndex() *Index { func newIndex() *Index {
return &Index{ return &Index{
Packs: make(map[restic.ID]Pack), Packs: make(map[restic.ID]Pack),
Blobs: make(map[restic.BlobHandle]Blob),
IndexIDs: restic.NewIDSet(), IndexIDs: restic.NewIDSet(),
} }
} }
@@ -70,7 +63,7 @@ func New(repo restic.Repository, p *restic.Progress) (*Index, error) {
return nil, err return nil, err
} }
p := Pack{Entries: j.Entries(), Size: j.Size()} p := Pack{ID: packID, Entries: j.Entries(), Size: j.Size()}
idx.Packs[packID] = p idx.Packs[packID] = p
} }
@@ -181,18 +174,6 @@ func (idx *Index) AddPack(id restic.ID, size int64, entries []restic.Blob) error
idx.Packs[id] = Pack{Size: size, Entries: entries} idx.Packs[id] = Pack{Size: size, Entries: entries}
for _, entry := range entries {
h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
if _, ok := idx.Blobs[h]; !ok {
idx.Blobs[h] = Blob{
Size: int64(entry.Length),
Packs: restic.NewIDSet(),
}
}
idx.Blobs[h].Packs.Insert(id)
}
return nil return nil
} }
@@ -202,15 +183,6 @@ func (idx *Index) RemovePack(id restic.ID) error {
return errors.Errorf("pack %v not found in the index", id.Str()) return errors.Errorf("pack %v not found in the index", id.Str())
} }
for _, blob := range idx.Packs[id].Entries {
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
idx.Blobs[h].Packs.Delete(id)
if len(idx.Blobs[h].Packs) == 0 {
delete(idx.Blobs, h)
}
}
delete(idx.Packs, id) delete(idx.Packs, id)
return nil return nil
@@ -239,16 +211,13 @@ func (idx *Index) DuplicateBlobs() (dups restic.BlobSet) {
func (idx *Index) PacksForBlobs(blobs restic.BlobSet) (packs restic.IDSet) { func (idx *Index) PacksForBlobs(blobs restic.BlobSet) (packs restic.IDSet) {
packs = restic.NewIDSet() packs = restic.NewIDSet()
for h := range blobs { for id, p := range idx.Packs {
blob, ok := idx.Blobs[h] for _, entry := range p.Entries {
if !ok { if blobs.Has(restic.BlobHandle{ID: entry.ID, Type: entry.Type}) {
continue
}
for id := range blob.Packs {
packs.Insert(id) packs.Insert(id)
} }
} }
}
return packs return packs
} }
@@ -264,33 +233,22 @@ type Location struct {
var ErrBlobNotFound = errors.New("blob not found in index") var ErrBlobNotFound = errors.New("blob not found in index")
// FindBlob returns a list of packs and positions the blob can be found in. // FindBlob returns a list of packs and positions the blob can be found in.
func (idx *Index) FindBlob(h restic.BlobHandle) ([]Location, error) { func (idx *Index) FindBlob(h restic.BlobHandle) (result []Location, err error) {
blob, ok := idx.Blobs[h] for id, p := range idx.Packs {
if !ok { for _, entry := range p.Entries {
if entry.ID.Equal(h.ID) && entry.Type == h.Type {
result = append(result, Location{
PackID: id,
Blob: entry,
})
}
}
}
if len(result) == 0 {
return nil, ErrBlobNotFound return nil, ErrBlobNotFound
} }
result := make([]Location, 0, len(blob.Packs))
for packID := range blob.Packs {
pack, ok := idx.Packs[packID]
if !ok {
return nil, errors.Errorf("pack %v not found in index", packID.Str())
}
for _, entry := range pack.Entries {
if entry.Type != h.Type {
continue
}
if !entry.ID.Equal(h.ID) {
continue
}
loc := Location{PackID: packID, Blob: entry}
result = append(result, loc)
}
}
return result, nil return result, nil
} }

View file

@@ -151,7 +151,7 @@ func TestIndexDuplicateBlobs(t *testing.T) {
if len(dups) == 0 { if len(dups) == 0 {
t.Errorf("no duplicate blobs found") t.Errorf("no duplicate blobs found")
} }
t.Logf("%d packs, %d unique blobs", len(idx.Packs), len(idx.Blobs)) t.Logf("%d packs, %d duplicate blobs", len(idx.Packs), len(dups))
packs := idx.PacksForBlobs(dups) packs := idx.PacksForBlobs(dups)
if len(packs) == 0 { if len(packs) == 0 {
@@ -249,10 +249,6 @@ func TestIndexAddRemovePack(t *testing.T) {
if err == nil { if err == nil {
t.Errorf("removed blob %v found in index", h) t.Errorf("removed blob %v found in index", h)
} }
if _, ok := idx.Blobs[h]; ok {
t.Errorf("removed blob %v found in index.Blobs", h)
}
} }
} }