forked from TrueCloudLab/restic
Add Index structures for Blobs
This commit is contained in:
parent
4bdd59b4ad
commit
2c517e4a33
2 changed files with 72 additions and 15 deletions
|
@ -17,14 +17,22 @@ type Pack struct {
|
|||
Entries []pack.Blob
|
||||
}
|
||||
|
||||
// Blob contains information about a blob.
|
||||
type Blob struct {
|
||||
// Size is the blob's size; AddPack sets it from the Length of the first
// entry it sees for this blob.
Size int64
|
||||
// Packs is the set of IDs of the packs that AddPack recorded as containing
// this blob.
Packs backend.IDSet
|
||||
}
|
||||
|
||||
// Index contains information about blobs and packs stored in a repo.
|
||||
type Index struct {
|
||||
// Packs maps a pack's ID to its size and the list of blobs it contains.
Packs map[backend.ID]Pack
|
||||
// Blobs maps a blob handle (ID plus type) to its size and the set of
// packs it is stored in.
Blobs map[pack.Handle]Blob
|
||||
}
|
||||
|
||||
func newIndex() *Index {
|
||||
return &Index{
|
||||
Packs: make(map[backend.ID]Pack),
|
||||
Blobs: make(map[pack.Handle]Blob),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -49,9 +57,11 @@ func New(repo *repository.Repository) (*Index, error) {
|
|||
|
||||
debug.Log("Index.New", "pack %v contains %d blobs", packID.Str(), len(j.Entries))
|
||||
|
||||
if _, ok := idx.Packs[packID]; ok {
|
||||
return nil, fmt.Errorf("pack %v processed twice", packID.Str())
|
||||
err := idx.AddPack(packID, j.Size, j.Entries)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
p := Pack{Entries: j.Entries, Size: j.Size}
|
||||
idx.Packs[packID] = p
|
||||
}
|
||||
|
@ -100,6 +110,8 @@ func Load(repo *repository.Repository) (*Index, error) {
|
|||
supersedes := make(map[backend.ID]backend.IDSet)
|
||||
results := make(map[backend.ID]map[backend.ID]Pack)
|
||||
|
||||
index := newIndex()
|
||||
|
||||
for id := range repo.List(backend.Index, done) {
|
||||
debug.Log("index.Load", "Load index %v", id.Str())
|
||||
idx, err := loadIndexJSON(repo, id)
|
||||
|
@ -115,7 +127,7 @@ func Load(repo *repository.Repository) (*Index, error) {
|
|||
}
|
||||
|
||||
for _, jpack := range idx.Packs {
|
||||
P := Pack{}
|
||||
entries := make([]pack.Blob, 0, len(jpack.Blobs))
|
||||
for _, blob := range jpack.Blobs {
|
||||
entry := pack.Blob{
|
||||
ID: blob.ID,
|
||||
|
@ -123,9 +135,12 @@ func Load(repo *repository.Repository) (*Index, error) {
|
|||
Offset: blob.Offset,
|
||||
Length: blob.Length,
|
||||
}
|
||||
P.Entries = append(P.Entries, entry)
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
|
||||
if err = index.AddPack(jpack.ID, 0, entries); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
res[jpack.ID] = P
|
||||
}
|
||||
|
||||
results[id] = res
|
||||
|
@ -138,27 +153,44 @@ func Load(repo *repository.Repository) (*Index, error) {
|
|||
}
|
||||
}
|
||||
|
||||
idx := newIndex()
|
||||
for _, packs := range results {
|
||||
for id, pack := range packs {
|
||||
idx.Packs[id] = pack
|
||||
return index, nil
|
||||
}
|
||||
|
||||
// AddPack adds a pack to the index. If this pack is already in the index, an
|
||||
// error is returned.
|
||||
func (idx *Index) AddPack(id backend.ID, size int64, entries []pack.Blob) error {
|
||||
if _, ok := idx.Packs[id]; ok {
|
||||
return fmt.Errorf("pack %v already present in the index", id.Str())
|
||||
}
|
||||
|
||||
idx.Packs[id] = Pack{Size: size, Entries: entries}
|
||||
|
||||
for _, entry := range entries {
|
||||
h := pack.Handle{ID: entry.ID, Type: entry.Type}
|
||||
if _, ok := idx.Blobs[h]; !ok {
|
||||
idx.Blobs[h] = Blob{
|
||||
Size: int64(entry.Length),
|
||||
Packs: backend.NewIDSet(),
|
||||
}
|
||||
}
|
||||
|
||||
return idx, nil
|
||||
idx.Blobs[h].Packs.Insert(id)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// DuplicateBlobs returns a list of blobs that are stored more than once in the
|
||||
// repo.
|
||||
func (idx *Index) DuplicateBlobs() (dups map[pack.Handle]int) {
|
||||
dups = make(map[pack.Handle]int)
|
||||
func (idx *Index) DuplicateBlobs() (dups pack.BlobSet) {
|
||||
dups = pack.NewBlobSet()
|
||||
seen := pack.NewBlobSet()
|
||||
|
||||
for _, p := range idx.Packs {
|
||||
for _, entry := range p.Entries {
|
||||
h := pack.Handle{ID: entry.ID, Type: entry.Type}
|
||||
if seen.Has(h) {
|
||||
dups[h]++
|
||||
dups.Insert(h)
|
||||
}
|
||||
seen.Insert(h)
|
||||
}
|
||||
|
@ -166,3 +198,21 @@ func (idx *Index) DuplicateBlobs() (dups map[pack.Handle]int) {
|
|||
|
||||
return dups
|
||||
}
|
||||
|
||||
// PacksForBlobs returns the set of packs in which the blobs are contained.
|
||||
func (idx *Index) PacksForBlobs(blobs pack.BlobSet) (packs backend.IDSet) {
|
||||
packs = backend.NewIDSet()
|
||||
|
||||
for h := range blobs {
|
||||
blob, ok := idx.Blobs[h]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
for id := range blob.Packs {
|
||||
packs.Insert(id)
|
||||
}
|
||||
}
|
||||
|
||||
return packs
|
||||
}
|
||||
|
|
|
@ -156,7 +156,7 @@ func BenchmarkIndexNew(b *testing.B) {
|
|||
}
|
||||
|
||||
func TestIndexDuplicateBlobs(t *testing.T) {
|
||||
repo, cleanup := createFilledRepo(t, 3, 0.05)
|
||||
repo, cleanup := createFilledRepo(t, 3, 0.01)
|
||||
defer cleanup()
|
||||
|
||||
idx, err := New(repo)
|
||||
|
@ -168,4 +168,11 @@ func TestIndexDuplicateBlobs(t *testing.T) {
|
|||
if len(dups) == 0 {
|
||||
t.Errorf("no duplicate blobs found")
|
||||
}
|
||||
t.Logf("%d packs, %d unique blobs", len(idx.Packs), len(idx.Blobs))
|
||||
|
||||
packs := idx.PacksForBlobs(dups)
|
||||
if len(packs) == 0 {
|
||||
t.Errorf("no packs with duplicate blobs found")
|
||||
}
|
||||
t.Logf("%d packs with duplicate blobs", len(packs))
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue