forked from TrueCloudLab/restic
Add Index structures for Blobs
This commit is contained in:
parent
4bdd59b4ad
commit
2c517e4a33
2 changed files with 72 additions and 15 deletions
|
@ -17,14 +17,22 @@ type Pack struct {
|
||||||
Entries []pack.Blob
|
Entries []pack.Blob
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Blob contains information about a blob.
|
||||||
|
type Blob struct {
|
||||||
|
Size int64
|
||||||
|
Packs backend.IDSet
|
||||||
|
}
|
||||||
|
|
||||||
// Index contains information about blobs and packs stored in a repo.
|
// Index contains information about blobs and packs stored in a repo.
|
||||||
type Index struct {
|
type Index struct {
|
||||||
Packs map[backend.ID]Pack
|
Packs map[backend.ID]Pack
|
||||||
|
Blobs map[pack.Handle]Blob
|
||||||
}
|
}
|
||||||
|
|
||||||
func newIndex() *Index {
|
func newIndex() *Index {
|
||||||
return &Index{
|
return &Index{
|
||||||
Packs: make(map[backend.ID]Pack),
|
Packs: make(map[backend.ID]Pack),
|
||||||
|
Blobs: make(map[pack.Handle]Blob),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -49,9 +57,11 @@ func New(repo *repository.Repository) (*Index, error) {
|
||||||
|
|
||||||
debug.Log("Index.New", "pack %v contains %d blobs", packID.Str(), len(j.Entries))
|
debug.Log("Index.New", "pack %v contains %d blobs", packID.Str(), len(j.Entries))
|
||||||
|
|
||||||
if _, ok := idx.Packs[packID]; ok {
|
err := idx.AddPack(packID, j.Size, j.Entries)
|
||||||
return nil, fmt.Errorf("pack %v processed twice", packID.Str())
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
p := Pack{Entries: j.Entries, Size: j.Size}
|
p := Pack{Entries: j.Entries, Size: j.Size}
|
||||||
idx.Packs[packID] = p
|
idx.Packs[packID] = p
|
||||||
}
|
}
|
||||||
|
@ -100,6 +110,8 @@ func Load(repo *repository.Repository) (*Index, error) {
|
||||||
supersedes := make(map[backend.ID]backend.IDSet)
|
supersedes := make(map[backend.ID]backend.IDSet)
|
||||||
results := make(map[backend.ID]map[backend.ID]Pack)
|
results := make(map[backend.ID]map[backend.ID]Pack)
|
||||||
|
|
||||||
|
index := newIndex()
|
||||||
|
|
||||||
for id := range repo.List(backend.Index, done) {
|
for id := range repo.List(backend.Index, done) {
|
||||||
debug.Log("index.Load", "Load index %v", id.Str())
|
debug.Log("index.Load", "Load index %v", id.Str())
|
||||||
idx, err := loadIndexJSON(repo, id)
|
idx, err := loadIndexJSON(repo, id)
|
||||||
|
@ -115,7 +127,7 @@ func Load(repo *repository.Repository) (*Index, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, jpack := range idx.Packs {
|
for _, jpack := range idx.Packs {
|
||||||
P := Pack{}
|
entries := make([]pack.Blob, 0, len(jpack.Blobs))
|
||||||
for _, blob := range jpack.Blobs {
|
for _, blob := range jpack.Blobs {
|
||||||
entry := pack.Blob{
|
entry := pack.Blob{
|
||||||
ID: blob.ID,
|
ID: blob.ID,
|
||||||
|
@ -123,9 +135,12 @@ func Load(repo *repository.Repository) (*Index, error) {
|
||||||
Offset: blob.Offset,
|
Offset: blob.Offset,
|
||||||
Length: blob.Length,
|
Length: blob.Length,
|
||||||
}
|
}
|
||||||
P.Entries = append(P.Entries, entry)
|
entries = append(entries, entry)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = index.AddPack(jpack.ID, 0, entries); err != nil {
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
res[jpack.ID] = P
|
|
||||||
}
|
}
|
||||||
|
|
||||||
results[id] = res
|
results[id] = res
|
||||||
|
@ -138,27 +153,44 @@ func Load(repo *repository.Repository) (*Index, error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
idx := newIndex()
|
return index, nil
|
||||||
for _, packs := range results {
|
}
|
||||||
for id, pack := range packs {
|
|
||||||
idx.Packs[id] = pack
|
// AddPack adds a pack to the index. If this pack is already in the index, an
|
||||||
|
// error is returned.
|
||||||
|
func (idx *Index) AddPack(id backend.ID, size int64, entries []pack.Blob) error {
|
||||||
|
if _, ok := idx.Packs[id]; ok {
|
||||||
|
return fmt.Errorf("pack %v already present in the index", id.Str())
|
||||||
|
}
|
||||||
|
|
||||||
|
idx.Packs[id] = Pack{Size: size, Entries: entries}
|
||||||
|
|
||||||
|
for _, entry := range entries {
|
||||||
|
h := pack.Handle{ID: entry.ID, Type: entry.Type}
|
||||||
|
if _, ok := idx.Blobs[h]; !ok {
|
||||||
|
idx.Blobs[h] = Blob{
|
||||||
|
Size: int64(entry.Length),
|
||||||
|
Packs: backend.NewIDSet(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return idx, nil
|
idx.Blobs[h].Packs.Insert(id)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// DuplicateBlobs returns a list of blobs that are stored more than once in the
|
// DuplicateBlobs returns a list of blobs that are stored more than once in the
|
||||||
// repo.
|
// repo.
|
||||||
func (idx *Index) DuplicateBlobs() (dups map[pack.Handle]int) {
|
func (idx *Index) DuplicateBlobs() (dups pack.BlobSet) {
|
||||||
dups = make(map[pack.Handle]int)
|
dups = pack.NewBlobSet()
|
||||||
seen := pack.NewBlobSet()
|
seen := pack.NewBlobSet()
|
||||||
|
|
||||||
for _, p := range idx.Packs {
|
for _, p := range idx.Packs {
|
||||||
for _, entry := range p.Entries {
|
for _, entry := range p.Entries {
|
||||||
h := pack.Handle{ID: entry.ID, Type: entry.Type}
|
h := pack.Handle{ID: entry.ID, Type: entry.Type}
|
||||||
if seen.Has(h) {
|
if seen.Has(h) {
|
||||||
dups[h]++
|
dups.Insert(h)
|
||||||
}
|
}
|
||||||
seen.Insert(h)
|
seen.Insert(h)
|
||||||
}
|
}
|
||||||
|
@ -166,3 +198,21 @@ func (idx *Index) DuplicateBlobs() (dups map[pack.Handle]int) {
|
||||||
|
|
||||||
return dups
|
return dups
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PacksForBlobs returns the set of packs in which the blobs are contained.
|
||||||
|
func (idx *Index) PacksForBlobs(blobs pack.BlobSet) (packs backend.IDSet) {
|
||||||
|
packs = backend.NewIDSet()
|
||||||
|
|
||||||
|
for h := range blobs {
|
||||||
|
blob, ok := idx.Blobs[h]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for id := range blob.Packs {
|
||||||
|
packs.Insert(id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return packs
|
||||||
|
}
|
||||||
|
|
|
@ -156,7 +156,7 @@ func BenchmarkIndexNew(b *testing.B) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestIndexDuplicateBlobs(t *testing.T) {
|
func TestIndexDuplicateBlobs(t *testing.T) {
|
||||||
repo, cleanup := createFilledRepo(t, 3, 0.05)
|
repo, cleanup := createFilledRepo(t, 3, 0.01)
|
||||||
defer cleanup()
|
defer cleanup()
|
||||||
|
|
||||||
idx, err := New(repo)
|
idx, err := New(repo)
|
||||||
|
@ -168,4 +168,11 @@ func TestIndexDuplicateBlobs(t *testing.T) {
|
||||||
if len(dups) == 0 {
|
if len(dups) == 0 {
|
||||||
t.Errorf("no duplicate blobs found")
|
t.Errorf("no duplicate blobs found")
|
||||||
}
|
}
|
||||||
|
t.Logf("%d packs, %d unique blobs", len(idx.Packs), len(idx.Blobs))
|
||||||
|
|
||||||
|
packs := idx.PacksForBlobs(dups)
|
||||||
|
if len(packs) == 0 {
|
||||||
|
t.Errorf("no packs with duplicate blobs found")
|
||||||
|
}
|
||||||
|
t.Logf("%d packs with duplicate blobs", len(packs))
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue