forked from TrueCloudLab/restic
Merge pull request #2863 from aawsome/index-no-duplicates
Don't save exact duplicates in merged index
This commit is contained in:
commit
eca0f0ad24
2 changed files with 66 additions and 5 deletions
|
@ -464,7 +464,7 @@ func (idx *Index) TreePacks() restic.IDs {
|
||||||
}
|
}
|
||||||
|
|
||||||
// merge() merges indexes, i.e. idx.merge(idx2) merges the contents of idx2 into idx.
|
// merge() merges indexes, i.e. idx.merge(idx2) merges the contents of idx2 into idx.
|
||||||
// idx2 is not changed by this method.
|
// Entries of idx2 that already exist identically in idx are skipped during merging; idx2 is not changed by this method.
|
||||||
func (idx *Index) merge(idx2 *Index) error {
|
func (idx *Index) merge(idx2 *Index) error {
|
||||||
idx.m.Lock()
|
idx.m.Lock()
|
||||||
defer idx.m.Unlock()
|
defer idx.m.Unlock()
|
||||||
|
@ -476,18 +476,35 @@ func (idx *Index) merge(idx2 *Index) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
packlen := len(idx.packs)
|
packlen := len(idx.packs)
|
||||||
|
// first append packs as they might be accessed when looking for duplicates below
|
||||||
|
idx.packs = append(idx.packs, idx2.packs...)
|
||||||
|
|
||||||
// copy all index entries of idx2 to idx
|
// copy all index entries of idx2 to idx
|
||||||
for typ := range idx2.byType {
|
for typ := range idx2.byType {
|
||||||
m2 := &idx2.byType[typ]
|
m2 := &idx2.byType[typ]
|
||||||
m := &idx.byType[typ]
|
m := &idx.byType[typ]
|
||||||
m2.foreach(func(entry *indexEntry) bool {
|
|
||||||
// packIndex is changed as idx2.pack is appended to idx.pack, see below
|
// helper func to test whether an identical entry is already contained in idx
|
||||||
m.add(entry.id, entry.packIndex+packlen, entry.offset, entry.length)
|
hasIdenticalEntry := func(e2 *indexEntry) (found bool) {
|
||||||
|
m.foreachWithID(e2.id, func(e *indexEntry) {
|
||||||
|
b := idx.toPackedBlob(e, restic.BlobType(typ))
|
||||||
|
b2 := idx2.toPackedBlob(e2, restic.BlobType(typ))
|
||||||
|
if b.Length == b2.Length && b.Offset == b2.Offset && b.PackID == b2.PackID {
|
||||||
|
found = true
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return found
|
||||||
|
}
|
||||||
|
|
||||||
|
m2.foreach(func(e2 *indexEntry) bool {
|
||||||
|
if !hasIdenticalEntry(e2) {
|
||||||
|
// packIndex needs to be changed as idx2.packs was appended to idx.packs, see above
|
||||||
|
m.add(e2.id, e2.packIndex+packlen, e2.offset, e2.length)
|
||||||
|
}
|
||||||
return true
|
return true
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
idx.packs = append(idx.packs, idx2.packs...)
|
|
||||||
idx.treePacks = append(idx.treePacks, idx2.treePacks...)
|
idx.treePacks = append(idx.treePacks, idx2.treePacks...)
|
||||||
idx.ids = append(idx.ids, idx2.ids...)
|
idx.ids = append(idx.ids, idx2.ids...)
|
||||||
idx.supersedes = append(idx.supersedes, idx2.supersedes...)
|
idx.supersedes = append(idx.supersedes, idx2.supersedes...)
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package repository_test
|
package repository_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"testing"
|
"testing"
|
||||||
|
@ -167,6 +168,14 @@ func TestMasterMergeFinalIndexes(t *testing.T) {
|
||||||
rtest.Equals(t, []*repository.Index{idx1, idx2}, finalIndexes)
|
rtest.Equals(t, []*repository.Index{idx1, idx2}, finalIndexes)
|
||||||
|
|
||||||
mIdx.MergeFinalIndexes()
|
mIdx.MergeFinalIndexes()
|
||||||
|
allIndexes := mIdx.All()
|
||||||
|
rtest.Equals(t, 1, len(allIndexes))
|
||||||
|
|
||||||
|
blobCount := 0
|
||||||
|
for _ = range mIdx.Each(context.TODO()) {
|
||||||
|
blobCount++
|
||||||
|
}
|
||||||
|
rtest.Equals(t, 2, blobCount)
|
||||||
|
|
||||||
blobs := mIdx.Lookup(idInIdx1, restic.DataBlob)
|
blobs := mIdx.Lookup(idInIdx1, restic.DataBlob)
|
||||||
rtest.Equals(t, []restic.PackedBlob{blob1}, blobs)
|
rtest.Equals(t, []restic.PackedBlob{blob1}, blobs)
|
||||||
|
@ -176,6 +185,32 @@ func TestMasterMergeFinalIndexes(t *testing.T) {
|
||||||
|
|
||||||
blobs = mIdx.Lookup(restic.NewRandomID(), restic.DataBlob)
|
blobs = mIdx.Lookup(restic.NewRandomID(), restic.DataBlob)
|
||||||
rtest.Assert(t, blobs == nil, "Expected no blobs when fetching with a random id")
|
rtest.Assert(t, blobs == nil, "Expected no blobs when fetching with a random id")
|
||||||
|
|
||||||
|
// merge another index containing identical blobs
|
||||||
|
idx3 := repository.NewIndex()
|
||||||
|
idx3.Store(blob1)
|
||||||
|
idx3.Store(blob2)
|
||||||
|
|
||||||
|
mIdx.Insert(idx3)
|
||||||
|
finalIndexes = mIdx.FinalizeNotFinalIndexes()
|
||||||
|
rtest.Equals(t, []*repository.Index{idx3}, finalIndexes)
|
||||||
|
|
||||||
|
mIdx.MergeFinalIndexes()
|
||||||
|
allIndexes = mIdx.All()
|
||||||
|
rtest.Equals(t, 1, len(allIndexes))
|
||||||
|
|
||||||
|
// Index should have same entries as before!
|
||||||
|
blobs = mIdx.Lookup(idInIdx1, restic.DataBlob)
|
||||||
|
rtest.Equals(t, []restic.PackedBlob{blob1}, blobs)
|
||||||
|
|
||||||
|
blobs = mIdx.Lookup(idInIdx2, restic.DataBlob)
|
||||||
|
rtest.Equals(t, []restic.PackedBlob{blob2}, blobs)
|
||||||
|
|
||||||
|
blobCount = 0
|
||||||
|
for _ = range mIdx.Each(context.TODO()) {
|
||||||
|
blobCount++
|
||||||
|
}
|
||||||
|
rtest.Equals(t, 2, blobCount)
|
||||||
}
|
}
|
||||||
|
|
||||||
func createRandomMasterIndex(rng *rand.Rand, num, size int) (*repository.MasterIndex, restic.ID) {
|
func createRandomMasterIndex(rng *rand.Rand, num, size int) (*repository.MasterIndex, restic.ID) {
|
||||||
|
@ -193,6 +228,15 @@ func createRandomMasterIndex(rng *rand.Rand, num, size int) (*repository.MasterI
|
||||||
return mIdx, lookupID
|
return mIdx, lookupID
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkMasterIndexAlloc(b *testing.B) {
|
||||||
|
rng := rand.New(rand.NewSource(0))
|
||||||
|
b.ReportAllocs()
|
||||||
|
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
createRandomMasterIndex(rng, 10000, 5)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkMasterIndexLookupSingleIndex(b *testing.B) {
|
func BenchmarkMasterIndexLookupSingleIndex(b *testing.B) {
|
||||||
mIdx, lookupID := createRandomMasterIndex(rand.New(rand.NewSource(0)), 1, 200000)
|
mIdx, lookupID := createRandomMasterIndex(rand.New(rand.NewSource(0)), 1, 200000)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue