Index: Add multiple packs per blob, pack.Type

Change the index so that a blob can be contained in multiple packs.

Require passing the blob type to all lookup functions.
Alexander Neumann 2016-08-03 22:38:05 +02:00
parent 231da4ff80
commit 246302375d
12 changed files with 210 additions and 147 deletions
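The practical effect: every index query is now keyed by the pair (blob ID, blob type), and a lookup can return several packs for the same blob. A minimal sketch of the new call shape, using identifiers that appear in the diffs below; the findBlob helper itself is hypothetical:

package example

import (
    "fmt"

    "restic/backend"
    "restic/pack"
    "restic/repository"
)

// findBlob is a hypothetical helper illustrating the new API shape: the
// blob type is part of the query, and the result is a slice, because a
// blob may now be stored in more than one pack.
func findBlob(repo *repository.Repository, id backend.ID) error {
    blobs, err := repo.Index().Lookup(id, pack.Data)
    if err != nil {
        return err // not indexed as a data blob (it may still exist as a tree)
    }

    for _, pb := range blobs {
        fmt.Printf("pack %v, offset %d, length %d\n",
            pb.PackID.Str(), pb.Offset, pb.Length)
    }
    return nil
}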

View file

@@ -22,7 +22,7 @@ func saveTreeJSON(repo *repository.Repository, item interface{}) (backend.ID, er
     // check if tree has been saved before
     id := backend.Hash(data)
-    if repo.Index().Has(id) {
+    if repo.Index().Has(id, pack.Tree) {
         return id, nil
     }
@@ -58,7 +58,7 @@ func ArchiveReader(repo *repository.Repository, p *Progress, rd io.Reader, name
         id := backend.Hash(chunk.Data)
-        if !repo.Index().Has(id) {
+        if !repo.Index().Has(id, pack.Data) {
             _, err := repo.SaveAndEncrypt(pack.Data, chunk.Data, nil)
             if err != nil {
                 return nil, backend.ID{}, err

View file

@@ -72,7 +72,7 @@ func NewArchiver(repo *repository.Repository) *Archiver {
 // When the blob is not known, false is returned and the blob is added to the
 // list. This means that the caller false is returned to is responsible to save
 // the blob to the backend.
-func (arch *Archiver) isKnownBlob(id backend.ID) bool {
+func (arch *Archiver) isKnownBlob(id backend.ID, t pack.BlobType) bool {
     arch.knownBlobs.Lock()
     defer arch.knownBlobs.Unlock()
@@ -82,7 +82,7 @@ func (arch *Archiver) isKnownBlob(id backend.ID) bool {
     arch.knownBlobs.Insert(id)
-    _, err := arch.repo.Index().Lookup(id)
+    _, err := arch.repo.Index().Lookup(id, t)
     if err == nil {
         return true
     }
@@ -94,7 +94,7 @@ func (arch *Archiver) isKnownBlob(id backend.ID) bool {
 func (arch *Archiver) Save(t pack.BlobType, data []byte, id backend.ID) error {
     debug.Log("Archiver.Save", "Save(%v, %v)\n", t, id.Str())
-    if arch.isKnownBlob(id) {
+    if arch.isKnownBlob(id, pack.Data) {
         debug.Log("Archiver.Save", "blob %v is known\n", id.Str())
         return nil
     }
@@ -119,7 +119,7 @@ func (arch *Archiver) SaveTreeJSON(item interface{}) (backend.ID, error) {
     // check if tree has been saved before
     id := backend.Hash(data)
-    if arch.isKnownBlob(id) {
+    if arch.isKnownBlob(id, pack.Tree) {
         return id, nil
     }
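Note that the in-memory knownBlobs set is still keyed by ID alone; only the index query gains the type parameter. A self-contained model of this two-level check (simplified types, locking omitted, not restic's own):

package main

import "fmt"

func main() {
    // Process-local set of IDs this archiver has already handled.
    known := map[string]bool{}

    // Stand-in for the repository index, keyed by (ID, type).
    index := map[[2]string]bool{
        {"abc123", "tree"}: true,
    }

    // isKnown mirrors isKnownBlob: consult the local set first, then
    // mark the ID as handled and fall back to the typed index lookup.
    isKnown := func(id, tpe string) bool {
        if known[id] {
            return true
        }
        known[id] = true
        return index[[2]string{id, tpe}]
    }

    fmt.Println(isKnown("abc123", "tree")) // true: present in the index
    fmt.Println(isKnown("def456", "data")) // false: caller must save it
    fmt.Println(isKnown("def456", "data")) // true: already being handled
}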

View file

@@ -12,12 +12,13 @@ import (
     "runtime"
 
+    "github.com/juju/errors"
+
     "restic/backend"
     "restic/debug"
     "restic/fs"
     "restic/pack"
     "restic/repository"
-
-    "github.com/juju/errors"
 )
 
 // Node is a file, directory or other item in a backup.
@@ -215,14 +216,14 @@ func (node Node) createFileAt(path string, repo *repository.Repository) error {
     var buf []byte
     for _, id := range node.Content {
-        blob, err := repo.Index().Lookup(id)
+        size, err := repo.LookupBlobSize(id, pack.Data)
         if err != nil {
             return err
         }
 
         buf = buf[:cap(buf)]
-        if uint(len(buf)) < blob.Length {
-            buf = make([]byte, blob.Length)
+        if uint(len(buf)) < size {
+            buf = make([]byte, size)
         }
 
         buf, err := repo.LoadBlob(pack.Data, id, buf)

View file

@@ -12,11 +12,7 @@ type Handle struct {
 }
 
 func (h Handle) String() string {
-    name := h.ID.String()
-    if len(name) > 10 {
-        name = name[:10]
-    }
-    return fmt.Sprintf("<%s/%s>", h.Type, name)
+    return fmt.Sprintf("<%s/%s>", h.Type, h.ID.Str())
 }
 
 // Handles is an ordered list of Handles that implements sort.Interface.

View file

@@ -17,8 +17,9 @@ type BlobType uint8
 
 // These are the blob types that can be stored in a pack.
 const (
-    Data BlobType = 0
-    Tree          = 1
+    Invalid BlobType = iota
+    Data
+    Tree
 )
 
 func (t BlobType) String() string {
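One consequence of starting the enumeration with Invalid: the zero value of BlobType no longer silently means Data, so an uninitialized type can be detected. A self-contained illustration:

package main

import "fmt"

type BlobType uint8

const (
    Invalid BlobType = iota // zero value
    Data
    Tree
)

func main() {
    var t BlobType            // never assigned
    fmt.Println(t == Invalid) // true
    fmt.Println(t == Data)    // false; with the old constants, 0 meant Data
}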

View file

@@ -18,7 +18,7 @@ import (
 // Index holds a lookup table for id -> pack.
 type Index struct {
     m    sync.Mutex
-    pack map[backend.ID][]indexEntry
+    pack map[pack.Handle][]indexEntry
 
     final bool       // set to true for all indexes read from the backend ("finalized")
     id    backend.ID // set to the ID of the index when it's finalized
@@ -27,7 +27,6 @@
 }
 
 type indexEntry struct {
-    tpe    pack.BlobType
     packID backend.ID
     offset uint
     length uint
@@ -36,19 +35,19 @@ type indexEntry struct {
 // NewIndex returns a new index.
 func NewIndex() *Index {
     return &Index{
-        pack:    make(map[backend.ID][]indexEntry),
+        pack:    make(map[pack.Handle][]indexEntry),
         created: time.Now(),
     }
 }
 
 func (idx *Index) store(blob PackedBlob) {
-    list := idx.pack[blob.ID]
-    idx.pack[blob.ID] = append(list, indexEntry{
-        tpe:    blob.Type,
+    newEntry := indexEntry{
         packID: blob.PackID,
         offset: blob.Offset,
         length: blob.Length,
-    })
+    }
+    h := pack.Handle{ID: blob.ID, Type: blob.Type}
+    idx.pack[h] = append(idx.pack[h], newEntry)
 }
 
 // Final returns true iff the index is already written to the repository, it is
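This map key change is what makes multiple packs per blob possible: entries for the same ID but different types no longer collide, and several entries can accumulate under one handle. A self-contained model, with simplified stand-ins for pack.Handle and indexEntry:

package main

import "fmt"

type handle struct{ id, typ string }

type entry struct {
    packID         string
    offset, length uint
}

func main() {
    idx := make(map[handle][]entry)

    // The same data blob stored in two different packs: both entries
    // are kept under the same (ID, type) key.
    h := handle{id: "3ec79977", typ: "data"}
    idx[h] = append(idx[h], entry{packID: "pack-a", offset: 0, length: 2048})
    idx[h] = append(idx[h], entry{packID: "pack-b", offset: 4096, length: 2048})

    // A tree blob with the same ID lives under a different key.
    idx[handle{id: "3ec79977", typ: "tree"}] = []entry{{packID: "pack-c", length: 512}}

    fmt.Println(len(idx[h])) // 2
}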
@@ -112,27 +111,35 @@ func (idx *Index) Store(blob PackedBlob) {
 }
 
 // Lookup queries the index for the blob ID and returns a PackedBlob.
-func (idx *Index) Lookup(id backend.ID) (pb PackedBlob, err error) {
+func (idx *Index) Lookup(id backend.ID, tpe pack.BlobType) (blobs []PackedBlob, err error) {
     idx.m.Lock()
     defer idx.m.Unlock()
 
-    if packs, ok := idx.pack[id]; ok {
-        p := packs[0]
-        debug.Log("Index.Lookup", "id %v found in pack %v at %d, length %d",
-            id.Str(), p.packID.Str(), p.offset, p.length)
-        pb := PackedBlob{
-            Type:   p.tpe,
-            Length: p.length,
-            ID:     id,
-            Offset: p.offset,
-            PackID: p.packID,
+    h := pack.Handle{ID: id, Type: tpe}
+
+    if packs, ok := idx.pack[h]; ok {
+        blobs = make([]PackedBlob, 0, len(packs))
+
+        for _, p := range packs {
+            debug.Log("Index.Lookup", "id %v found in pack %v at %d, length %d",
+                id.Str(), p.packID.Str(), p.offset, p.length)
+
+            blob := PackedBlob{
+                Type:   tpe,
+                Length: p.length,
+                ID:     id,
+                Offset: p.offset,
+                PackID: p.packID,
+            }
+
+            blobs = append(blobs, blob)
         }
-        return pb, nil
+
+        return blobs, nil
     }
 
     debug.Log("Index.Lookup", "id %v not found", id.Str())
-    return PackedBlob{}, fmt.Errorf("id %v not found in index", id)
+    return nil, fmt.Errorf("id %v not found in index", id)
 }
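Callers now either iterate the returned slice or take only the first entry; LookupSize below does the latter, which is safe because every copy of a blob has the same plaintext length. A hypothetical caller that collects all packs holding a blob, written against the package layout shown in this diff:

package example

import (
    "restic/backend"
    "restic/pack"
    "restic/repository"
)

// packsContaining is a hypothetical helper: the set of packs that hold
// a copy of the given blob, built from the multi-result Lookup.
func packsContaining(idx *repository.Index, id backend.ID, tpe pack.BlobType) (backend.IDSet, error) {
    blobs, err := idx.Lookup(id, tpe)
    if err != nil {
        return nil, err
    }

    packs := backend.NewIDSet()
    for _, pb := range blobs {
        packs.Insert(pb.PackID)
    }
    return packs, nil
}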
 // ListPack returns a list of blobs contained in a pack.
@@ -140,12 +147,12 @@ func (idx *Index) ListPack(id backend.ID) (list []PackedBlob) {
     idx.m.Lock()
     defer idx.m.Unlock()
 
-    for blobID, packList := range idx.pack {
+    for h, packList := range idx.pack {
         for _, entry := range packList {
             if entry.packID == id {
                 list = append(list, PackedBlob{
-                    ID:     blobID,
-                    Type:   entry.tpe,
+                    ID:     h.ID,
+                    Type:   h.Type,
                     Length: entry.length,
                     Offset: entry.offset,
                     PackID: entry.packID,
@@ -158,8 +165,8 @@ func (idx *Index) ListPack(id backend.ID) (list []PackedBlob) {
 }
 
 // Has returns true iff the id is listed in the index.
-func (idx *Index) Has(id backend.ID) bool {
-    _, err := idx.Lookup(id)
+func (idx *Index) Has(id backend.ID, tpe pack.BlobType) bool {
+    _, err := idx.Lookup(id, tpe)
     if err == nil {
         return true
     }
@@ -169,12 +176,13 @@ func (idx *Index) Has(id backend.ID) bool {
 // LookupSize returns the length of the cleartext content behind the
 // given id
-func (idx *Index) LookupSize(id backend.ID) (cleartextLength uint, err error) {
-    blob, err := idx.Lookup(id)
+func (idx *Index) LookupSize(id backend.ID, tpe pack.BlobType) (cleartextLength uint, err error) {
+    blobs, err := idx.Lookup(id, tpe)
     if err != nil {
         return 0, err
     }
 
-    return blob.PlaintextLength(), nil
+    return blobs[0].PlaintextLength(), nil
 }
 
 // Supersedes returns the list of indexes this index supersedes, if any.
@@ -229,15 +237,15 @@ func (idx *Index) Each(done chan struct{}) <-chan PackedBlob {
         close(ch)
     }()
 
-    for id, packs := range idx.pack {
+    for h, packs := range idx.pack {
         for _, blob := range packs {
             select {
             case <-done:
                 return
             case ch <- PackedBlob{
-                ID:     id,
+                ID:     h.ID,
+                Type:   h.Type,
                 Offset: blob.offset,
-                Type:   blob.tpe,
                 Length: blob.length,
                 PackID: blob.packID,
             }:
@@ -270,13 +278,12 @@ func (idx *Index) Count(t pack.BlobType) (n uint) {
     idx.m.Lock()
     defer idx.m.Unlock()
 
-    for id, list := range idx.pack {
-        for _, blob := range list {
-            if blob.tpe == t {
-                n++
-                debug.Log("Index.Count", "  blob %v counted: %v", id.Str(), blob)
-            }
+    for h, list := range idx.pack {
+        if h.Type != t {
+            continue
         }
+        n += uint(len(list))
     }
 
     return
@@ -308,18 +315,18 @@ func (idx *Index) generatePackList() ([]*packJSON, error) {
     list := []*packJSON{}
     packs := make(map[backend.ID]*packJSON)
 
-    for id, packedBlobs := range idx.pack {
+    for h, packedBlobs := range idx.pack {
         for _, blob := range packedBlobs {
             if blob.packID.IsNull() {
                 panic("null pack id")
             }
 
-            debug.Log("Index.generatePackList", "handle blob %v", id.Str())
+            debug.Log("Index.generatePackList", "handle blob %v", h)
 
             if blob.packID.IsNull() {
-                debug.Log("Index.generatePackList", "blob %q has no packID! (type %v, offset %v, length %v)",
-                    id.Str(), blob.tpe, blob.offset, blob.length)
-                return nil, fmt.Errorf("unable to serialize index: pack for blob %v hasn't been written yet", id)
+                debug.Log("Index.generatePackList", "blob %v has no packID! (offset %v, length %v)",
+                    h, blob.offset, blob.length)
+                return nil, fmt.Errorf("unable to serialize index: pack for blob %v hasn't been written yet", h)
             }
 
             // see if pack is already in map
@@ -335,8 +342,8 @@ func (idx *Index) generatePackList() ([]*packJSON, error) {
             // add blob
             p.Blobs = append(p.Blobs, blobJSON{
-                ID:     id,
-                Type:   blob.tpe,
+                ID:     h.ID,
+                Type:   h.Type,
                 Offset: blob.offset,
                 Length: blob.length,
             })

View file

@@ -74,17 +74,27 @@ func TestIndexSerialize(t *testing.T) {
     OK(t, err)
 
     for _, testBlob := range tests {
-        result, err := idx.Lookup(testBlob.id)
+        list, err := idx.Lookup(testBlob.id, testBlob.tpe)
         OK(t, err)
 
+        if len(list) != 1 {
+            t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list), list)
+        }
+        result := list[0]
+
         Equals(t, testBlob.pack, result.PackID)
         Equals(t, testBlob.tpe, result.Type)
         Equals(t, testBlob.offset, result.Offset)
         Equals(t, testBlob.length, result.Length)
 
-        result2, err := idx2.Lookup(testBlob.id)
+        list2, err := idx2.Lookup(testBlob.id, testBlob.tpe)
         OK(t, err)
 
+        if len(list2) != 1 {
+            t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list2), list2)
+        }
+        result2 := list2[0]
+
         Equals(t, testBlob.pack, result2.PackID)
         Equals(t, testBlob.tpe, result2.Type)
         Equals(t, testBlob.offset, result2.Offset)
@@ -143,9 +153,15 @@ func TestIndexSerialize(t *testing.T) {
     // all new blobs must be in the index
     for _, testBlob := range newtests {
-        blob, err := idx3.Lookup(testBlob.id)
+        list, err := idx3.Lookup(testBlob.id, testBlob.tpe)
         OK(t, err)
 
+        if len(list) != 1 {
+            t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list), list)
+        }
+        blob := list[0]
+
         Equals(t, testBlob.pack, blob.PackID)
         Equals(t, testBlob.tpe, blob.Type)
         Equals(t, testBlob.offset, blob.Offset)
@@ -265,13 +281,13 @@ var exampleTests = []struct {
 var exampleLookupTest = struct {
     packID backend.ID
-    blobs  backend.IDSet
+    blobs  map[backend.ID]pack.BlobType
 }{
     ParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
-    backend.IDSet{
-        ParseID("3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce"): struct{}{},
-        ParseID("9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae"): struct{}{},
-        ParseID("d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66"): struct{}{},
+    map[backend.ID]pack.BlobType{
+        ParseID("3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce"): pack.Data,
+        ParseID("9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae"): pack.Tree,
+        ParseID("d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66"): pack.Data,
     },
 }
@@ -282,9 +298,16 @@ func TestIndexUnserialize(t *testing.T) {
     OK(t, err)
 
     for _, test := range exampleTests {
-        blob, err := idx.Lookup(test.id)
+        list, err := idx.Lookup(test.id, test.tpe)
         OK(t, err)
 
+        if len(list) != 1 {
+            t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list)
+        }
+        blob := list[0]
+
+        t.Logf("looking for blob %v/%v, got %v", test.tpe, test.id.Str(), blob)
+
         Equals(t, test.packID, blob.PackID)
         Equals(t, test.tpe, blob.Type)
         Equals(t, test.offset, blob.Offset)
@@ -299,9 +322,13 @@ func TestIndexUnserialize(t *testing.T) {
     }
 
     for _, blob := range blobs {
-        if !exampleLookupTest.blobs.Has(blob.ID) {
+        b, ok := exampleLookupTest.blobs[blob.ID]
+        if !ok {
             t.Errorf("unexpected blob %v found", blob.ID.Str())
         }
+        if blob.Type != b {
+            t.Errorf("unexpected type for blob %v: want %v, got %v", blob.ID.Str(), b, blob.Type)
+        }
     }
 }
@@ -310,9 +337,14 @@ func TestIndexUnserializeOld(t *testing.T) {
     OK(t, err)
 
     for _, test := range exampleTests {
-        blob, err := idx.Lookup(test.id)
+        list, err := idx.Lookup(test.id, test.tpe)
         OK(t, err)
 
+        if len(list) != 1 {
+            t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list)
+        }
+        blob := list[0]
+
         Equals(t, test.packID, blob.PackID)
         Equals(t, test.tpe, blob.Type)
         Equals(t, test.offset, blob.Offset)

View file

@@ -21,32 +21,32 @@ func NewMasterIndex() *MasterIndex {
 }
 
 // Lookup queries all known Indexes for the ID and returns the first match.
-func (mi *MasterIndex) Lookup(id backend.ID) (blob PackedBlob, err error) {
+func (mi *MasterIndex) Lookup(id backend.ID, tpe pack.BlobType) (blobs []PackedBlob, err error) {
     mi.idxMutex.RLock()
     defer mi.idxMutex.RUnlock()
 
-    debug.Log("MasterIndex.Lookup", "looking up id %v", id.Str())
+    debug.Log("MasterIndex.Lookup", "looking up id %v, tpe %v", id.Str(), tpe)
 
     for _, idx := range mi.idx {
-        blob, err = idx.Lookup(id)
+        blobs, err = idx.Lookup(id, tpe)
         if err == nil {
             debug.Log("MasterIndex.Lookup",
-                "found id %v: %v", id.Str(), blob)
+                "found id %v: %v", id.Str(), blobs)
             return
         }
     }
 
     debug.Log("MasterIndex.Lookup", "id %v not found in any index", id.Str())
-    return PackedBlob{}, fmt.Errorf("id %v not found in any index", id)
+    return nil, fmt.Errorf("id %v not found in any index", id)
 }
 
 // LookupSize queries all known Indexes for the ID and returns the first match.
-func (mi *MasterIndex) LookupSize(id backend.ID) (uint, error) {
+func (mi *MasterIndex) LookupSize(id backend.ID, tpe pack.BlobType) (uint, error) {
     mi.idxMutex.RLock()
     defer mi.idxMutex.RUnlock()
 
     for _, idx := range mi.idx {
-        length, err := idx.LookupSize(id)
+        length, err := idx.LookupSize(id, tpe)
         if err == nil {
             return length, nil
         }
@@ -72,12 +72,12 @@ func (mi *MasterIndex) ListPack(id backend.ID) (list []PackedBlob) {
 }
 
 // Has queries all known Indexes for the ID and returns the first match.
-func (mi *MasterIndex) Has(id backend.ID) bool {
+func (mi *MasterIndex) Has(id backend.ID, tpe pack.BlobType) bool {
     mi.idxMutex.RLock()
     defer mi.idxMutex.RUnlock()
 
     for _, idx := range mi.idx {
-        if idx.Has(id) {
+        if idx.Has(id, tpe) {
             return true
         }
     }

View file

@@ -13,7 +13,7 @@ import (
 // these packs. Each pack is loaded and the blobs listed in keepBlobs is saved
 // into a new pack. Afterwards, the packs are removed. This operation requires
 // an exclusive lock on the repo.
-func Repack(repo *Repository, packs, keepBlobs backend.IDSet) (err error) {
+func Repack(repo *Repository, packs backend.IDSet, keepBlobs pack.BlobSet) (err error) {
     debug.Log("Repack", "repacking %d packs while keeping %d blobs", len(packs), len(keepBlobs))
 
     buf := make([]byte, 0, maxPackSize)
@@ -41,7 +41,8 @@ func Repack(repo *Repository, packs, keepBlobs backend.IDSet) (err error) {
         debug.Log("Repack", "processing pack %v, blobs: %v", packID.Str(), len(unpck.Entries))
 
         var plaintext []byte
         for _, entry := range unpck.Entries {
-            if !keepBlobs.Has(entry.ID) {
+            h := pack.Handle{ID: entry.ID, Type: entry.Type}
+            if !keepBlobs.Has(h) {
                 continue
             }
@@ -63,7 +64,7 @@ func Repack(repo *Repository, packs, keepBlobs backend.IDSet) (err error) {
             debug.Log("Repack", "  saved blob %v", entry.ID.Str())
 
-            keepBlobs.Delete(entry.ID)
+            keepBlobs.Delete(h)
         }
     }
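keepBlobs is now a pack.BlobSet, a set keyed by (ID, type) handles instead of bare IDs. A hedged sketch of building one, using only the BlobSet operations visible in this commit (NewBlobSet, Insert, Has, Delete) and assuming the entry type of a pack is pack.Blob, as the loop over unpck.Entries above suggests; the helper name is made up:

package example

import "restic/pack"

// keepList is a hypothetical helper: it turns pack entries into the
// (ID, type)-keyed BlobSet that the new Repack signature expects.
func keepList(entries []pack.Blob) pack.BlobSet {
    keep := pack.NewBlobSet()
    for _, e := range entries {
        keep.Insert(pack.Handle{ID: e.ID, Type: e.Type})
    }
    return keep
}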

View file

@@ -58,12 +58,12 @@ func createRandomBlobs(t *testing.T, repo *repository.Repository, blobs int, pDa
 // selectBlobs splits the list of all blobs randomly into two lists. A blob
 // will be contained in the firstone ith probability p.
-func selectBlobs(t *testing.T, repo *repository.Repository, p float32) (list1, list2 backend.IDSet) {
+func selectBlobs(t *testing.T, repo *repository.Repository, p float32) (list1, list2 pack.BlobSet) {
     done := make(chan struct{})
     defer close(done)
 
-    list1 = backend.NewIDSet()
-    list2 = backend.NewIDSet()
+    list1 = pack.NewBlobSet()
+    list2 = pack.NewBlobSet()
 
     for id := range repo.List(backend.Data, done) {
         entries, err := repo.ListPack(id)
@@ -73,9 +73,9 @@ func selectBlobs(t *testing.T, repo *repository.Repository, p float32) (list1, l
         for _, entry := range entries {
             if rand.Float32() <= p {
-                list1.Insert(entry.ID)
+                list1.Insert(pack.Handle{ID: entry.ID, Type: entry.Type})
             } else {
-                list2.Insert(entry.ID)
+                list2.Insert(pack.Handle{ID: entry.ID, Type: entry.Type})
             }
         }
     }
@@ -95,23 +95,25 @@ func listPacks(t *testing.T, repo *repository.Repository) backend.IDSet {
     return list
 }
 
-func findPacksForBlobs(t *testing.T, repo *repository.Repository, blobs backend.IDSet) backend.IDSet {
+func findPacksForBlobs(t *testing.T, repo *repository.Repository, blobs pack.BlobSet) backend.IDSet {
     packs := backend.NewIDSet()
 
     idx := repo.Index()
-    for id := range blobs {
-        pb, err := idx.Lookup(id)
+    for h := range blobs {
+        list, err := idx.Lookup(h.ID, h.Type)
         if err != nil {
             t.Fatal(err)
         }
 
-        packs.Insert(pb.PackID)
+        for _, pb := range list {
+            packs.Insert(pb.PackID)
+        }
     }
 
     return packs
 }
 
-func repack(t *testing.T, repo *repository.Repository, packs, blobs backend.IDSet) {
+func repack(t *testing.T, repo *repository.Repository, packs backend.IDSet, blobs pack.BlobSet) {
     err := repository.Repack(repo, packs, blobs)
     if err != nil {
         t.Fatal(err)
@@ -173,20 +175,29 @@ func TestRepack(t *testing.T) {
     }
 
     idx := repo.Index()
 
-    for id := range keepBlobs {
-        pb, err := idx.Lookup(id)
+    for h := range keepBlobs {
+        list, err := idx.Lookup(h.ID, h.Type)
         if err != nil {
-            t.Errorf("unable to find blob %v in repo", id.Str())
+            t.Errorf("unable to find blob %v in repo", h.ID.Str())
+            continue
         }
 
+        if len(list) != 1 {
+            t.Errorf("expected one pack in the list, got: %v", list)
+            continue
+        }
+        pb := list[0]
+
         if removePacks.Has(pb.PackID) {
             t.Errorf("lookup returned pack ID %v that should've been removed", pb.PackID)
         }
     }
 
-    for id := range removeBlobs {
-        if _, err := idx.Lookup(id); err == nil {
-            t.Errorf("blob %v still contained in the repo", id.Str())
+    for h := range removeBlobs {
+        if _, err := idx.Lookup(h.ID, h.Type); err == nil {
+            t.Errorf("blob %v still contained in the repo", h)
         }
     }
 }

View file

@@ -79,54 +79,68 @@ func (r *Repository) LoadAndDecrypt(t backend.Type, id backend.ID) ([]byte, erro
 // large enough to hold the complete blob.
 func (r *Repository) LoadBlob(t pack.BlobType, id backend.ID, plaintextBuf []byte) ([]byte, error) {
     debug.Log("Repo.LoadBlob", "load %v with id %v", t, id.Str())
 
-    // lookup pack
-    blob, err := r.idx.Lookup(id)
+    // lookup plaintext size of blob
+    size, err := r.idx.LookupSize(id, t)
+    if err != nil {
+        return nil, err
+    }
+
+    // make sure the plaintext buffer is large enough, extend otherwise
+    plaintextBufSize := uint(cap(plaintextBuf))
+    if size > plaintextBufSize {
+        debug.Log("Repo.LoadBlob", "need to expand buffer: want %d bytes, got %d",
+            size, plaintextBufSize)
+        plaintextBuf = make([]byte, size)
+    }
+
+    // lookup packs
+    blobs, err := r.idx.Lookup(id, t)
     if err != nil {
         debug.Log("Repo.LoadBlob", "id %v not found in index: %v", id.Str(), err)
         return nil, err
     }
 
-    plaintextBufSize := uint(cap(plaintextBuf))
-    if blob.PlaintextLength() > plaintextBufSize {
-        debug.Log("Repo.LoadBlob", "need to expand buffer: want %d bytes, got %d",
-            blob.PlaintextLength(), plaintextBufSize)
-        plaintextBuf = make([]byte, blob.PlaintextLength())
-    }
+    for _, blob := range blobs {
+        debug.Log("Repo.LoadBlob", "id %v found: %v", id.Str(), blob)
 
-    if blob.Type != t {
-        debug.Log("Repo.LoadBlob", "wrong type returned for %v: wanted %v, got %v", id.Str(), t, blob.Type)
-        return nil, fmt.Errorf("blob has wrong type %v (wanted: %v)", blob.Type, t)
-    }
+        if blob.Type != t {
+            debug.Log("Repo.LoadBlob", "blob %v has wrong block type, want %v", blob, t)
+        }
 
-    debug.Log("Repo.LoadBlob", "id %v found: %v", id.Str(), blob)
-
-    // load blob from pack
-    h := backend.Handle{Type: backend.Data, Name: blob.PackID.String()}
-    ciphertextBuf := make([]byte, blob.Length)
-    n, err := r.be.Load(h, ciphertextBuf, int64(blob.Offset))
-    if err != nil {
-        debug.Log("Repo.LoadBlob", "error loading blob %v: %v", blob, err)
-        return nil, err
-    }
+        // load blob from pack
+        h := backend.Handle{Type: backend.Data, Name: blob.PackID.String()}
+        ciphertextBuf := make([]byte, blob.Length)
+        n, err := r.be.Load(h, ciphertextBuf, int64(blob.Offset))
+        if err != nil {
+            debug.Log("Repo.LoadBlob", "error loading blob %v: %v", blob, err)
+            fmt.Fprintf(os.Stderr, "error loading blob %v: %v", id, err)
+            continue
+        }
 
-    if uint(n) != blob.Length {
-        debug.Log("Repo.LoadBlob", "error loading blob %v: wrong length returned, want %d, got %d",
-            blob.Length, uint(n))
-        return nil, errors.New("wrong length returned")
-    }
+        if uint(n) != blob.Length {
+            debug.Log("Repo.LoadBlob", "error loading blob %v: wrong length returned, want %d, got %d",
+                blob.Length, uint(n))
+            continue
+        }
 
-    // decrypt
-    plaintextBuf, err = r.decryptTo(plaintextBuf, ciphertextBuf)
-    if err != nil {
-        return nil, err
-    }
+        // decrypt
+        plaintextBuf, err = r.decryptTo(plaintextBuf, ciphertextBuf)
+        if err != nil {
+            fmt.Fprintf(os.Stderr, "decrypting blob %v failed: %v", id, err)
+            continue
+        }
 
-    // check hash
-    if !backend.Hash(plaintextBuf).Equal(id) {
-        return nil, errors.New("invalid data returned")
-    }
+        // check hash
+        if !backend.Hash(plaintextBuf).Equal(id) {
+            fmt.Fprintf(os.Stderr, "blob %v returned invalid hash", id)
+            continue
+        }
 
-    return plaintextBuf, nil
+        return plaintextBuf, nil
+    }
+
+    return nil, fmt.Errorf("loading blob %v from %v packs failed", id.Str(), len(blobs))
 }
 // closeOrErr calls cl.Close() and sets err to the returned error value if
@@ -162,8 +176,8 @@ func (r *Repository) LoadJSONPack(t pack.BlobType, id backend.ID, item interface
 }
 
 // LookupBlobSize returns the size of blob id.
-func (r *Repository) LookupBlobSize(id backend.ID) (uint, error) {
-    return r.idx.LookupSize(id)
+func (r *Repository) LookupBlobSize(id backend.ID, tpe pack.BlobType) (uint, error) {
+    return r.idx.LookupSize(id, tpe)
 }
 
 // SaveAndEncrypt encrypts data and stores it to the backend as type t. If data
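Of note in the LoadBlob rewrite above: a failure on one copy (read error, truncated pack, bad decryption, hash mismatch) is reported to stderr and skipped with continue, so the blob stays readable as long as any pack holds an intact copy. A hypothetical caller that reuses one plaintext buffer across loads, which LoadBlob grows on demand:

package example

import (
    "fmt"

    "restic/backend"
    "restic/pack"
    "restic/repository"
)

// loadAll is a hypothetical helper: it loads several data blobs into a
// single reused buffer to avoid per-blob allocations.
func loadAll(repo *repository.Repository, ids []backend.ID) error {
    var buf []byte
    for _, id := range ids {
        var err error
        buf, err = repo.LoadBlob(pack.Data, id, buf)
        if err != nil {
            return fmt.Errorf("load %v: %v", id.Str(), err)
        }
        fmt.Printf("blob %v: %d bytes\n", id.Str(), len(buf))
    }
    return nil
}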

View file

@@ -42,7 +42,7 @@ func (fs fakeFileSystem) saveFile(rd io.Reader) (blobs backend.IDs) {
         }
 
         id := backend.Hash(chunk.Data)
-        if !fs.blobIsKnown(id) {
+        if !fs.blobIsKnown(id, pack.Data) {
             _, err := fs.repo.SaveAndEncrypt(pack.Data, chunk.Data, &id)
             if err != nil {
                 fs.t.Fatalf("error saving chunk: %v", err)
@@ -72,16 +72,16 @@ func (fs fakeFileSystem) treeIsKnown(tree *Tree) (bool, backend.ID) {
     data = append(data, '\n')
 
     id := backend.Hash(data)
-    return fs.blobIsKnown(id), id
+    return fs.blobIsKnown(id, pack.Tree), id
 }
 
-func (fs fakeFileSystem) blobIsKnown(id backend.ID) bool {
+func (fs fakeFileSystem) blobIsKnown(id backend.ID, t pack.BlobType) bool {
     if fs.knownBlobs.Has(id) {
         return true
     }
 
-    if fs.repo.Index().Has(id) {
+    if fs.repo.Index().Has(id, t) {
         return true
     }