Merge pull request #294 from restic/rework-id

Refactor IDs and IDSet
This commit is contained in:
Alexander Neumann 2015-09-08 21:26:07 +02:00
commit 108d28316a
9 changed files with 247 additions and 85 deletions

View file

@ -102,33 +102,3 @@ func (id *ID) UnmarshalJSON(b []byte) error {
func IDFromData(d []byte) ID {
	// The ID of a piece of data is whatever hashData computes for its bytes.
	id := hashData(d)
	return id
}
// IDs is a list of IDs that implements sort.Interface. Sorting orders the
// IDs bytewise.
type IDs []ID

// Len returns the number of IDs in the list.
func (ids IDs) Len() int {
	return len(ids)
}

// Less reports whether the ID at index i sorts before the ID at index j. The
// IDs are compared byte by byte and the first differing byte decides. Equal
// IDs are not less than each other.
//
// ID is a fixed-size array, so both operands always have the same length and
// no separate length comparison is needed.
func (ids IDs) Less(i, j int) bool {
	a, b := ids[i], ids[j]
	for k := range a {
		if a[k] != b[k] {
			return a[k] < b[k]
		}
	}

	// all bytes are equal
	return false
}

// Swap exchanges the IDs at indexes i and j.
func (ids IDs) Swap(i, j int) {
	ids[i], ids[j] = ids[j], ids[i]
}

69
backend/ids.go Normal file
View file

@ -0,0 +1,69 @@
package backend
import (
"encoding/hex"
"fmt"
)
// IDs is an ordered list of IDs that implements sort.Interface. Sorting
// orders the IDs bytewise.
type IDs []ID

// Len returns the number of IDs in the list.
func (ids IDs) Len() int {
	return len(ids)
}

// Less reports whether the ID at index i sorts before the ID at index j. The
// IDs are compared byte by byte and the first differing byte decides. Equal
// IDs are not less than each other.
//
// ID is a fixed-size array, so both operands always have the same length and
// no separate length comparison is needed.
func (ids IDs) Less(i, j int) bool {
	a, b := ids[i], ids[j]
	for k := range a {
		if a[k] != b[k] {
			return a[k] < b[k]
		}
	}

	// all bytes are equal
	return false
}

// Swap exchanges the IDs at indexes i and j.
func (ids IDs) Swap(i, j int) {
	ids[i], ids[j] = ids[j], ids[i]
}
// Uniq returns the list with duplicate IDs removed. Only the first occurrence
// of each ID is kept, so the remaining IDs appear in the same relative order
// as in the original list. The receiver is not modified; for a list without
// any IDs the result is nil.
func (ids IDs) Uniq() (list IDs) {
	known := NewIDSet()

	for _, id := range ids {
		if !known.Has(id) {
			// first time this ID shows up: remember it and keep it
			known.Insert(id)
			list = append(list, id)
		}
	}

	return list
}
// shortID is an ID whose string form is abbreviated: only the first shortStr
// bytes are printed.
type shortID ID

// String returns the hex encoding of the first shortStr bytes of the ID.
func (id shortID) String() string {
	prefix := id[:shortStr]
	return hex.EncodeToString(prefix)
}
// String returns the list formatted like a slice ("[id1 id2 ...]"), with
// every ID printed in its shortened form (see shortID).
func (ids IDs) String() string {
	short := make([]shortID, len(ids))
	for i, id := range ids {
		short[i] = shortID(id)
	}

	return fmt.Sprintf("%v", short)
}

57
backend/ids_test.go Normal file
View file

@ -0,0 +1,57 @@
package backend_test
import (
"reflect"
"testing"
"github.com/restic/restic/backend"
)
// uniqTests lists inputs for IDs.Uniq (before) together with the expected
// result (after). Every input contains one ID twice; the expected list keeps
// the first occurrence of each ID at its original position.
var uniqTests = []struct {
before, after backend.IDs
}{
// duplicate separated by a different ID
{
backend.IDs{
str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"),
str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
},
backend.IDs{
str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"),
},
},
// adjacent duplicates at the end of the list
{
backend.IDs{
str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"),
str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
},
backend.IDs{
str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"),
str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
},
},
// adjacent duplicates following two distinct IDs
{
backend.IDs{
str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"),
str2id("f658198b405d7e80db5ace1980d125c8da62f636b586c46bf81dfb856a49d0c8"),
str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
},
backend.IDs{
str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"),
str2id("f658198b405d7e80db5ace1980d125c8da62f636b586c46bf81dfb856a49d0c8"),
str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
},
},
}
// TestUniqIDs runs IDs.Uniq on the input of every case in uniqTests and
// reports each case whose result differs from the expected list.
func TestUniqIDs(t *testing.T) {
	for i := range uniqTests {
		want := uniqTests[i].after
		got := uniqTests[i].before.Uniq()
		if reflect.DeepEqual(got, want) {
			continue
		}

		t.Errorf("uniqIDs() test %v failed\n wanted: %v\n got: %v", i, want, got)
	}
}

49
backend/idset.go Normal file
View file

@ -0,0 +1,49 @@
package backend
// IDSet is a set of IDs.
type IDSet map[ID]struct{}

// NewIDSet returns a new IDSet, populated with ids. IDs passed more than once
// are stored only once. The returned set is never nil, so IDs can be inserted
// right away.
func NewIDSet(ids ...ID) IDSet {
	// the number of ids is an upper bound for the size of the set, reserve
	// the space up front so the map does not need to grow while it is filled
	m := make(IDSet, len(ids))
	for _, id := range ids {
		m[id] = struct{}{}
	}

	return m
}
// Has reports whether id is a member of the set.
func (s IDSet) Has(id ID) bool {
	_, found := s[id]
	return found
}
// Insert adds id to the set. Inserting an ID that is already a member leaves
// the set unchanged. The set must not be nil.
func (s IDSet) Insert(id ID) {
s[id] = struct{}{}
}
// Delete removes id from the set. Deleting an ID that is not a member is a
// no-op.
func (s IDSet) Delete(id ID) {
delete(s, id)
}
// List returns a slice of all IDs in the set. The IDs are collected by
// iterating over the underlying map, so their order is not stable between
// calls. The returned slice is never nil.
func (s IDSet) List() IDs {
	ids := make(IDs, len(s))

	next := 0
	for id := range s {
		ids[next] = id
		next++
	}

	return ids
}
// String returns the set formatted as "{id1 id2 ...}", using the string form
// of the list returned by List for the IDs. The order of the IDs follows
// List and is therefore not stable. An empty set yields "{}".
func (s IDSet) String() string {
	str := s.List().String()
	if len(str) < 2 {
		return "{}"
	}

	// str is enclosed in brackets ("[...]"): strip exactly one character on
	// each side and put braces around the rest. Cutting two characters at the
	// end would drop the last digit of the last ID and would panic for the
	// empty list "[]".
	return "{" + str[1:len(str)-1] + "}"
}

34
backend/idset_test.go Normal file
View file

@ -0,0 +1,34 @@
package backend_test
import (
"testing"
"github.com/restic/restic/backend"
)
// idsetTests is the sequence of IDs that TestIDSet looks up in and then
// inserts into a single set, one after another. seen states whether the ID is
// expected to be in the set already at the time of the lookup, i.e. whether
// it occurred earlier in the sequence.
var idsetTests = []struct {
id backend.ID
seen bool
}{
{str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"), false},
{str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"), false},
{str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"), true},
{str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"), true},
{str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"), true},
{str2id("f658198b405d7e80db5ace1980d125c8da62f636b586c46bf81dfb856a49d0c8"), false},
{str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"), true},
{str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"), true},
{str2id("f658198b405d7e80db5ace1980d125c8da62f636b586c46bf81dfb856a49d0c8"), true},
{str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"), true},
}
// TestIDSet walks through idsetTests with one shared set: for every entry it
// first checks that Has agrees with the expected membership and then inserts
// the ID, so later entries see all earlier IDs.
func TestIDSet(t *testing.T) {
	members := backend.NewIDSet()

	for i := range idsetTests {
		tc := idsetTests[i]

		got := members.Has(tc.id)
		if got != tc.seen {
			t.Errorf("IDSet test %v failed: wanted %v, got %v", i, tc.seen, got)
		}

		members.Insert(tc.id)
	}
}

View file

@ -489,21 +489,6 @@ func (c *Checker) Structure(errChan chan<- error, done <-chan struct{}) {
func (c *Checker) checkTree(id backend.ID, tree *restic.Tree) (errs []error) {
debug.Log("Checker.checkTree", "checking tree %v", id.Str())
// if _, ok := c.blobs[id2map(id)]; !ok {
// errs = append(errs, Error{TreeID: id, Err: errors.New("not found in index")})
// }
// blobs, subtrees, treeErrors := c.tree(id)
// if treeErrors != nil {
// debug.Log("Checker.trees", "error checking tree %v: %v", id.Str(), treeErrors)
// errs = append(errs, treeErrors...)
// continue
// }
// treeIDs = append(treeIDs, subtrees...)
// treesChecked[id2map(id)] = struct{}{}
var blobs []backend.ID
for i, node := range tree.Nodes {

View file

@ -57,7 +57,7 @@ func (t *BlobType) UnmarshalJSON(buf []byte) error {
// Blob is a blob within a pack.
type Blob struct {
Type BlobType
Length uint32
Length uint
ID backend.ID
Offset uint
}
@ -100,7 +100,7 @@ func (p *Packer) Add(t BlobType, id backend.ID, rd io.Reader) (int64, error) {
c := Blob{Type: t, ID: id}
n, err := io.Copy(p.hw, rd)
c.Length = uint32(n)
c.Length = uint(n)
c.Offset = p.bytes
p.bytes += uint(n)
p.blobs = append(p.blobs, c)
@ -164,7 +164,7 @@ func (p *Packer) writeHeader(wr io.Writer) (bytesWritten uint, err error) {
for _, b := range p.blobs {
entry := headerEntry{
Type: b.Type,
Length: b.Length,
Length: uint32(b.Length),
ID: b.ID,
}
@ -276,7 +276,7 @@ func NewUnpacker(k *crypto.Key, entries []Blob, rd io.ReadSeeker) (*Unpacker, er
entries = append(entries, Blob{
Type: e.Type,
Length: e.Length,
Length: uint(e.Length),
ID: e.ID,
Offset: pos,
})

View file

@ -15,7 +15,7 @@ import (
// Index holds a lookup table for id -> pack.
type Index struct {
m sync.Mutex
pack map[string]indexEntry
pack map[backend.ID]indexEntry
}
type indexEntry struct {
@ -29,12 +29,12 @@ type indexEntry struct {
// NewIndex returns a new index.
func NewIndex() *Index {
return &Index{
pack: make(map[string]indexEntry),
pack: make(map[backend.ID]indexEntry),
}
}
func (idx *Index) store(t pack.BlobType, id backend.ID, pack *backend.ID, offset, length uint, old bool) {
idx.pack[id.String()] = indexEntry{
idx.pack[id] = indexEntry{
tpe: t,
packID: pack,
offset: offset,
@ -61,9 +61,8 @@ func (idx *Index) Remove(packID backend.ID) {
debug.Log("Index.Remove", "id %v removed", packID.Str())
s := packID.String()
if _, ok := idx.pack[s]; ok {
delete(idx.pack, s)
if _, ok := idx.pack[packID]; ok {
delete(idx.pack, packID)
}
}
@ -72,7 +71,7 @@ func (idx *Index) Lookup(id backend.ID) (packID *backend.ID, tpe pack.BlobType,
idx.m.Lock()
defer idx.m.Unlock()
if p, ok := idx.pack[id.String()]; ok {
if p, ok := idx.pack[id]; ok {
debug.Log("Index.Lookup", "id %v found in pack %v at %d, length %d",
id.Str(), p.packID.Str(), p.offset, p.length)
return p.packID, p.tpe, p.offset, p.length, nil
@ -110,7 +109,7 @@ func (idx *Index) Merge(other *Index) {
for k, v := range other.pack {
if _, ok := idx.pack[k]; ok {
debug.Log("Index.Merge", "index already has key %v, updating", k[:8])
debug.Log("Index.Merge", "index already has key %v, updating", k.Str())
}
idx.pack[k] = v
@ -138,13 +137,7 @@ func (idx *Index) Each(done chan struct{}) <-chan PackedBlob {
close(ch)
}()
for ids, blob := range idx.pack {
id, err := backend.ParseID(ids)
if err != nil {
// ignore invalid IDs
continue
}
for id, blob := range idx.pack {
select {
case <-done:
return
@ -153,7 +146,7 @@ func (idx *Index) Each(done chan struct{}) <-chan PackedBlob {
ID: id,
Offset: blob.offset,
Type: blob.tpe,
Length: uint32(blob.length),
Length: blob.length,
},
PackID: *blob.packID,
}:
@ -173,7 +166,7 @@ func (idx *Index) Count(t pack.BlobType) (n uint) {
for id, blob := range idx.pack {
if blob.tpe == t {
n++
debug.Log("Index.Count", " blob %v counted: %v", id[:8], blob)
debug.Log("Index.Count", " blob %v counted: %v", id.Str(), blob)
}
}
@ -181,12 +174,12 @@ func (idx *Index) Count(t pack.BlobType) (n uint) {
}
type packJSON struct {
ID string `json:"id"`
ID backend.ID `json:"id"`
Blobs []blobJSON `json:"blobs"`
}
type blobJSON struct {
ID string `json:"id"`
ID backend.ID `json:"id"`
Type pack.BlobType `json:"type"`
Offset uint `json:"offset"`
Length uint `json:"length"`
@ -197,7 +190,7 @@ type blobJSON struct {
// blobs in the index.
func (idx *Index) generatePackList(selectFn func(indexEntry) bool) ([]*packJSON, error) {
list := []*packJSON{}
packs := make(map[string]*packJSON)
packs := make(map[backend.ID]*packJSON)
for id, blob := range idx.pack {
if selectFn != nil && !selectFn(blob) {
@ -208,15 +201,15 @@ func (idx *Index) generatePackList(selectFn func(indexEntry) bool) ([]*packJSON,
if blob.packID.IsNull() {
debug.Log("Index.generatePackList", "blob %q has no packID! (type %v, offset %v, length %v)",
id[:8], blob.tpe, blob.offset, blob.length)
id.Str(), blob.tpe, blob.offset, blob.length)
return nil, fmt.Errorf("unable to serialize index: pack for blob %v hasn't been written yet", id)
}
// see if pack is already in map
p, ok := packs[blob.packID.String()]
p, ok := packs[*blob.packID]
if !ok {
// else create new pack
p = &packJSON{ID: blob.packID.String()}
p = &packJSON{ID: *blob.packID}
// and append it to the list and map
list = append(list, p)
@ -302,20 +295,8 @@ func DecodeIndex(rd io.Reader) (*Index, error) {
idx := NewIndex()
for _, pack := range list {
packID, err := backend.ParseID(pack.ID)
if err != nil {
debug.Log("Index.DecodeIndex", "error parsing pack ID %q: %v", pack.ID, err)
return nil, err
}
for _, blob := range pack.Blobs {
blobID, err := backend.ParseID(blob.ID)
if err != nil {
debug.Log("Index.DecodeIndex", "error parsing blob ID %q: %v", blob.ID, err)
return nil, err
}
idx.store(blob.Type, blobID, &packID, blob.Offset, blob.Length, true)
idx.store(blob.Type, blob.ID, &pack.ID, blob.Offset, blob.Length, true)
}
}

View file

@ -20,6 +20,10 @@ func closeIfOpen(ch chan struct{}) {
// processing stops. If done is closed, the function should return.
type ParallelWorkFunc func(id string, done <-chan struct{}) error
// ParallelIDWorkFunc gets one backend.ID to work on. If an error is returned,
// processing stops. If done is closed, the function should return. It is the
// backend.ID counterpart of ParallelWorkFunc, which receives the ID as a
// string.
type ParallelIDWorkFunc func(id backend.ID, done <-chan struct{}) error
// FilesInParallel runs n workers of f in parallel, on the IDs that
// repo.List(t) yield. If f returns an error, the process is aborted and the
// first error is returned.
@ -69,3 +73,16 @@ func FilesInParallel(repo backend.Lister, t backend.Type, n uint, f ParallelWork
return nil
}
// ParallelWorkFuncParseID adapts f, which works on a backend.ID, to the
// string-based ParallelWorkFunc signature. The returned function parses its
// string argument with backend.ParseID and hands the resulting ID to f. If
// parsing fails, the parse error is returned and f is not called.
func ParallelWorkFuncParseID(f ParallelIDWorkFunc) ParallelWorkFunc {
	wrapped := func(name string, done <-chan struct{}) error {
		var (
			id  backend.ID
			err error
		)

		if id, err = backend.ParseID(name); err != nil {
			return err
		}

		return f(id, done)
	}

	return wrapped
}