2016-08-07 15:19:00 +00:00
|
|
|
// Package index contains various data structures for indexing content in a repository or backend.
|
|
|
|
package index
|
|
|
|
|
|
|
|
import (
|
2016-08-14 11:38:59 +00:00
|
|
|
"errors"
|
2016-08-07 15:19:00 +00:00
|
|
|
"fmt"
|
|
|
|
"os"
|
|
|
|
"restic/backend"
|
|
|
|
"restic/debug"
|
|
|
|
"restic/pack"
|
|
|
|
"restic/repository"
|
|
|
|
"restic/worker"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Pack contains information about the contents of a pack.
type Pack struct {
	// Size is the size of the pack, in bytes, as reported when listing packs.
	Size int64
	// Entries lists the blobs stored in this pack.
	Entries []pack.Blob
}
|
|
|
|
|
2016-08-14 11:38:59 +00:00
|
|
|
// Blob contains information about a blob.
type Blob struct {
	// Size is the stored length of the blob, in bytes (taken from the pack
	// entry's Length field).
	Size int64
	// Packs is the set of packs this blob is contained in.
	Packs backend.IDSet
}
|
|
|
|
|
2016-08-07 15:19:00 +00:00
|
|
|
// Index contains information about blobs and packs stored in a repo.
type Index struct {
	// Packs maps a pack ID to the pack's size and blob entries.
	Packs map[backend.ID]Pack
	// Blobs maps a blob handle (ID + type) to its size and containing packs.
	Blobs map[pack.Handle]Blob
	// IndexIDs is the set of index file IDs this Index was built from
	// (filled by Load; empty for an index built by New).
	IndexIDs backend.IDSet
}
|
|
|
|
|
|
|
|
func newIndex() *Index {
|
|
|
|
return &Index{
|
2016-08-14 11:38:59 +00:00
|
|
|
Packs: make(map[backend.ID]Pack),
|
|
|
|
Blobs: make(map[pack.Handle]Blob),
|
|
|
|
IndexIDs: backend.NewIDSet(),
|
2016-08-07 15:19:00 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// New creates a new index for repo from scratch.
|
|
|
|
func New(repo *repository.Repository) (*Index, error) {
|
|
|
|
done := make(chan struct{})
|
|
|
|
defer close(done)
|
|
|
|
|
|
|
|
ch := make(chan worker.Job)
|
|
|
|
go repository.ListAllPacks(repo, ch, done)
|
|
|
|
|
|
|
|
idx := newIndex()
|
|
|
|
|
|
|
|
for job := range ch {
|
|
|
|
packID := job.Data.(backend.ID)
|
|
|
|
if job.Error != nil {
|
|
|
|
fmt.Fprintf(os.Stderr, "unable to list pack %v: %v\n", packID.Str(), job.Error)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
j := job.Result.(repository.ListAllPacksResult)
|
|
|
|
|
|
|
|
debug.Log("Index.New", "pack %v contains %d blobs", packID.Str(), len(j.Entries))
|
|
|
|
|
2016-08-07 20:18:20 +00:00
|
|
|
err := idx.AddPack(packID, j.Size, j.Entries)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2016-08-07 15:19:00 +00:00
|
|
|
}
|
2016-08-07 20:18:20 +00:00
|
|
|
|
2016-08-07 19:56:42 +00:00
|
|
|
p := Pack{Entries: j.Entries, Size: j.Size}
|
2016-08-07 15:19:00 +00:00
|
|
|
idx.Packs[packID] = p
|
|
|
|
}
|
|
|
|
|
|
|
|
return idx, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// loadIndexParallelism is the number of index files to load concurrently.
// NOTE(review): not referenced anywhere in this file — confirm it is used
// elsewhere before removing.
const loadIndexParallelism = 20
|
|
|
|
|
|
|
|
// packJSON is the on-disk JSON representation of a pack in an index file.
type packJSON struct {
	ID    backend.ID `json:"id"`
	Blobs []blobJSON `json:"blobs"`
}
|
|
|
|
|
|
|
|
// blobJSON is the on-disk JSON representation of a single blob entry
// (its ID, type and position within the pack) in an index file.
type blobJSON struct {
	ID     backend.ID    `json:"id"`
	Type   pack.BlobType `json:"type"`
	Offset uint          `json:"offset"`
	Length uint          `json:"length"`
}
|
|
|
|
|
|
|
|
// indexJSON is the on-disk JSON representation of an index file: the list of
// packs it describes and the IDs of older index files it supersedes.
type indexJSON struct {
	Supersedes backend.IDs `json:"supersedes,omitempty"`
	Packs      []*packJSON `json:"packs"`
}
|
|
|
|
|
|
|
|
func loadIndexJSON(repo *repository.Repository, id backend.ID) (*indexJSON, error) {
|
2016-08-07 16:45:25 +00:00
|
|
|
debug.Log("index.loadIndexJSON", "process index %v\n", id.Str())
|
2016-08-07 15:19:00 +00:00
|
|
|
|
|
|
|
var idx indexJSON
|
|
|
|
err := repo.LoadJSONUnpacked(backend.Index, id, &idx)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return &idx, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Load creates an index by loading all index files from the repo.
//
// Superseded index files that were themselves loaded are reported on stderr
// as removable; their packs are still added to the returned Index.
func Load(repo *repository.Repository) (*Index, error) {
	debug.Log("index.Load", "loading indexes")

	done := make(chan struct{})
	defer close(done)

	// supersedes maps an index ID to the set of index IDs it supersedes.
	supersedes := make(map[backend.ID]backend.IDSet)
	// results maps each loaded index ID to the packs found in it.
	// NOTE(review): the inner map (res below) is never populated, so results
	// effectively only records which index IDs were seen — confirm whether
	// filling res was intended.
	results := make(map[backend.ID]map[backend.ID]Pack)

	index := newIndex()

	for id := range repo.List(backend.Index, done) {
		debug.Log("index.Load", "Load index %v", id.Str())
		idx, err := loadIndexJSON(repo, id)
		if err != nil {
			return nil, err
		}

		res := make(map[backend.ID]Pack)
		supersedes[id] = backend.NewIDSet()
		for _, sid := range idx.Supersedes {
			debug.Log("index.Load", "  index %v supersedes %v", id.Str(), sid)
			supersedes[id].Insert(sid)
		}

		for _, jpack := range idx.Packs {
			// Convert the JSON blob entries back into pack.Blob values.
			entries := make([]pack.Blob, 0, len(jpack.Blobs))
			for _, blob := range jpack.Blobs {
				entry := pack.Blob{
					ID:     blob.ID,
					Type:   blob.Type,
					Offset: blob.Offset,
					Length: blob.Length,
				}
				entries = append(entries, entry)
			}

			// The pack's on-disk size is not stored in the index file, so 0
			// is recorded here.
			if err = index.AddPack(jpack.ID, 0, entries); err != nil {
				return nil, err
			}
		}

		results[id] = res
		index.IndexIDs.Insert(id)
	}

	// For every loaded index that is superseded by another loaded index,
	// report it once and drop it from results so it is not reported again.
	for superID, list := range supersedes {
		for indexID := range list {
			if _, ok := results[indexID]; !ok {
				continue
			}
			debug.Log("index.Load", "  removing index %v, superseded by %v", indexID.Str(), superID.Str())
			fmt.Fprintf(os.Stderr, "index %v can be removed, superseded by index %v\n", indexID.Str(), superID.Str())
			delete(results, indexID)
		}
	}

	return index, nil
}
|
|
|
|
|
|
|
|
// AddPack adds a pack to the index. If this pack is already in the index, an
|
|
|
|
// error is returned.
|
|
|
|
func (idx *Index) AddPack(id backend.ID, size int64, entries []pack.Blob) error {
|
|
|
|
if _, ok := idx.Packs[id]; ok {
|
|
|
|
return fmt.Errorf("pack %v already present in the index", id.Str())
|
|
|
|
}
|
|
|
|
|
|
|
|
idx.Packs[id] = Pack{Size: size, Entries: entries}
|
|
|
|
|
|
|
|
for _, entry := range entries {
|
|
|
|
h := pack.Handle{ID: entry.ID, Type: entry.Type}
|
|
|
|
if _, ok := idx.Blobs[h]; !ok {
|
|
|
|
idx.Blobs[h] = Blob{
|
|
|
|
Size: int64(entry.Length),
|
|
|
|
Packs: backend.NewIDSet(),
|
|
|
|
}
|
2016-08-07 15:19:00 +00:00
|
|
|
}
|
2016-08-07 20:18:20 +00:00
|
|
|
|
|
|
|
idx.Blobs[h].Packs.Insert(id)
|
2016-08-07 15:19:00 +00:00
|
|
|
}
|
|
|
|
|
2016-08-07 20:18:20 +00:00
|
|
|
return nil
|
2016-08-07 15:19:00 +00:00
|
|
|
}
|
2016-08-07 19:57:31 +00:00
|
|
|
|
|
|
|
// DuplicateBlobs returns a list of blobs that are stored more than once in the
|
|
|
|
// repo.
|
2016-08-07 20:18:20 +00:00
|
|
|
func (idx *Index) DuplicateBlobs() (dups pack.BlobSet) {
|
|
|
|
dups = pack.NewBlobSet()
|
2016-08-07 19:57:31 +00:00
|
|
|
seen := pack.NewBlobSet()
|
|
|
|
|
|
|
|
for _, p := range idx.Packs {
|
|
|
|
for _, entry := range p.Entries {
|
|
|
|
h := pack.Handle{ID: entry.ID, Type: entry.Type}
|
|
|
|
if seen.Has(h) {
|
2016-08-07 20:18:20 +00:00
|
|
|
dups.Insert(h)
|
2016-08-07 19:57:31 +00:00
|
|
|
}
|
|
|
|
seen.Insert(h)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return dups
|
|
|
|
}
|
2016-08-07 20:18:20 +00:00
|
|
|
|
|
|
|
// PacksForBlobs returns the set of packs in which the blobs are contained.
|
|
|
|
func (idx *Index) PacksForBlobs(blobs pack.BlobSet) (packs backend.IDSet) {
|
|
|
|
packs = backend.NewIDSet()
|
|
|
|
|
|
|
|
for h := range blobs {
|
|
|
|
blob, ok := idx.Blobs[h]
|
|
|
|
if !ok {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
for id := range blob.Packs {
|
|
|
|
packs.Insert(id)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return packs
|
|
|
|
}
|
2016-08-14 11:38:59 +00:00
|
|
|
|
|
|
|
// Location describes the location of a blob in a pack: the pack's ID plus the
// embedded blob entry (type, offset and length within the pack).
type Location struct {
	PackID backend.ID
	pack.Blob
}
|
|
|
|
|
|
|
|
// ErrBlobNotFound is returned by FindBlob when the blob could not be found in
// the index.
var ErrBlobNotFound = errors.New("blob not found in index")
|
|
|
|
|
|
|
|
// FindBlob returns a list of packs and positions the blob can be found in.
|
|
|
|
func (idx *Index) FindBlob(h pack.Handle) ([]Location, error) {
|
|
|
|
blob, ok := idx.Blobs[h]
|
|
|
|
if !ok {
|
|
|
|
return nil, ErrBlobNotFound
|
|
|
|
}
|
|
|
|
|
|
|
|
result := make([]Location, 0, len(blob.Packs))
|
|
|
|
for packID := range blob.Packs {
|
|
|
|
pack, ok := idx.Packs[packID]
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("pack %v not found in index", packID.Str())
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, entry := range pack.Entries {
|
|
|
|
if entry.Type != h.Type {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if !entry.ID.Equal(h.ID) {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
loc := Location{PackID: packID, Blob: entry}
|
|
|
|
result = append(result, loc)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return result, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Save writes a new index containing the given packs.
|
|
|
|
func Save(repo *repository.Repository, packs map[backend.ID][]pack.Blob, supersedes backend.IDs) (backend.ID, error) {
|
|
|
|
idx := &indexJSON{
|
|
|
|
Supersedes: supersedes,
|
|
|
|
Packs: make([]*packJSON, 0, len(packs)),
|
|
|
|
}
|
|
|
|
|
|
|
|
for packID, blobs := range packs {
|
|
|
|
b := make([]blobJSON, 0, len(blobs))
|
|
|
|
for _, blob := range blobs {
|
|
|
|
b = append(b, blobJSON{
|
|
|
|
ID: blob.ID,
|
|
|
|
Type: blob.Type,
|
|
|
|
Offset: blob.Offset,
|
|
|
|
Length: blob.Length,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
p := &packJSON{
|
|
|
|
ID: packID,
|
|
|
|
Blobs: b,
|
|
|
|
}
|
|
|
|
|
|
|
|
idx.Packs = append(idx.Packs, p)
|
|
|
|
}
|
|
|
|
|
|
|
|
return repo.SaveJSONUnpacked(backend.Index, idx)
|
|
|
|
}
|