// Package index contains various data structures for indexing content in a
// repository or backend.
package index

import (
	"context"
	"fmt"
	"os"
	"sync"

	"github.com/restic/restic/internal/debug"
	"github.com/restic/restic/internal/errors"
	"github.com/restic/restic/internal/pack"
	"github.com/restic/restic/internal/restic"
	"github.com/restic/restic/internal/ui/progress"
	"golang.org/x/sync/errgroup"
)

// Pack contains information about the contents of a pack.
type Pack struct {
	ID      restic.ID
	Size    int64
	Entries []restic.Blob
}

// Index contains information about blobs and packs stored in a repo.
type Index struct {
	Packs    map[restic.ID]Pack
	IndexIDs restic.IDSet
}

func newIndex() *Index {
	return &Index{
		Packs:    make(map[restic.ID]Pack),
		IndexIDs: restic.NewIDSet(),
	}
}

const listPackWorkers = 10

// Lister lists files and their contents.
type Lister interface {
	// List runs fn for all files of type t in the repo.
	List(ctx context.Context, t restic.FileType, fn func(restic.ID, int64) error) error

	// ListPack returns the list of blobs saved in the pack id and the length
	// of the file as stored in the backend.
	ListPack(ctx context.Context, id restic.ID, size int64) ([]restic.Blob, int64, error)
}

// New creates a new index for repo from scratch. InvalidFiles contains all IDs
// of files that cannot be listed successfully.
func New(ctx context.Context, repo Lister, ignorePacks restic.IDSet, p *progress.Counter) (idx *Index, invalidFiles restic.IDs, err error) {
	defer p.Done()

	type Job struct {
		PackID restic.ID
		Size   int64
	}

	type Result struct {
		Error   error
		PackID  restic.ID
		Size    int64
		Entries []restic.Blob
	}

	inputCh := make(chan Job)
	outputCh := make(chan Result)
	wg, ctx := errgroup.WithContext(ctx)

	// list the files in the repo, send to inputCh
	wg.Go(func() error {
		defer close(inputCh)
		return repo.List(ctx, restic.PackFile, func(id restic.ID, size int64) error {
			if ignorePacks.Has(id) {
				return nil
			}

			job := Job{
				PackID: id,
				Size:   size,
			}

			select {
			case inputCh <- job:
			case <-ctx.Done():
			}
			return nil
		})
	})

	// run the workers listing the files, read from inputCh, send to outputCh
	var workers sync.WaitGroup
	for i := 0; i < listPackWorkers; i++ {
		workers.Add(1)
		go func() {
			defer workers.Done()
			for job := range inputCh {
				res := Result{PackID: job.PackID}
				res.Entries, res.Size, res.Error = repo.ListPack(ctx, job.PackID, job.Size)

				select {
				case outputCh <- res:
				case <-ctx.Done():
					return
				}
			}
		}()
	}

	// wait until all the workers are done, then close outputCh
	wg.Go(func() error {
		workers.Wait()
		close(outputCh)
		return nil
	})

	idx = newIndex()

	for res := range outputCh {
		p.Add(1)
		if res.Error != nil {
			cause := errors.Cause(res.Error)
			if _, ok := cause.(pack.InvalidFileError); ok {
				invalidFiles = append(invalidFiles, res.PackID)
				continue
			}

			fmt.Fprintf(os.Stderr, "pack file %v cannot be listed: %v\n", res.PackID, res.Error)
			continue
		}

		debug.Log("pack %v contains %d blobs", res.PackID, len(res.Entries))

		err := idx.AddPack(res.PackID, res.Size, res.Entries)
		if err != nil {
			return nil, nil, err
		}

		select {
		case <-ctx.Done(): // an error occurred
		default:
		}
	}

	err = wg.Wait()
	if err != nil {
		return nil, nil, err
	}

	return idx, invalidFiles, nil
}
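// buildIndex is a usage sketch, not part of the original file: it shows how a
// caller might distinguish New's invalidFiles result (pack files that could
// not be read as packs) from a hard error. The function name and the decision
// to merely warn about invalid packs are illustrative assumptions.
func buildIndex(ctx context.Context, repo Lister, p *progress.Counter) (*Index, error) {
	// Scan every pack file; an empty set means no packs are ignored.
	idx, invalid, err := New(ctx, repo, restic.NewIDSet(), p)
	if err != nil {
		return nil, err
	}

	// Invalid pack files were skipped during the scan; report them here.
	for _, id := range invalid {
		fmt.Fprintf(os.Stderr, "pack file %v could not be read as a pack\n", id)
	}
	return idx, nil
}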
type packJSON struct {
	ID    restic.ID  `json:"id"`
	Blobs []blobJSON `json:"blobs"`
}

type blobJSON struct {
	ID     restic.ID       `json:"id"`
	Type   restic.BlobType `json:"type"`
	Offset uint            `json:"offset"`
	Length uint            `json:"length"`
}

type indexJSON struct {
	Supersedes restic.IDs `json:"supersedes,omitempty"`
	Packs      []packJSON `json:"packs"`
}

// ListLoader allows listing files and their content, in addition to loading
// and unmarshaling JSON files.
type ListLoader interface {
	Lister
	LoadJSONUnpacked(context.Context, restic.FileType, restic.ID, interface{}) error
}

func loadIndexJSON(ctx context.Context, repo ListLoader, id restic.ID) (*indexJSON, error) {
	debug.Log("process index %v\n", id)

	var idx indexJSON
	err := repo.LoadJSONUnpacked(ctx, restic.IndexFile, id, &idx)
	if err != nil {
		return nil, err
	}

	return &idx, nil
}

// Load creates an index by loading all index files from the repo.
func Load(ctx context.Context, repo ListLoader, p *progress.Counter) (*Index, error) {
	debug.Log("loading indexes")

	defer p.Done()

	supersedes := make(map[restic.ID]restic.IDSet)
	results := make(map[restic.ID]map[restic.ID]Pack)

	index := newIndex()

	err := repo.List(ctx, restic.IndexFile, func(id restic.ID, size int64) error {
		p.Add(1)

		debug.Log("Load index %v", id)
		idx, err := loadIndexJSON(ctx, repo, id)
		if err != nil {
			return err
		}

		res := make(map[restic.ID]Pack)
		supersedes[id] = restic.NewIDSet()
		for _, sid := range idx.Supersedes {
			debug.Log("  index %v supersedes %v", id, sid)
			supersedes[id].Insert(sid)
		}

		for _, jpack := range idx.Packs {
			entries := make([]restic.Blob, 0, len(jpack.Blobs))
			for _, blob := range jpack.Blobs {
				entry := restic.Blob{
					ID:     blob.ID,
					Type:   blob.Type,
					Offset: blob.Offset,
					Length: blob.Length,
				}
				entries = append(entries, entry)
			}

			if err = index.AddPack(jpack.ID, 0, entries); err != nil {
				return err
			}
		}

		// res stays empty; results only records which index files were seen,
		// so that superseded files can be dropped from it below.
		results[id] = res
		index.IndexIDs.Insert(id)

		return nil
	})

	if err != nil {
		return nil, err
	}

	for superID, list := range supersedes {
		for indexID := range list {
			if _, ok := results[indexID]; !ok {
				continue
			}
			debug.Log("  removing index %v, superseded by %v", indexID, superID)
			fmt.Fprintf(os.Stderr, "index %v can be removed, superseded by index %v\n", indexID.Str(), superID.Str())
			delete(results, indexID)
		}
	}

	return index, nil
}

// AddPack adds a pack to the index. If this pack is already in the index, an
// error is returned.
func (idx *Index) AddPack(id restic.ID, size int64, entries []restic.Blob) error {
	if _, ok := idx.Packs[id]; ok {
		return errors.Errorf("pack %v already present in the index", id.Str())
	}

	idx.Packs[id] = Pack{ID: id, Size: size, Entries: entries}

	return nil
}

// RemovePack deletes a pack from the index.
func (idx *Index) RemovePack(id restic.ID) error {
	if _, ok := idx.Packs[id]; !ok {
		return errors.Errorf("pack %v not found in the index", id.Str())
	}

	delete(idx.Packs, id)

	return nil
}

// DuplicateBlobs returns a list of blobs that are stored more than once in the
// repo.
func (idx *Index) DuplicateBlobs() (dups restic.BlobSet) {
	dups = restic.NewBlobSet()
	seen := restic.NewBlobSet()

	for _, p := range idx.Packs {
		for _, entry := range p.Entries {
			h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
			if seen.Has(h) {
				dups.Insert(h)
			}
			seen.Insert(h)
		}
	}

	return dups
}

// PacksForBlobs returns the set of packs in which the blobs are contained.
func (idx *Index) PacksForBlobs(blobs restic.BlobSet) (packs restic.IDSet) {
	packs = restic.NewIDSet()

	for id, p := range idx.Packs {
		for _, entry := range p.Entries {
			if blobs.Has(restic.BlobHandle{ID: entry.ID, Type: entry.Type}) {
				packs.Insert(id)
			}
		}
	}

	return packs
}
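// reportDuplicateBlobs is a usage sketch, not part of the original file: it
// loads the full index via Load, then combines DuplicateBlobs and
// PacksForBlobs to show which packs hold copies of a duplicated blob. The
// function name and output format are illustrative assumptions.
func reportDuplicateBlobs(ctx context.Context, repo ListLoader, p *progress.Counter) error {
	idx, err := Load(ctx, repo, p)
	if err != nil {
		return err
	}

	for h := range idx.DuplicateBlobs() {
		// PacksForBlobs returns every pack containing this blob handle.
		packs := idx.PacksForBlobs(restic.NewBlobSet(h))
		fmt.Fprintf(os.Stderr, "blob %v is stored in %d packs\n", h, len(packs))
	}
	return nil
}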
// maxEntries is the maximum number of packs listed in a single index file;
// Save starts a new index file whenever this limit is reached.
const maxEntries = 3000

// Saver saves structures as JSON.
type Saver interface {
	SaveJSONUnpacked(ctx context.Context, t restic.FileType, item interface{}) (restic.ID, error)
}

// Save writes the complete index to the repo.
func (idx *Index) Save(ctx context.Context, repo Saver, supersedes restic.IDs) (restic.IDs, error) {
	debug.Log("pack files: %d\n", len(idx.Packs))

	var indexIDs []restic.ID

	packs := 0
	jsonIDX := &indexJSON{
		Supersedes: supersedes,
		Packs:      make([]packJSON, 0, maxEntries),
	}

	for packID, pack := range idx.Packs {
		debug.Log("%04d add pack %v with %d entries", packs, packID, len(pack.Entries))

		b := make([]blobJSON, 0, len(pack.Entries))
		for _, blob := range pack.Entries {
			b = append(b, blobJSON{
				ID:     blob.ID,
				Type:   blob.Type,
				Offset: blob.Offset,
				Length: blob.Length,
			})
		}

		p := packJSON{
			ID:    packID,
			Blobs: b,
		}

		jsonIDX.Packs = append(jsonIDX.Packs, p)
		packs++

		if packs == maxEntries {
			id, err := repo.SaveJSONUnpacked(ctx, restic.IndexFile, jsonIDX)
			if err != nil {
				return nil, err
			}
			debug.Log("saved new index as %v", id)

			indexIDs = append(indexIDs, id)
			packs = 0
			jsonIDX.Packs = jsonIDX.Packs[:0]
		}
	}

	if packs > 0 {
		id, err := repo.SaveJSONUnpacked(ctx, restic.IndexFile, jsonIDX)
		if err != nil {
			return nil, err
		}
		debug.Log("saved new index as %v", id)

		indexIDs = append(indexIDs, id)
	}

	return indexIDs, nil
}
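// rewriteIndexFiles is a usage sketch, not part of the original file: it
// persists an index built by Load, marking all index files it was read from
// as superseded, roughly as a rebuild operation might. The function name is
// an illustrative assumption.
func rewriteIndexFiles(ctx context.Context, repo Saver, idx *Index) (restic.IDs, error) {
	// IndexIDs records every index file read by Load; passing them as
	// supersedes tells readers that the newly saved files replace them.
	supersedes := idx.IndexIDs.List()
	return idx.Save(ctx, repo, supersedes)
}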