Parallelize MasterIndex.Save()

This commit is contained in:
Alexander Weiss 2020-11-12 02:49:53 +01:00
parent 1ec628ddf5
commit 187c8fb259

View file

@ -7,6 +7,7 @@ import (
"github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/ui/progress" "github.com/restic/restic/internal/ui/progress"
"golang.org/x/sync/errgroup"
) )
// MasterIndex is a collection of indexes and IDs of chunks that are in the process of being saved. // MasterIndex is a collection of indexes and IDs of chunks that are in the process of being saved.
@ -261,10 +262,12 @@ func (mi *MasterIndex) MergeFinalIndexes() {
mi.idx = newIdx mi.idx = newIdx
} }
const saveIndexParallelism = 4
// Save saves all known indexes to index files, leaving out any // Save saves all known indexes to index files, leaving out any
// packs whose ID is contained in packBlacklist. The new index contains the IDs // packs whose ID is contained in packBlacklist from finalized indexes.
// of all known indexes in the "supersedes" field. The IDs are also returned in // The new index contains the IDs of all known indexes in the "supersedes"
// the IDSet obsolete // field. The IDs are also returned in the IDSet obsolete.
// After calling this function, you should remove the obsolete index files. // After calling this function, you should remove the obsolete index files.
func (mi *MasterIndex) Save(ctx context.Context, repo restic.Repository, packBlacklist restic.IDSet, extraObsolete restic.IDs, p *progress.Counter) (obsolete restic.IDSet, err error) { func (mi *MasterIndex) Save(ctx context.Context, repo restic.Repository, packBlacklist restic.IDSet, extraObsolete restic.IDs, p *progress.Counter) (obsolete restic.IDSet, err error) {
mi.idxMutex.Lock() mi.idxMutex.Lock()
@ -275,19 +278,27 @@ func (mi *MasterIndex) Save(ctx context.Context, repo restic.Repository, packBla
newIndex := NewIndex() newIndex := NewIndex()
obsolete = restic.NewIDSet() obsolete = restic.NewIDSet()
// track spawned goroutines using wg, create a new context which is
// cancelled as soon as an error occurs.
wg, ctx := errgroup.WithContext(ctx)
ch := make(chan *Index)
wg.Go(func() error {
defer close(ch)
for i, idx := range mi.idx { for i, idx := range mi.idx {
if idx.Final() { if idx.Final() {
ids, err := idx.IDs() ids, err := idx.IDs()
if err != nil { if err != nil {
debug.Log("index %d does not have an ID: %v", err) debug.Log("index %d does not have an ID: %v", err)
return nil, err return err
} }
debug.Log("adding index ids %v to supersedes field", ids) debug.Log("adding index ids %v to supersedes field", ids)
err = newIndex.AddToSupersedes(ids...) err = newIndex.AddToSupersedes(ids...)
if err != nil { if err != nil {
return nil, err return err
} }
obsolete.Merge(restic.NewIDSet(ids...)) obsolete.Merge(restic.NewIDSet(ids...))
} else { } else {
@ -300,9 +311,10 @@ func (mi *MasterIndex) Save(ctx context.Context, repo restic.Repository, packBla
newIndex.StorePack(pbs.packID, pbs.blobs) newIndex.StorePack(pbs.packID, pbs.blobs)
p.Add(1) p.Add(1)
if IndexFull(newIndex) { if IndexFull(newIndex) {
newIndex.Finalize() select {
if _, err := SaveIndex(ctx, repo, newIndex); err != nil { case ch <- newIndex:
return nil, err case <-ctx.Done():
return nil
} }
newIndex = NewIndex() newIndex = NewIndex()
} }
@ -311,14 +323,34 @@ func (mi *MasterIndex) Save(ctx context.Context, repo restic.Repository, packBla
err = newIndex.AddToSupersedes(extraObsolete...) err = newIndex.AddToSupersedes(extraObsolete...)
if err != nil { if err != nil {
return nil, err return err
} }
obsolete.Merge(restic.NewIDSet(extraObsolete...)) obsolete.Merge(restic.NewIDSet(extraObsolete...))
newIndex.Finalize() select {
if _, err := SaveIndex(ctx, repo, newIndex); err != nil { case ch <- newIndex:
return nil, err case <-ctx.Done():
}
return nil
})
// a worker receives an index from ch, and saves the index
worker := func() error {
for idx := range ch {
idx.Finalize()
if _, err := SaveIndex(ctx, repo, idx); err != nil {
return err
}
}
return nil
} }
return // run workers on ch
wg.Go(func() error {
return RunWorkers(saveIndexParallelism, worker)
})
err = wg.Wait()
return obsolete, err
} }