filter: parallelise reading of --files-from - fixes #2835

Before this change, rclone would read the list of files from the
--files-from parameter and check that they existed one at a time.  This
could take a very long time for lots of files.

After this change, rclone will check up to --checkers files in parallel.
This commit is contained in:
Nick Craig-Wood 2018-12-13 10:47:09 +00:00
parent 63b51c6742
commit 5ee1816a71
2 changed files with 35 additions and 11 deletions

View file

@ -13,6 +13,7 @@ import (
"github.com/ncw/rclone/fs"
"github.com/pkg/errors"
"golang.org/x/sync/errgroup"
)
// Active is the globally active filter
@ -511,17 +512,33 @@ func (f *Filter) MakeListR(NewObject func(remote string) (fs.Object, error)) fs.
if !f.HaveFilesFrom() {
return errFilesFromNotSet
}
var entries fs.DirEntries
for remote := range f.files {
entry, err := NewObject(remote)
if err == fs.ErrorObjectNotFound {
// Skip files that are not found
} else if err != nil {
return err
} else {
entries = append(entries, entry)
}
var (
remotes = make(chan string, fs.Config.Checkers)
g errgroup.Group
)
for i := 0; i < fs.Config.Checkers; i++ {
g.Go(func() (err error) {
var entries = make(fs.DirEntries, 1)
for remote := range remotes {
entries[0], err = NewObject(remote)
if err == fs.ErrorObjectNotFound {
// Skip files that are not found
} else if err != nil {
return err
} else {
err = callback(entries)
if err != nil {
return err
}
}
}
return nil
})
}
return callback(entries)
for remote := range f.files {
remotes <- remote
}
close(remotes)
return g.Wait()
}
}

View file

@ -5,6 +5,7 @@ import (
"io/ioutil"
"os"
"strings"
"sync"
"testing"
"time"
@ -220,7 +221,10 @@ func TestNewFilterMakeListR(t *testing.T) {
// NewObject function for MakeListR
newObjects := FilesMap{}
var newObjectMu sync.Mutex
NewObject := func(remote string) (fs.Object, error) {
newObjectMu.Lock()
defer newObjectMu.Unlock()
if remote == "notfound" {
return nil, fs.ErrorObjectNotFound
} else if remote == "error" {
@ -233,7 +237,10 @@ func TestNewFilterMakeListR(t *testing.T) {
// Callback for ListRFn
listRObjects := FilesMap{}
var callbackMu sync.Mutex
listRcallback := func(entries fs.DirEntries) error {
callbackMu.Lock()
defer callbackMu.Unlock()
for _, entry := range entries {
listRObjects[entry.Remote()] = struct{}{}
}