walk: Implement walk.ListR which will use ListR if at all possible

It otherwise has the nearly the same interface as walk.Walk which it
will fall back to if it can't use ListR.

Using walk.ListR will speed up file system operations by default and
use much less memory and start immediately compared to if --fast-list
had been supplied.
This commit is contained in:
Nick Craig-Wood 2019-01-21 10:02:23 +00:00
parent 047f00a411
commit efd720b533
3 changed files with 596 additions and 6 deletions

View file

@ -51,7 +51,7 @@ type Func func(path string, entries fs.DirEntries, err error) error
// //
// Parent directories are always listed before their children // Parent directories are always listed before their children
// //
// This is implemented by WalkR if Config.UseRecursiveListing is true // This is implemented by WalkR if Config.UseUseListR is true
// and f supports it and level > 1, or WalkN otherwise. // and f supports it and level > 1, or WalkN otherwise.
// //
// If --files-from is set then a DirTree will be constructed with just // If --files-from is set then a DirTree will be constructed with just
@ -62,12 +62,265 @@ func Walk(f fs.Fs, path string, includeAll bool, maxLevel int, fn Func) error {
if filter.Active.HaveFilesFrom() { if filter.Active.HaveFilesFrom() {
return walkR(f, path, includeAll, maxLevel, fn, filter.Active.MakeListR(f.NewObject)) return walkR(f, path, includeAll, maxLevel, fn, filter.Active.MakeListR(f.NewObject))
} }
// FIXME should this just be maxLevel < 0 - why the maxLevel > 1
if (maxLevel < 0 || maxLevel > 1) && fs.Config.UseListR && f.Features().ListR != nil { if (maxLevel < 0 || maxLevel > 1) && fs.Config.UseListR && f.Features().ListR != nil {
return walkListR(f, path, includeAll, maxLevel, fn) return walkListR(f, path, includeAll, maxLevel, fn)
} }
return walkListDirSorted(f, path, includeAll, maxLevel, fn) return walkListDirSorted(f, path, includeAll, maxLevel, fn)
} }
// ListType is uses to choose which combination of files or directories is requires
type ListType byte
// Types of listing for ListR
const (
ListObjects ListType = 1 << iota // list objects only
ListDirs // list dirs only
ListAll = ListObjects | ListDirs // list files and dirs
)
// Objects returns true if the list type specifies objects
func (l ListType) Objects() bool {
return (l & ListObjects) != 0
}
// Dirs returns true if the list type specifies dirs
func (l ListType) Dirs() bool {
return (l & ListDirs) != 0
}
// Filter in (inplace) to only contain the type of list entry required
func (l ListType) Filter(in *fs.DirEntries) {
if l == ListAll {
return
}
out := (*in)[:0]
for _, entry := range *in {
switch entry.(type) {
case fs.Object:
if l.Objects() {
out = append(out, entry)
}
case fs.Directory:
if l.Dirs() {
out = append(out, entry)
}
default:
fs.Errorf(nil, "Unknown object type %T", entry)
}
}
*in = out
}
// ListR lists the directory recursively.
//
// If includeAll is not set it will use the filters defined.
//
// If maxLevel is < 0 then it will recurse indefinitely, else it will
// only do maxLevel levels.
//
// If synthesizeDirs is set then for bucket based remotes it will
// synthesize directories from the file structure. This uses extra
// memory so don't set this if you don't need directories, likewise do
// set this if you are interested in directories.
//
// It calls fn for each tranche of DirEntries read. Note that these
// don't necessarily represent a directory
//
// Note that fn will not be called concurrently whereas the directory
// listing will proceed concurrently.
//
// Directories are not listed in any particular order so you can't
// rely on parents coming before children or alphabetical ordering
//
// This is implemented by using ListR on the backend if possible and
// efficient, otherwise by Walk.
//
// NB (f, path) to be replaced by fs.Dir at some point
func ListR(f fs.Fs, path string, includeAll bool, maxLevel int, listType ListType, fn fs.ListRCallback) error {
// FIXME disable this with --no-fast-list ??? `--disable ListR` will do it...
doListR := f.Features().ListR
// Can't use ListR if...
if doListR == nil || // ...no ListR
filter.Active.HaveFilesFrom() || // ...using --files-from
maxLevel >= 0 || // ...using bounded recursion
len(filter.Active.Opt.ExcludeFile) > 0 || // ...using --exclude-file
filter.Active.BoundedRecursion() { // ...filters imply bounded recursion
return listRwalk(f, path, includeAll, maxLevel, listType, fn)
}
return listR(f, path, includeAll, listType, fn, doListR, listType.Dirs() && f.Features().BucketBased)
}
// listRwalk walks the file tree for ListR using Walk
func listRwalk(f fs.Fs, path string, includeAll bool, maxLevel int, listType ListType, fn fs.ListRCallback) error {
var listErr error
walkErr := Walk(f, path, includeAll, maxLevel, func(path string, entries fs.DirEntries, err error) error {
// Carry on listing but return the error at the end
if err != nil {
listErr = err
fs.CountError(err)
fs.Errorf(path, "error listing: %v", err)
return nil
}
listType.Filter(&entries)
return fn(entries)
})
if listErr != nil {
return listErr
}
return walkErr
}
// dirMap keeps track of directories made for bucket based remotes.
// true => directory has been sent
// false => directory has been seen but not sent
type dirMap struct {
mu sync.Mutex
m map[string]bool
root string
}
// make a new dirMap
func newDirMap(root string) *dirMap {
return &dirMap{
m: make(map[string]bool),
root: root,
}
}
// add adds a directory and parents with sent
func (dm *dirMap) add(dir string, sent bool) {
for {
if dir == dm.root || dir == "" {
return
}
currentSent, found := dm.m[dir]
if found {
// If it has been sent already then nothing more to do
if currentSent {
return
}
// If not sent already don't override
if !sent {
return
}
// currenSent == false && sent == true so needs overriding
}
dm.m[dir] = sent
// Add parents in as unsent
dir = parentDir(dir)
sent = false
}
}
// add all the directories in entries and their parents to the dirMap
func (dm *dirMap) addEntries(entries fs.DirEntries) error {
dm.mu.Lock()
defer dm.mu.Unlock()
for _, entry := range entries {
switch x := entry.(type) {
case fs.Object:
dm.add(parentDir(x.Remote()), false)
case fs.Directory:
dm.add(x.Remote(), true)
default:
return errors.Errorf("unknown object type %T", entry)
}
}
return nil
}
// send any missing parents to fn
func (dm *dirMap) sendEntries(fn fs.ListRCallback) (err error) {
// Count the strings first so we allocate the minimum memory
n := 0
for _, sent := range dm.m {
if !sent {
n++
}
}
if n == 0 {
return nil
}
dirs := make([]string, 0, n)
// Fill the dirs up then sort it
for dir, sent := range dm.m {
if !sent {
dirs = append(dirs, dir)
}
}
sort.Strings(dirs)
// Now convert to bulkier Dir in batches and send
now := time.Now()
list := NewListRHelper(fn)
for _, dir := range dirs {
err = list.Add(fs.NewDir(dir, now))
if err != nil {
return err
}
}
return list.Flush()
}
// listR walks the file tree using ListR
func listR(f fs.Fs, path string, includeAll bool, listType ListType, fn fs.ListRCallback, doListR fs.ListRFn, synthesizeDirs bool) error {
includeDirectory := filter.Active.IncludeDirectory(f)
if !includeAll {
includeAll = filter.Active.InActive()
}
var dm *dirMap
if synthesizeDirs {
dm = newDirMap(path)
}
var mu sync.Mutex
err := doListR(path, func(entries fs.DirEntries) (err error) {
if synthesizeDirs {
err = dm.addEntries(entries)
if err != nil {
return err
}
}
listType.Filter(&entries)
if !includeAll {
filteredEntries := entries[:0]
for _, entry := range entries {
var include bool
switch x := entry.(type) {
case fs.Object:
include = filter.Active.IncludeObject(x)
case fs.Directory:
include, err = includeDirectory(x.Remote())
if err != nil {
return err
}
default:
return errors.Errorf("unknown object type %T", entry)
}
if include {
filteredEntries = append(filteredEntries, entry)
} else {
fs.Debugf(entry, "Excluded from sync (and deletion)")
}
}
entries = filteredEntries
}
mu.Lock()
defer mu.Unlock()
return fn(entries)
})
if err != nil {
return err
}
if synthesizeDirs {
err = dm.sendEntries(fn)
if err != nil {
return err
}
}
return nil
}
// walkListDirSorted lists the directory. // walkListDirSorted lists the directory.
// //
// It implements Walk using non recursive directory listing. // It implements Walk using non recursive directory listing.
@ -506,12 +759,9 @@ func walkR(f fs.Fs, path string, includeAll bool, maxLevel int, fn Func, listR f
return nil return nil
} }
// GetAll runs Walk getting all the results // GetAll runs ListR getting all the results
func GetAll(f fs.Fs, path string, includeAll bool, maxLevel int) (objs []fs.Object, dirs []fs.Directory, err error) { func GetAll(f fs.Fs, path string, includeAll bool, maxLevel int) (objs []fs.Object, dirs []fs.Directory, err error) {
err = Walk(f, path, includeAll, maxLevel, func(dirPath string, entries fs.DirEntries, err error) error { err = ListR(f, path, includeAll, maxLevel, ListAll, func(entries fs.DirEntries) error {
if err != nil {
return err
}
for _, entry := range entries { for _, entry := range entries {
switch x := entry.(type) { switch x := entry.(type) {
case fs.Object: case fs.Object:

View file

@ -2,12 +2,15 @@ package walk
import ( import (
"fmt" "fmt"
"io"
"strings"
"sync" "sync"
"testing" "testing"
"github.com/ncw/rclone/fs" "github.com/ncw/rclone/fs"
"github.com/ncw/rclone/fs/filter" "github.com/ncw/rclone/fs/filter"
"github.com/ncw/rclone/fstest/mockdir" "github.com/ncw/rclone/fstest/mockdir"
"github.com/ncw/rclone/fstest/mockfs"
"github.com/ncw/rclone/fstest/mockobject" "github.com/ncw/rclone/fstest/mockobject"
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
@ -634,3 +637,307 @@ b/c/d/
// Set to default value, to avoid side effects // Set to default value, to avoid side effects
filter.Active.Opt.ExcludeFile = "" filter.Active.Opt.ExcludeFile = ""
} }
func TestListType(t *testing.T) {
assert.Equal(t, true, ListObjects.Objects())
assert.Equal(t, false, ListObjects.Dirs())
assert.Equal(t, false, ListDirs.Objects())
assert.Equal(t, true, ListDirs.Dirs())
assert.Equal(t, true, ListAll.Objects())
assert.Equal(t, true, ListAll.Dirs())
var (
a = mockobject.Object("a")
b = mockobject.Object("b")
dir = mockdir.New("dir")
adir = mockobject.Object("dir/a")
dir2 = mockdir.New("dir2")
origEntries = fs.DirEntries{
a, b, dir, adir, dir2,
}
dirEntries = fs.DirEntries{
dir, dir2,
}
objEntries = fs.DirEntries{
a, b, adir,
}
)
copyOrigEntries := func() (out fs.DirEntries) {
out = make(fs.DirEntries, len(origEntries))
copy(out, origEntries)
return out
}
got := copyOrigEntries()
ListAll.Filter(&got)
assert.Equal(t, origEntries, got)
got = copyOrigEntries()
ListObjects.Filter(&got)
assert.Equal(t, objEntries, got)
got = copyOrigEntries()
ListDirs.Filter(&got)
assert.Equal(t, dirEntries, got)
}
func TestListR(t *testing.T) {
objects := fs.DirEntries{
mockobject.Object("a"),
mockobject.Object("b"),
mockdir.New("dir"),
mockobject.Object("dir/a"),
mockobject.Object("dir/b"),
mockobject.Object("dir/c"),
}
f := mockfs.NewFs("mock", "/")
var got []string
clearCallback := func() {
got = nil
}
callback := func(entries fs.DirEntries) error {
for _, entry := range entries {
got = append(got, entry.Remote())
}
return nil
}
doListR := func(dir string, callback fs.ListRCallback) error {
var os fs.DirEntries
for _, o := range objects {
if dir == "" || strings.HasPrefix(o.Remote(), dir+"/") {
os = append(os, o)
}
}
return callback(os)
}
// Setup filter
oldFilter := filter.Active
defer func() {
filter.Active = oldFilter
}()
var err error
filter.Active, err = filter.NewFilter(nil)
require.NoError(t, err)
require.NoError(t, filter.Active.AddRule("+ b"))
require.NoError(t, filter.Active.AddRule("- *"))
// Base case
clearCallback()
err = listR(f, "", true, ListAll, callback, doListR, false)
require.NoError(t, err)
require.Equal(t, []string{"a", "b", "dir", "dir/a", "dir/b", "dir/c"}, got)
// Base case - with Objects
clearCallback()
err = listR(f, "", true, ListObjects, callback, doListR, false)
require.NoError(t, err)
require.Equal(t, []string{"a", "b", "dir/a", "dir/b", "dir/c"}, got)
// Base case - with Dirs
clearCallback()
err = listR(f, "", true, ListDirs, callback, doListR, false)
require.NoError(t, err)
require.Equal(t, []string{"dir"}, got)
// With filter
clearCallback()
err = listR(f, "", false, ListAll, callback, doListR, false)
require.NoError(t, err)
require.Equal(t, []string{"b", "dir", "dir/b"}, got)
// With filter - with Objects
clearCallback()
err = listR(f, "", false, ListObjects, callback, doListR, false)
require.NoError(t, err)
require.Equal(t, []string{"b", "dir/b"}, got)
// With filter - with Dir
clearCallback()
err = listR(f, "", false, ListDirs, callback, doListR, false)
require.NoError(t, err)
require.Equal(t, []string{"dir"}, got)
// With filter and subdir
clearCallback()
err = listR(f, "dir", false, ListAll, callback, doListR, false)
require.NoError(t, err)
require.Equal(t, []string{"dir/b"}, got)
// Now bucket based
objects = fs.DirEntries{
mockobject.Object("a"),
mockobject.Object("b"),
mockobject.Object("dir/a"),
mockobject.Object("dir/b"),
mockobject.Object("dir/subdir/c"),
mockdir.New("dir/subdir"),
}
// Base case
clearCallback()
err = listR(f, "", true, ListAll, callback, doListR, true)
require.NoError(t, err)
require.Equal(t, []string{"a", "b", "dir/a", "dir/b", "dir/subdir/c", "dir/subdir", "dir"}, got)
// With filter
clearCallback()
err = listR(f, "", false, ListAll, callback, doListR, true)
require.NoError(t, err)
require.Equal(t, []string{"b", "dir/b", "dir/subdir", "dir"}, got)
// With filter and subdir
clearCallback()
err = listR(f, "dir", false, ListAll, callback, doListR, true)
require.NoError(t, err)
require.Equal(t, []string{"dir/b", "dir/subdir"}, got)
// With filter and subdir - with Objects
clearCallback()
err = listR(f, "dir", false, ListObjects, callback, doListR, true)
require.NoError(t, err)
require.Equal(t, []string{"dir/b"}, got)
// With filter and subdir - with Dirs
clearCallback()
err = listR(f, "dir", false, ListDirs, callback, doListR, true)
require.NoError(t, err)
require.Equal(t, []string{"dir/subdir"}, got)
}
func TestDirMapAdd(t *testing.T) {
type add struct {
dir string
sent bool
}
for i, test := range []struct {
root string
in []add
want map[string]bool
}{
{
root: "",
in: []add{
{"", true},
},
want: map[string]bool{},
},
{
root: "",
in: []add{
{"a/b/c", true},
},
want: map[string]bool{
"a/b/c": true,
"a/b": false,
"a": false,
},
},
{
root: "",
in: []add{
{"a/b/c", true},
{"a/b", true},
},
want: map[string]bool{
"a/b/c": true,
"a/b": true,
"a": false,
},
},
{
root: "",
in: []add{
{"a/b", true},
{"a/b/c", false},
},
want: map[string]bool{
"a/b/c": false,
"a/b": true,
"a": false,
},
},
{
root: "root",
in: []add{
{"root/a/b", true},
{"root/a/b/c", false},
},
want: map[string]bool{
"root/a/b/c": false,
"root/a/b": true,
"root/a": false,
},
},
} {
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
dm := newDirMap(test.root)
for _, item := range test.in {
dm.add(item.dir, item.sent)
}
assert.Equal(t, test.want, dm.m)
})
}
}
func TestDirMapAddEntries(t *testing.T) {
dm := newDirMap("")
entries := fs.DirEntries{
mockobject.Object("dir/a"),
mockobject.Object("dir/b"),
mockdir.New("dir"),
mockobject.Object("dir2/a"),
mockobject.Object("dir2/b"),
}
require.NoError(t, dm.addEntries(entries))
assert.Equal(t, map[string]bool{"dir": true, "dir2": false}, dm.m)
}
func TestDirMapSendEntries(t *testing.T) {
var got []string
clearCallback := func() {
got = nil
}
callback := func(entries fs.DirEntries) error {
for _, entry := range entries {
got = append(got, entry.Remote())
}
return nil
}
// general test
dm := newDirMap("")
entries := fs.DirEntries{
mockobject.Object("dir/a"),
mockobject.Object("dir/b"),
mockdir.New("dir"),
mockobject.Object("dir2/a"),
mockobject.Object("dir2/b"),
mockobject.Object("dir1/a"),
mockobject.Object("dir3/b"),
}
require.NoError(t, dm.addEntries(entries))
clearCallback()
err := dm.sendEntries(callback)
require.NoError(t, err)
assert.Equal(t, []string{
"dir1",
"dir2",
"dir3",
}, got)
// return error from callback
callback2 := func(entries fs.DirEntries) error {
return io.EOF
}
err = dm.sendEntries(callback2)
require.Equal(t, io.EOF, err)
// empty
dm = newDirMap("")
clearCallback()
err = dm.sendEntries(callback)
require.NoError(t, err)
assert.Equal(t, []string(nil), got)
}

View file

@ -782,6 +782,39 @@ func Run(t *testing.T, opt *Opt) {
TestFsListDirFile2(t) TestFsListDirFile2(t)
}) })
// Test the files are all there with walk.ListR recursive listings
t.Run("FsListR", func(t *testing.T) {
skipIfNotOk(t)
objs, dirs, err := walk.GetAll(remote, "", true, -1)
require.NoError(t, err)
assert.Equal(t, []string{
"hello_ sausage",
"hello_ sausage/êé",
"hello_ sausage/êé/Hello, 世界",
"hello_ sausage/êé/Hello, 世界/ _ ' @ _ _ & _ + ≠",
}, dirsToNames(dirs))
assert.Equal(t, []string{
"file name.txt",
"hello_ sausage/êé/Hello, 世界/ _ ' @ _ _ & _ + ≠/z.txt",
}, objsToNames(objs))
})
// Test the files are all there with
// walk.ListR recursive listings on a sub dir
t.Run("FsListRSubdir", func(t *testing.T) {
skipIfNotOk(t)
objs, dirs, err := walk.GetAll(remote, path.Dir(path.Dir(path.Dir(path.Dir(file2.Path)))), true, -1)
require.NoError(t, err)
assert.Equal(t, []string{
"hello_ sausage/êé",
"hello_ sausage/êé/Hello, 世界",
"hello_ sausage/êé/Hello, 世界/ _ ' @ _ _ & _ + ≠",
}, dirsToNames(dirs))
assert.Equal(t, []string{
"hello_ sausage/êé/Hello, 世界/ _ ' @ _ _ & _ + ≠/z.txt",
}, objsToNames(objs))
})
// TestFsListDirRoot tests that DirList works in the root // TestFsListDirRoot tests that DirList works in the root
TestFsListDirRoot := func(t *testing.T) { TestFsListDirRoot := func(t *testing.T) {
skipIfNotOk(t) skipIfNotOk(t)