forked from TrueCloudLab/rclone
88bd80c1fa
When using `--no-traverse` the march routines call NewObject on each potential object in the destination. The concurrency limiter was accidentally arranged so that there were `--checkers` * `--checkers` NewObject calls going on at once. This became obvious when using the sftp backend which used too many connections. Fixes #5824
502 lines
14 KiB
Go
502 lines
14 KiB
Go
// Package march traverses two directories in lock step
|
|
package march
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"path"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/rclone/rclone/fs"
|
|
"github.com/rclone/rclone/fs/dirtree"
|
|
"github.com/rclone/rclone/fs/filter"
|
|
"github.com/rclone/rclone/fs/list"
|
|
"github.com/rclone/rclone/fs/walk"
|
|
"golang.org/x/text/unicode/norm"
|
|
)
|
|
|
|
// March holds the data used to traverse two Fs simultaneously,
|
|
// calling Callback for each match
|
|
type March struct {
|
|
// parameters
|
|
Ctx context.Context // context for background goroutines
|
|
Fdst fs.Fs // source Fs
|
|
Fsrc fs.Fs // dest Fs
|
|
Dir string // directory
|
|
NoTraverse bool // don't traverse the destination
|
|
SrcIncludeAll bool // don't include all files in the src
|
|
DstIncludeAll bool // don't include all files in the destination
|
|
Callback Marcher // object to call with results
|
|
NoCheckDest bool // transfer all objects regardless without checking dst
|
|
NoUnicodeNormalization bool // don't normalize unicode characters in filenames
|
|
// internal state
|
|
srcListDir listDirFn // function to call to list a directory in the src
|
|
dstListDir listDirFn // function to call to list a directory in the dst
|
|
transforms []matchTransformFn
|
|
limiter chan struct{} // make sure we don't do too many operations at once
|
|
}
|
|
|
|
// Marcher is called on each match
|
|
type Marcher interface {
|
|
// SrcOnly is called for a DirEntry found only in the source
|
|
SrcOnly(src fs.DirEntry) (recurse bool)
|
|
// DstOnly is called for a DirEntry found only in the destination
|
|
DstOnly(dst fs.DirEntry) (recurse bool)
|
|
// Match is called for a DirEntry found both in the source and destination
|
|
Match(ctx context.Context, dst, src fs.DirEntry) (recurse bool)
|
|
}
|
|
|
|
// init sets up a march over opt.Fsrc, and opt.Fdst calling back callback for each match
|
|
// Note: this will flag filter-aware backends on the source side
|
|
func (m *March) init(ctx context.Context) {
|
|
ci := fs.GetConfig(ctx)
|
|
m.srcListDir = m.makeListDir(ctx, m.Fsrc, m.SrcIncludeAll)
|
|
if !m.NoTraverse {
|
|
m.dstListDir = m.makeListDir(ctx, m.Fdst, m.DstIncludeAll)
|
|
}
|
|
// Now create the matching transform
|
|
// ..normalise the UTF8 first
|
|
if !m.NoUnicodeNormalization {
|
|
m.transforms = append(m.transforms, norm.NFC.String)
|
|
}
|
|
// ..if destination is caseInsensitive then make it lower case
|
|
// case Insensitive | src | dst | lower case compare |
|
|
// | No | No | No |
|
|
// | Yes | No | No |
|
|
// | No | Yes | Yes |
|
|
// | Yes | Yes | Yes |
|
|
if m.Fdst.Features().CaseInsensitive || ci.IgnoreCaseSync {
|
|
m.transforms = append(m.transforms, strings.ToLower)
|
|
}
|
|
// Limit parallelism for operations
|
|
m.limiter = make(chan struct{}, ci.Checkers)
|
|
}
|
|
|
|
// list a directory into entries, err
|
|
type listDirFn func(dir string) (entries fs.DirEntries, err error)
|
|
|
|
// makeListDir makes constructs a listing function for the given fs
|
|
// and includeAll flags for marching through the file system.
|
|
// Note: this will optionally flag filter-aware backends!
|
|
func (m *March) makeListDir(ctx context.Context, f fs.Fs, includeAll bool) listDirFn {
|
|
ci := fs.GetConfig(ctx)
|
|
fi := filter.GetConfig(ctx)
|
|
if !(ci.UseListR && f.Features().ListR != nil) && // !--fast-list active and
|
|
!(ci.NoTraverse && fi.HaveFilesFrom()) { // !(--files-from and --no-traverse)
|
|
return func(dir string) (entries fs.DirEntries, err error) {
|
|
dirCtx := filter.SetUseFilter(m.Ctx, f.Features().FilterAware && !includeAll) // make filter-aware backends constrain List
|
|
return list.DirSorted(dirCtx, f, includeAll, dir)
|
|
}
|
|
}
|
|
|
|
// This returns a closure for use when --fast-list is active or for when
|
|
// --files-from and --no-traverse is set
|
|
var (
|
|
mu sync.Mutex
|
|
started bool
|
|
dirs dirtree.DirTree
|
|
dirsErr error
|
|
)
|
|
return func(dir string) (entries fs.DirEntries, err error) {
|
|
mu.Lock()
|
|
defer mu.Unlock()
|
|
if !started {
|
|
dirCtx := filter.SetUseFilter(m.Ctx, f.Features().FilterAware && !includeAll) // make filter-aware backends constrain List
|
|
dirs, dirsErr = walk.NewDirTree(dirCtx, f, m.Dir, includeAll, ci.MaxDepth)
|
|
started = true
|
|
}
|
|
if dirsErr != nil {
|
|
return nil, dirsErr
|
|
}
|
|
entries, ok := dirs[dir]
|
|
if !ok {
|
|
err = fs.ErrorDirNotFound
|
|
} else {
|
|
delete(dirs, dir)
|
|
}
|
|
return entries, err
|
|
}
|
|
}
|
|
|
|
// listDirJob describe a directory listing that needs to be done
|
|
type listDirJob struct {
|
|
srcRemote string
|
|
dstRemote string
|
|
srcDepth int
|
|
dstDepth int
|
|
noSrc bool
|
|
noDst bool
|
|
}
|
|
|
|
// Run starts the matching process off
|
|
func (m *March) Run(ctx context.Context) error {
|
|
ci := fs.GetConfig(ctx)
|
|
fi := filter.GetConfig(ctx)
|
|
m.init(ctx)
|
|
|
|
srcDepth := ci.MaxDepth
|
|
if srcDepth < 0 {
|
|
srcDepth = fs.MaxLevel
|
|
}
|
|
dstDepth := srcDepth
|
|
if fi.Opt.DeleteExcluded {
|
|
dstDepth = fs.MaxLevel
|
|
}
|
|
|
|
var mu sync.Mutex // Protects vars below
|
|
var jobError error
|
|
var errCount int
|
|
|
|
// Start some directory listing go routines
|
|
var wg sync.WaitGroup // sync closing of go routines
|
|
var traversing sync.WaitGroup // running directory traversals
|
|
checkers := ci.Checkers
|
|
in := make(chan listDirJob, checkers)
|
|
for i := 0; i < checkers; i++ {
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
for {
|
|
select {
|
|
case <-m.Ctx.Done():
|
|
return
|
|
case job, ok := <-in:
|
|
if !ok {
|
|
return
|
|
}
|
|
jobs, err := m.processJob(job)
|
|
if err != nil {
|
|
mu.Lock()
|
|
// Keep reference only to the first encountered error
|
|
if jobError == nil {
|
|
jobError = err
|
|
}
|
|
errCount++
|
|
mu.Unlock()
|
|
}
|
|
if len(jobs) > 0 {
|
|
traversing.Add(len(jobs))
|
|
go func() {
|
|
// Now we have traversed this directory, send these
|
|
// jobs off for traversal in the background
|
|
for _, newJob := range jobs {
|
|
select {
|
|
case <-m.Ctx.Done():
|
|
// discard job if finishing
|
|
traversing.Done()
|
|
case in <- newJob:
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
traversing.Done()
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
|
|
// Start the process
|
|
traversing.Add(1)
|
|
in <- listDirJob{
|
|
srcRemote: m.Dir,
|
|
srcDepth: srcDepth - 1,
|
|
dstRemote: m.Dir,
|
|
dstDepth: dstDepth - 1,
|
|
noDst: m.NoCheckDest,
|
|
}
|
|
go func() {
|
|
// when the context is cancelled discard the remaining jobs
|
|
<-m.Ctx.Done()
|
|
for range in {
|
|
traversing.Done()
|
|
}
|
|
}()
|
|
traversing.Wait()
|
|
close(in)
|
|
wg.Wait()
|
|
|
|
if errCount > 1 {
|
|
return fmt.Errorf("march failed with %d error(s): first error: %w", errCount, jobError)
|
|
}
|
|
return jobError
|
|
}
|
|
|
|
// Check to see if the context has been cancelled
|
|
func (m *March) aborting() bool {
|
|
select {
|
|
case <-m.Ctx.Done():
|
|
return true
|
|
default:
|
|
}
|
|
return false
|
|
}
|
|
|
|
// matchEntry is an entry plus transformed name
|
|
type matchEntry struct {
|
|
entry fs.DirEntry
|
|
leaf string
|
|
name string
|
|
}
|
|
|
|
// matchEntries contains many matchEntry~s
|
|
type matchEntries []matchEntry
|
|
|
|
// Len is part of sort.Interface.
|
|
func (es matchEntries) Len() int { return len(es) }
|
|
|
|
// Swap is part of sort.Interface.
|
|
func (es matchEntries) Swap(i, j int) { es[i], es[j] = es[j], es[i] }
|
|
|
|
// Less is part of sort.Interface.
|
|
//
|
|
// Compare in order (name, leaf, remote)
|
|
func (es matchEntries) Less(i, j int) bool {
|
|
ei, ej := &es[i], &es[j]
|
|
if ei.name == ej.name {
|
|
if ei.leaf == ej.leaf {
|
|
return fs.CompareDirEntries(ei.entry, ej.entry) < 0
|
|
}
|
|
return ei.leaf < ej.leaf
|
|
}
|
|
return ei.name < ej.name
|
|
}
|
|
|
|
// Sort the directory entries by (name, leaf, remote)
|
|
//
|
|
// We use a stable sort here just in case there are
|
|
// duplicates. Assuming the remote delivers the entries in a
|
|
// consistent order, this will give the best user experience
|
|
// in syncing as it will use the first entry for the sync
|
|
// comparison.
|
|
func (es matchEntries) sort() {
|
|
sort.Stable(es)
|
|
}
|
|
|
|
// make a matchEntries from a newMatch entries
|
|
func newMatchEntries(entries fs.DirEntries, transforms []matchTransformFn) matchEntries {
|
|
es := make(matchEntries, len(entries))
|
|
for i := range es {
|
|
es[i].entry = entries[i]
|
|
name := path.Base(entries[i].Remote())
|
|
es[i].leaf = name
|
|
for _, transform := range transforms {
|
|
name = transform(name)
|
|
}
|
|
es[i].name = name
|
|
}
|
|
es.sort()
|
|
return es
|
|
}
|
|
|
|
// matchPair is a matched pair of direntries returned by matchListings
|
|
type matchPair struct {
|
|
src, dst fs.DirEntry
|
|
}
|
|
|
|
// matchTransformFn converts a name into a form which is used for
|
|
// comparison in matchListings.
|
|
type matchTransformFn func(name string) string
|
|
|
|
// Process the two listings, matching up the items in the two slices
|
|
// using the transform function on each name first.
|
|
//
|
|
// Into srcOnly go Entries which only exist in the srcList
|
|
// Into dstOnly go Entries which only exist in the dstList
|
|
// Into matches go matchPair's of src and dst which have the same name
|
|
//
|
|
// This checks for duplicates and checks the list is sorted.
|
|
func matchListings(srcListEntries, dstListEntries fs.DirEntries, transforms []matchTransformFn) (srcOnly fs.DirEntries, dstOnly fs.DirEntries, matches []matchPair) {
|
|
srcList := newMatchEntries(srcListEntries, transforms)
|
|
dstList := newMatchEntries(dstListEntries, transforms)
|
|
|
|
for iSrc, iDst := 0, 0; ; iSrc, iDst = iSrc+1, iDst+1 {
|
|
var src, dst fs.DirEntry
|
|
var srcName, dstName string
|
|
if iSrc < len(srcList) {
|
|
src = srcList[iSrc].entry
|
|
srcName = srcList[iSrc].name
|
|
}
|
|
if iDst < len(dstList) {
|
|
dst = dstList[iDst].entry
|
|
dstName = dstList[iDst].name
|
|
}
|
|
if src == nil && dst == nil {
|
|
break
|
|
}
|
|
if src != nil && iSrc > 0 {
|
|
prev := srcList[iSrc-1].entry
|
|
prevName := srcList[iSrc-1].name
|
|
if srcName == prevName && fs.DirEntryType(prev) == fs.DirEntryType(src) {
|
|
fs.Logf(src, "Duplicate %s found in source - ignoring", fs.DirEntryType(src))
|
|
iDst-- // ignore the src and retry the dst
|
|
continue
|
|
} else if srcName < prevName {
|
|
// this should never happen since we sort the listings
|
|
panic("Out of order listing in source")
|
|
}
|
|
}
|
|
if dst != nil && iDst > 0 {
|
|
prev := dstList[iDst-1].entry
|
|
prevName := dstList[iDst-1].name
|
|
if dstName == prevName && fs.DirEntryType(dst) == fs.DirEntryType(prev) {
|
|
fs.Logf(dst, "Duplicate %s found in destination - ignoring", fs.DirEntryType(dst))
|
|
iSrc-- // ignore the dst and retry the src
|
|
continue
|
|
} else if dstName < prevName {
|
|
// this should never happen since we sort the listings
|
|
panic("Out of order listing in destination")
|
|
}
|
|
}
|
|
if src != nil && dst != nil {
|
|
// we can't use CompareDirEntries because srcName, dstName could
|
|
// be different then src.Remote() or dst.Remote()
|
|
srcType := fs.DirEntryType(src)
|
|
dstType := fs.DirEntryType(dst)
|
|
if srcName > dstName || (srcName == dstName && srcType > dstType) {
|
|
src = nil
|
|
iSrc--
|
|
} else if srcName < dstName || (srcName == dstName && srcType < dstType) {
|
|
dst = nil
|
|
iDst--
|
|
}
|
|
}
|
|
// Debugf(nil, "src = %v, dst = %v", src, dst)
|
|
switch {
|
|
case src == nil && dst == nil:
|
|
// do nothing
|
|
case src == nil:
|
|
dstOnly = append(dstOnly, dst)
|
|
case dst == nil:
|
|
srcOnly = append(srcOnly, src)
|
|
default:
|
|
matches = append(matches, matchPair{src: src, dst: dst})
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
// processJob processes a listDirJob listing the source and
|
|
// destination directories, comparing them and returning a slice of
|
|
// more jobs
|
|
//
|
|
// returns errors using processError
|
|
func (m *March) processJob(job listDirJob) ([]listDirJob, error) {
|
|
var (
|
|
jobs []listDirJob
|
|
srcList, dstList fs.DirEntries
|
|
srcListErr, dstListErr error
|
|
wg sync.WaitGroup
|
|
mu sync.Mutex
|
|
)
|
|
|
|
// List the src and dst directories
|
|
if !job.noSrc {
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
srcList, srcListErr = m.srcListDir(job.srcRemote)
|
|
}()
|
|
}
|
|
if !m.NoTraverse && !job.noDst {
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
dstList, dstListErr = m.dstListDir(job.dstRemote)
|
|
}()
|
|
}
|
|
|
|
// Wait for listings to complete and report errors
|
|
wg.Wait()
|
|
if srcListErr != nil {
|
|
if job.srcRemote != "" {
|
|
fs.Errorf(job.srcRemote, "error reading source directory: %v", srcListErr)
|
|
} else {
|
|
fs.Errorf(m.Fsrc, "error reading source root directory: %v", srcListErr)
|
|
}
|
|
srcListErr = fs.CountError(srcListErr)
|
|
return nil, srcListErr
|
|
}
|
|
if dstListErr == fs.ErrorDirNotFound {
|
|
// Copy the stuff anyway
|
|
} else if dstListErr != nil {
|
|
if job.dstRemote != "" {
|
|
fs.Errorf(job.dstRemote, "error reading destination directory: %v", dstListErr)
|
|
} else {
|
|
fs.Errorf(m.Fdst, "error reading destination root directory: %v", dstListErr)
|
|
}
|
|
dstListErr = fs.CountError(dstListErr)
|
|
return nil, dstListErr
|
|
}
|
|
|
|
// If NoTraverse is set, then try to find a matching object
|
|
// for each item in the srcList to head dst object
|
|
if m.NoTraverse && !m.NoCheckDest {
|
|
for _, src := range srcList {
|
|
wg.Add(1)
|
|
m.limiter <- struct{}{}
|
|
go func(src fs.DirEntry) {
|
|
defer wg.Done()
|
|
if srcObj, ok := src.(fs.Object); ok {
|
|
leaf := path.Base(srcObj.Remote())
|
|
dstObj, err := m.Fdst.NewObject(m.Ctx, path.Join(job.dstRemote, leaf))
|
|
if err == nil {
|
|
mu.Lock()
|
|
dstList = append(dstList, dstObj)
|
|
mu.Unlock()
|
|
}
|
|
}
|
|
<-m.limiter
|
|
}(src)
|
|
}
|
|
wg.Wait()
|
|
}
|
|
|
|
// Work out what to do and do it
|
|
srcOnly, dstOnly, matches := matchListings(srcList, dstList, m.transforms)
|
|
for _, src := range srcOnly {
|
|
if m.aborting() {
|
|
return nil, m.Ctx.Err()
|
|
}
|
|
recurse := m.Callback.SrcOnly(src)
|
|
if recurse && job.srcDepth > 0 {
|
|
jobs = append(jobs, listDirJob{
|
|
srcRemote: src.Remote(),
|
|
dstRemote: src.Remote(),
|
|
srcDepth: job.srcDepth - 1,
|
|
noDst: true,
|
|
})
|
|
}
|
|
|
|
}
|
|
for _, dst := range dstOnly {
|
|
if m.aborting() {
|
|
return nil, m.Ctx.Err()
|
|
}
|
|
recurse := m.Callback.DstOnly(dst)
|
|
if recurse && job.dstDepth > 0 {
|
|
jobs = append(jobs, listDirJob{
|
|
srcRemote: dst.Remote(),
|
|
dstRemote: dst.Remote(),
|
|
dstDepth: job.dstDepth - 1,
|
|
noSrc: true,
|
|
})
|
|
}
|
|
}
|
|
for _, match := range matches {
|
|
if m.aborting() {
|
|
return nil, m.Ctx.Err()
|
|
}
|
|
recurse := m.Callback.Match(m.Ctx, match.dst, match.src)
|
|
if recurse && job.srcDepth > 0 && job.dstDepth > 0 {
|
|
jobs = append(jobs, listDirJob{
|
|
srcRemote: match.src.Remote(),
|
|
dstRemote: match.dst.Remote(),
|
|
srcDepth: job.srcDepth - 1,
|
|
dstDepth: job.dstDepth - 1,
|
|
})
|
|
}
|
|
}
|
|
return jobs, nil
|
|
}
|