forked from TrueCloudLab/rclone
Implement --no-traverse flag to stop copy traversing the destination remote.
Refactor sync/copy/move * Don't load the src listing unless doing a sync and --delete-before * Don't load the dst listing if doing copy/move and --no-traverse is set `rclone --no-traverse copy src dst` now won't load either of the listings into memory so will use the minimum amount of memory. This change will also dramatically reduce the amount of memory rclone uses in normal operations (copy without --no-traverse, or sync) as it no longer loads the source file listing into memory at all. Fixes #8 Fixes #544 Fixes #546
This commit is contained in:
parent
13797a1fb8
commit
af4ef8ad8d
5 changed files with 174 additions and 109 deletions
|
@ -87,6 +87,9 @@ written a trailing / - meaning "copy the contents of this directory".
|
||||||
This applies to all commands and whether you are talking about the
|
This applies to all commands and whether you are talking about the
|
||||||
source or destination.
|
source or destination.
|
||||||
|
|
||||||
|
See the `--no-traverse` option for controlling whether rclone lists
|
||||||
|
the destination directory or not.
|
||||||
|
|
||||||
### rclone sync source:path dest:path ###
|
### rclone sync source:path dest:path ###
|
||||||
|
|
||||||
Sync the source to the destination, changing the destination
|
Sync the source to the destination, changing the destination
|
||||||
|
@ -561,12 +564,15 @@ The default is `1m`. Use 0 to disable.
|
||||||
This option allows you to specify when files on your destination are
|
This option allows you to specify when files on your destination are
|
||||||
deleted when you sync folders.
|
deleted when you sync folders.
|
||||||
|
|
||||||
Specifying the value `--delete-before` will delete all files present on the
|
Specifying the value `--delete-before` will delete all files present
|
||||||
destination, but not on the source *before* starting the transfer
|
on the destination, but not on the source *before* starting the
|
||||||
of any new or updated files.
|
transfer of any new or updated files. This uses extra memory as it
|
||||||
|
has to store the source listing before proceeding.
|
||||||
|
|
||||||
Specifying `--delete-during` (default value) will delete files while checking
|
Specifying `--delete-during` (default value) will delete files while
|
||||||
and uploading files. This is usually the fastest option.
|
checking and uploading files. This is usually the fastest option.
|
||||||
|
Currently this works the same as `--delete-after` but it may change in
|
||||||
|
the future.
|
||||||
|
|
||||||
Specifying `--delete-after` will delay deletion of files until all new/updated
|
Specifying `--delete-after` will delay deletion of files until all new/updated
|
||||||
files have been successfully transferred.
|
files have been successfully transferred.
|
||||||
|
@ -731,6 +737,24 @@ This option defaults to `false`.
|
||||||
|
|
||||||
**This should be used only for testing.**
|
**This should be used only for testing.**
|
||||||
|
|
||||||
|
### --no-traverse ###
|
||||||
|
|
||||||
|
The `--no-traverse` flag controls whether the destination file system
|
||||||
|
is traversed when using the `copy` or `move` commands.
|
||||||
|
|
||||||
|
If you are only copying a small number of files and/or have a large
|
||||||
|
number of files on the destination then `--no-traverse` will stop
|
||||||
|
rclone listing the destination and save time.
|
||||||
|
|
||||||
|
However if you are copying a large number of files, especially if you
|
||||||
|
are doing a copy where lots of the files haven't changed and won't
|
||||||
|
need copying then you shouldn't use `--no-traverse`.
|
||||||
|
|
||||||
|
It can also be used to reduce the memory usage of rclone when copying
|
||||||
|
- `rclone --no-traverse copy src dst` won't load either the source or
|
||||||
|
destination listings into memory so will use the minimum amount of
|
||||||
|
memory.
|
||||||
|
|
||||||
Filtering
|
Filtering
|
||||||
---------
|
---------
|
||||||
|
|
||||||
|
|
|
@ -88,6 +88,7 @@ var (
|
||||||
dedupeMode = pflag.StringP("dedupe-mode", "", "interactive", "Dedupe mode interactive|skip|first|newest|oldest|rename.")
|
dedupeMode = pflag.StringP("dedupe-mode", "", "interactive", "Dedupe mode interactive|skip|first|newest|oldest|rename.")
|
||||||
maxDepth = pflag.IntP("max-depth", "", -1, "If set limits the recursion depth to this.")
|
maxDepth = pflag.IntP("max-depth", "", -1, "If set limits the recursion depth to this.")
|
||||||
ignoreSize = pflag.BoolP("ignore-size", "", false, "Ignore size when skipping use mod-time or checksum.")
|
ignoreSize = pflag.BoolP("ignore-size", "", false, "Ignore size when skipping use mod-time or checksum.")
|
||||||
|
noTraverse = pflag.BoolP("no-traverse", "", false, "Don't traverse destination file system on copy.")
|
||||||
bwLimit SizeSuffix
|
bwLimit SizeSuffix
|
||||||
|
|
||||||
// Key to use for password en/decryption.
|
// Key to use for password en/decryption.
|
||||||
|
@ -223,6 +224,7 @@ type ConfigInfo struct {
|
||||||
DedupeMode DeduplicateMode
|
DedupeMode DeduplicateMode
|
||||||
MaxDepth int
|
MaxDepth int
|
||||||
IgnoreSize bool
|
IgnoreSize bool
|
||||||
|
NoTraverse bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// Transport returns an http.RoundTripper with the correct timeouts
|
// Transport returns an http.RoundTripper with the correct timeouts
|
||||||
|
@ -327,6 +329,7 @@ func LoadConfig() {
|
||||||
Config.NoGzip = *noGzip
|
Config.NoGzip = *noGzip
|
||||||
Config.MaxDepth = *maxDepth
|
Config.MaxDepth = *maxDepth
|
||||||
Config.IgnoreSize = *ignoreSize
|
Config.IgnoreSize = *ignoreSize
|
||||||
|
Config.NoTraverse = *noTraverse
|
||||||
|
|
||||||
ConfigPath = *configFile
|
ConfigPath = *configFile
|
||||||
|
|
||||||
|
|
1
fs/fs.go
1
fs/fs.go
|
@ -44,6 +44,7 @@ var (
|
||||||
ErrorListAborted = errors.New("list aborted")
|
ErrorListAborted = errors.New("list aborted")
|
||||||
ErrorListOnlyRoot = errors.New("can only list from root")
|
ErrorListOnlyRoot = errors.New("can only list from root")
|
||||||
ErrorIsFile = errors.New("is a file not a directory")
|
ErrorIsFile = errors.New("is a file not a directory")
|
||||||
|
ErrorNotDeleting = errors.New("not deleting files as there were IO errors")
|
||||||
)
|
)
|
||||||
|
|
||||||
// RegInfo provides information about a filesystem
|
// RegInfo provides information about a filesystem
|
||||||
|
|
206
fs/operations.go
206
fs/operations.go
|
@ -472,13 +472,11 @@ func DeleteFiles(toBeDeleted ObjectsChan) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read a map of Object.Remote to Object for the given Fs.
|
// Read Objects into add() for the given Fs.
|
||||||
// dir is the start directory, "" for root
|
// dir is the start directory, "" for root
|
||||||
// If includeAll is specified all files will be added,
|
// If includeAll is specified all files will be added,
|
||||||
// otherwise only files passing the filter will be added.
|
// otherwise only files passing the filter will be added.
|
||||||
func readFilesMap(fs Fs, includeAll bool, dir string) (files map[string]Object, err error) {
|
func readFilesFn(fs Fs, includeAll bool, dir string, add func(Object)) (err error) {
|
||||||
files = make(map[string]Object)
|
|
||||||
normalised := make(map[string]struct{})
|
|
||||||
list := NewLister()
|
list := NewLister()
|
||||||
if !includeAll {
|
if !includeAll {
|
||||||
list.SetFilter(Config.Filter)
|
list.SetFilter(Config.Filter)
|
||||||
|
@ -488,30 +486,45 @@ func readFilesMap(fs Fs, includeAll bool, dir string) (files map[string]Object,
|
||||||
for {
|
for {
|
||||||
o, err := list.GetObject()
|
o, err := list.GetObject()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return files, err
|
return err
|
||||||
}
|
}
|
||||||
// Check if we are finished
|
// Check if we are finished
|
||||||
if o == nil {
|
if o == nil {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
// Make sure we don't delete excluded files if not required
|
||||||
|
if includeAll || Config.Filter.IncludeObject(o) {
|
||||||
|
add(o)
|
||||||
|
} else {
|
||||||
|
Debug(o, "Excluded from sync (and deletion)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read a map of Object.Remote to Object for the given Fs.
|
||||||
|
// dir is the start directory, "" for root
|
||||||
|
// If includeAll is specified all files will be added,
|
||||||
|
// otherwise only files passing the filter will be added.
|
||||||
|
//
|
||||||
|
// This also detects duplicates and normalised duplicates
|
||||||
|
func readFilesMap(fs Fs, includeAll bool, dir string) (files map[string]Object, err error) {
|
||||||
|
files = make(map[string]Object)
|
||||||
|
normalised := make(map[string]struct{})
|
||||||
|
err = readFilesFn(fs, includeAll, dir, func(o Object) {
|
||||||
remote := o.Remote()
|
remote := o.Remote()
|
||||||
normalisedRemote := strings.ToLower(norm.NFC.String(remote))
|
normalisedRemote := strings.ToLower(norm.NFC.String(remote))
|
||||||
if _, ok := files[remote]; !ok {
|
if _, ok := files[remote]; !ok {
|
||||||
// Make sure we don't delete excluded files if not required
|
files[remote] = o
|
||||||
if includeAll || Config.Filter.IncludeObject(o) {
|
if _, ok := normalised[normalisedRemote]; ok {
|
||||||
files[remote] = o
|
Log(o, "Warning: File found with same name but different case on %v", o.Fs())
|
||||||
if _, ok := normalised[normalisedRemote]; ok {
|
|
||||||
Log(o, "Warning: File found with same name but different case on %v", o.Fs())
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Debug(o, "Excluded from sync (and deletion)")
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Log(o, "Duplicate file detected")
|
Log(o, "Duplicate file detected")
|
||||||
}
|
}
|
||||||
normalised[normalisedRemote] = struct{}{}
|
normalised[normalisedRemote] = struct{}{}
|
||||||
}
|
})
|
||||||
return files, nil
|
return files, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// readFilesMaps runs readFilesMap on fdst and fsrc at the same time
|
// readFilesMaps runs readFilesMap on fdst and fsrc at the same time
|
||||||
|
@ -698,115 +711,99 @@ func (s *syncCopyMove) stopTransfers() {
|
||||||
// If DoMove is true then files will be moved instead of copied
|
// If DoMove is true then files will be moved instead of copied
|
||||||
//
|
//
|
||||||
// dir is the start directory, "" for root
|
// dir is the start directory, "" for root
|
||||||
func syncCopyMove(fdst, fsrc Fs, Delete bool, DoMove bool, dir string) error {
|
func (s *syncCopyMove) run() error {
|
||||||
if Same(fdst, fsrc) {
|
if Same(s.fdst, s.fsrc) {
|
||||||
ErrorLog(fdst, "Nothing to do as source and destination are the same")
|
ErrorLog(s.fdst, "Nothing to do as source and destination are the same")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
err := Mkdir(fdst)
|
err := Mkdir(s.fdst)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read the files of both source and destination in parallel
|
// Start reading dstFiles if required
|
||||||
dstFiles, srcFiles, err := readFilesMaps(fdst, Config.Filter.DeleteExcluded, fsrc, false, dir)
|
if !s.noTraverse {
|
||||||
if err != nil {
|
go s.readDstFiles()
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
startDeletion := make(chan struct{}, 0)
|
// If s.deleteBefore then we need to read the whole source map first
|
||||||
|
if s.deleteBefore {
|
||||||
// Delete files if asked
|
// Read source files into the map
|
||||||
var delWg sync.WaitGroup
|
s.srcFiles, err = readFilesMap(s.fsrc, false, s.dir)
|
||||||
delWg.Add(1)
|
if err != nil {
|
||||||
go func() {
|
return err
|
||||||
if !Delete {
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
defer func() {
|
// Pump the map into s.srcFilesChan
|
||||||
Debug(fdst, "Deletion finished")
|
go s.readSrcUsingMap()
|
||||||
delWg.Done()
|
} else {
|
||||||
}()
|
go s.readSrcUsingChan()
|
||||||
|
}
|
||||||
|
|
||||||
_ = <-startDeletion
|
// Wait for dstfiles to finish reading if we were reading them
|
||||||
Debug(fdst, "Starting deletion")
|
// and report any errors
|
||||||
|
if !s.noTraverse {
|
||||||
if Stats.Errored() {
|
err = <-s.dstFilesResult
|
||||||
ErrorLog(fdst, "Not deleting files as there were IO errors")
|
if err != nil {
|
||||||
return
|
return err
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Delete the spare files
|
// Delete files first if required
|
||||||
toDelete := make(ObjectsChan, Config.Transfers)
|
// Have dstFiles and srcFiles complete at this point
|
||||||
|
if s.deleteBefore {
|
||||||
|
err = s.deleteFiles(true)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
go func() {
|
// Start background checking and transferring pipeline
|
||||||
for key, fs := range dstFiles {
|
s.startCheckers()
|
||||||
_, exists := srcFiles[key]
|
s.startTransfers()
|
||||||
if !exists {
|
|
||||||
toDelete <- fs
|
// Do the transfers
|
||||||
|
for src := range s.srcFilesChan {
|
||||||
|
remote := src.Remote()
|
||||||
|
var dst Object
|
||||||
|
if s.noTraverse {
|
||||||
|
var err error
|
||||||
|
dst, err = s.fdst.NewObject(remote)
|
||||||
|
if err != nil {
|
||||||
|
dst = nil
|
||||||
|
if err != ErrorObjectNotFound {
|
||||||
|
Debug(src, "Error making NewObject: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
close(toDelete)
|
|
||||||
}()
|
|
||||||
DeleteFiles(toDelete)
|
|
||||||
}()
|
|
||||||
|
|
||||||
// Start deleting, unless we must delete after transfer
|
|
||||||
if Delete && !Config.DeleteAfter {
|
|
||||||
close(startDeletion)
|
|
||||||
}
|
|
||||||
|
|
||||||
// If deletes must finish before starting transfers, we must wait now.
|
|
||||||
if Delete && Config.DeleteBefore {
|
|
||||||
Log(fdst, "Waiting for deletes to finish (before)")
|
|
||||||
delWg.Wait()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read source files checking them off against dest files
|
|
||||||
toBeChecked := make(ObjectPairChan, Config.Transfers)
|
|
||||||
toBeUploaded := make(ObjectPairChan, Config.Transfers)
|
|
||||||
|
|
||||||
var checkerWg sync.WaitGroup
|
|
||||||
checkerWg.Add(Config.Checkers)
|
|
||||||
for i := 0; i < Config.Checkers; i++ {
|
|
||||||
go PairChecker(toBeChecked, toBeUploaded, &checkerWg)
|
|
||||||
}
|
|
||||||
|
|
||||||
var copierWg sync.WaitGroup
|
|
||||||
copierWg.Add(Config.Transfers)
|
|
||||||
for i := 0; i < Config.Transfers; i++ {
|
|
||||||
if DoMove {
|
|
||||||
go PairMover(toBeUploaded, fdst, &copierWg)
|
|
||||||
} else {
|
} else {
|
||||||
go PairCopier(toBeUploaded, fdst, &copierWg)
|
dst = s.dstFiles[remote]
|
||||||
|
// Remove file from s.dstFiles because it exists in srcFiles
|
||||||
|
delete(s.dstFiles, remote)
|
||||||
}
|
}
|
||||||
}
|
if dst != nil {
|
||||||
|
s.toBeChecked <- ObjectPair{src, dst}
|
||||||
for remote, src := range srcFiles {
|
|
||||||
if dst, dstFound := dstFiles[remote]; dstFound {
|
|
||||||
toBeChecked <- ObjectPair{src, dst}
|
|
||||||
} else {
|
} else {
|
||||||
// No need to check since doesn't exist
|
// No need to check since doesn't exist
|
||||||
toBeUploaded <- ObjectPair{src, nil}
|
s.toBeUploaded <- ObjectPair{src, nil}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
close(toBeChecked)
|
|
||||||
|
|
||||||
Log(fdst, "Waiting for checks to finish")
|
// Stop background checking and transferring pipeline
|
||||||
checkerWg.Wait()
|
s.stopCheckers()
|
||||||
close(toBeUploaded)
|
s.stopTransfers()
|
||||||
Log(fdst, "Waiting for transfers to finish")
|
|
||||||
copierWg.Wait()
|
|
||||||
|
|
||||||
// If deleting after, start deletion now
|
// Retrieve the delayed error from the source listing goroutine
|
||||||
if Delete && Config.DeleteAfter {
|
err = <-s.srcFilesResult
|
||||||
close(startDeletion)
|
if err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
// Unless we have already waited, wait for deletion to finish.
|
|
||||||
if Delete && !Config.DeleteBefore {
|
// Delete files during or after
|
||||||
Log(fdst, "Waiting for deletes to finish (during+after)")
|
if s.Delete && (Config.DeleteDuring || Config.DeleteAfter) {
|
||||||
delWg.Wait()
|
err = s.deleteFiles(false)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
@ -814,12 +811,17 @@ func syncCopyMove(fdst, fsrc Fs, Delete bool, DoMove bool, dir string) error {
|
||||||
|
|
||||||
// Sync fsrc into fdst
|
// Sync fsrc into fdst
|
||||||
func Sync(fdst, fsrc Fs) error {
|
func Sync(fdst, fsrc Fs) error {
|
||||||
return syncCopyMove(fdst, fsrc, true, false, "")
|
return newSyncCopyMove(fdst, fsrc, true, false).run()
|
||||||
}
|
}
|
||||||
|
|
||||||
// CopyDir copies fsrc into fdst
|
// CopyDir copies fsrc into fdst
|
||||||
func CopyDir(fdst, fsrc Fs) error {
|
func CopyDir(fdst, fsrc Fs) error {
|
||||||
return syncCopyMove(fdst, fsrc, false, false, "")
|
return newSyncCopyMove(fdst, fsrc, false, false).run()
|
||||||
|
}
|
||||||
|
|
||||||
|
// moveDir moves fsrc into fdst
|
||||||
|
func moveDir(fdst, fsrc Fs) error {
|
||||||
|
return newSyncCopyMove(fdst, fsrc, false, true).run()
|
||||||
}
|
}
|
||||||
|
|
||||||
// MoveDir moves fsrc into fdst
|
// MoveDir moves fsrc into fdst
|
||||||
|
@ -847,7 +849,7 @@ func MoveDir(fdst, fsrc Fs) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now move the files
|
// Now move the files
|
||||||
err := syncCopyMove(fdst, fsrc, false, true, "")
|
err := moveDir(fdst, fsrc)
|
||||||
if err != nil || Stats.Errored() {
|
if err != nil || Stats.Errored() {
|
||||||
ErrorLog(fdst, "Not deleting files as there were IO errors")
|
ErrorLog(fdst, "Not deleting files as there were IO errors")
|
||||||
return err
|
return err
|
||||||
|
|
|
@ -35,6 +35,7 @@ import (
|
||||||
"github.com/ncw/rclone/fs"
|
"github.com/ncw/rclone/fs"
|
||||||
_ "github.com/ncw/rclone/fs/all" // import all fs
|
_ "github.com/ncw/rclone/fs/all" // import all fs
|
||||||
"github.com/ncw/rclone/fstest"
|
"github.com/ncw/rclone/fstest"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Globals
|
// Globals
|
||||||
|
@ -293,6 +294,40 @@ func TestCopy(t *testing.T) {
|
||||||
fstest.CheckItems(t, r.fremote, file1)
|
fstest.CheckItems(t, r.fremote, file1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Now with --no-traverse
|
||||||
|
func TestCopyNoTraverse(t *testing.T) {
|
||||||
|
r := NewRun(t)
|
||||||
|
defer r.Finalise()
|
||||||
|
|
||||||
|
fs.Config.NoTraverse = true
|
||||||
|
defer func() { fs.Config.NoTraverse = false }()
|
||||||
|
|
||||||
|
file1 := r.WriteFile("sub dir/hello world", "hello world", t1)
|
||||||
|
|
||||||
|
err := fs.CopyDir(r.fremote, r.flocal)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
fstest.CheckItems(t, r.flocal, file1)
|
||||||
|
fstest.CheckItems(t, r.fremote, file1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now with --no-traverse
|
||||||
|
func TestSyncNoTraverse(t *testing.T) {
|
||||||
|
r := NewRun(t)
|
||||||
|
defer r.Finalise()
|
||||||
|
|
||||||
|
fs.Config.NoTraverse = true
|
||||||
|
defer func() { fs.Config.NoTraverse = false }()
|
||||||
|
|
||||||
|
file1 := r.WriteFile("sub dir/hello world", "hello world", t1)
|
||||||
|
|
||||||
|
err := fs.Sync(r.fremote, r.flocal)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
fstest.CheckItems(t, r.flocal, file1)
|
||||||
|
fstest.CheckItems(t, r.fremote, file1)
|
||||||
|
}
|
||||||
|
|
||||||
// Test copy with depth
|
// Test copy with depth
|
||||||
func TestCopyWithDepth(t *testing.T) {
|
func TestCopyWithDepth(t *testing.T) {
|
||||||
r := NewRun(t)
|
r := NewRun(t)
|
||||||
|
@ -712,8 +747,8 @@ func TestSyncAfterRemovingAFileAndAddingAFileWithErrors(t *testing.T) {
|
||||||
fs.Stats.ResetCounters()
|
fs.Stats.ResetCounters()
|
||||||
fs.Stats.Error()
|
fs.Stats.Error()
|
||||||
err := fs.Sync(r.fremote, r.flocal)
|
err := fs.Sync(r.fremote, r.flocal)
|
||||||
if err != nil {
|
if err != fs.ErrorNotDeleting {
|
||||||
t.Fatalf("Sync failed: %v", err)
|
t.Fatalf("Unexpected error: %v", err)
|
||||||
}
|
}
|
||||||
fstest.CheckItems(t, r.flocal, file1, file3)
|
fstest.CheckItems(t, r.flocal, file1, file3)
|
||||||
fstest.CheckItems(t, r.fremote, file1, file2, file3)
|
fstest.CheckItems(t, r.fremote, file1, file2, file3)
|
||||||
|
|
Loading…
Reference in a new issue