From e22b445cff0b2940f93708ccb62df466e3cb6731 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Sat, 6 Jun 2015 08:38:45 +0100 Subject: [PATCH] Implement --size-only flag to sync on size not checksum & modtime - fixes #75 --- README.md | 3 +- docs/content/docs.md | 1 + docs/content/dropbox.md | 7 ++++ fs/accounting.go | 10 +++++ fs/config.go | 5 ++- fs/operations.go | 17 ++++++--- fs/operations_test.go | 84 ++++++++++++++++++++++++++++++++++------- 7 files changed, 105 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index c7eaa1fd7..e6329b3a9 100644 --- a/README.md +++ b/README.md @@ -136,13 +136,14 @@ General options: ``` --bwlimit=0: Bandwidth limit in kBytes/s, or use suffix k|M|G --checkers=8: Number of checkers to run in parallel. - -c, --checksum=false: Skip based on checksum, not mod-time & size + -c, --checksum=false: Skip based on checksum & size, not mod-time & size --config="~/.rclone.conf": Config file. --contimeout=1m0s: Connect timeout -n, --dry-run=false: Do a trial run with no permanent changes --log-file="": Log everything to this file --modify-window=1ns: Max time diff to be considered the same -q, --quiet=false: Print as little stuff as possible + --size-only=false: Skip based on size only, not mod-time or checksum --stats=1m0s: Interval to print stats (0 to disable) --timeout=5m0s: IO idle timeout --transfers=4: Number of file transfers to run in parallel. diff --git a/docs/content/docs.md b/docs/content/docs.md index 3d358d6ca..729228e33 100644 --- a/docs/content/docs.md +++ b/docs/content/docs.md @@ -124,6 +124,7 @@ This help. --log-file="": Log everything to this file --modify-window=1ns: Max time diff to be considered the same -q, --quiet=false: Print as little stuff as possible + --size-only=false: Skip based on size only, not mod-time or checksum --stats=1m0s: Interval to print stats (0 to disable) --timeout=5m0s: IO idle timeout --transfers=4: Number of file transfers to run in parallel. 
diff --git a/docs/content/dropbox.md b/docs/content/dropbox.md index 976d1b40c..b7016ee0f 100644 --- a/docs/content/dropbox.md +++ b/docs/content/dropbox.md @@ -84,3 +84,10 @@ Dropbox datastores are limited to 100,000 rows so this is the maximum number of files rclone can manage on Dropbox. Dropbox is case sensitive which can sometimes cause duplicated files. + +If you use the desktop sync tool and rclone on the same files then the +md5sums and modification times may get out of sync as far as rclone is +concerned. This will cause `Corrupted on transfer: md5sums differ` +error message when fetching files. You can work around this by using +the `--size-only` flag to ignore the md5sums and modification times +for these files. diff --git a/fs/accounting.go b/fs/accounting.go index 772864717..8dc674b2f 100644 --- a/fs/accounting.go +++ b/fs/accounting.go @@ -124,6 +124,16 @@ func (s *StatsInfo) GetErrors() int64 { return s.errors } +// ResetCounters sets the counters (bytes, checks, errors, transfers) to 0 +func (s *StatsInfo) ResetCounters() { + s.lock.Lock() + defer s.lock.Unlock() + s.bytes = 0 + s.errors = 0 + s.checks = 0 + s.transfers = 0 +} + // Errored returns whether there have been any errors func (s *StatsInfo) Errored() bool { s.lock.RLock() diff --git a/fs/config.go b/fs/config.go index 8acc89417..edf768799 100644 --- a/fs/config.go +++ b/fs/config.go @@ -44,7 +44,8 @@ var ( checkers = pflag.IntP("checkers", "", 8, "Number of checkers to run in parallel.") transfers = pflag.IntP("transfers", "", 4, "Number of file transfers to run in parallel.") configFile = pflag.StringP("config", "", ConfigPath, "Config file.") - checkSum = pflag.BoolP("checksum", "c", false, "Skip based on checksum, not mod-time & size") + checkSum = pflag.BoolP("checksum", "c", false, "Skip based on checksum & size, not mod-time & size") + sizeOnly = pflag.BoolP("size-only", "", false, "Skip based on size only, not mod-time or checksum") dryRun = pflag.BoolP("dry-run", "n", false, "Do 
a trial run with no permanent changes") connectTimeout = pflag.DurationP("contimeout", "", 60*time.Second, "Connect timeout") timeout = pflag.DurationP("timeout", "", 5*60*time.Second, "IO idle timeout") @@ -121,6 +122,7 @@ type ConfigInfo struct { Quiet bool DryRun bool CheckSum bool + SizeOnly bool ModifyWindow time.Duration Checkers int Transfers int @@ -197,6 +199,7 @@ func LoadConfig() { Config.Timeout = *timeout Config.ConnectTimeout = *connectTimeout Config.CheckSum = *checkSum + Config.SizeOnly = *sizeOnly ConfigPath = *configFile diff --git a/fs/operations.go b/fs/operations.go index 456a32dc4..56e2ae870 100644 --- a/fs/operations.go +++ b/fs/operations.go @@ -54,15 +54,16 @@ func CheckMd5sums(src, dst Object) (bool, error) { // size, mtime and MD5SUM // // If the src and dst size are different then it is considered to be -// not equal. +// not equal. If --size-only is in effect then this is the only check +// that is done. // // If the size is the same and the mtime is the same then it is -// considered to be equal. This is the heuristic rsync uses when -// not using --checksum. +// considered to be equal. This check is skipped if using --checksum. // -// If the size is the same and and mtime is different or unreadable -// and the MD5SUM is the same then the file is considered to be equal. -// In this case the mtime on the dst is updated. +// If the size is the same and mtime is different, unreadable or +// --checksum is set and the MD5SUM is the same then the file is +// considered to be equal. In this case the mtime on the dst is +// updated if --checksum is not set. // // Otherwise the file is considered to be not equal including if there // were errors reading info. 
@@ -71,6 +72,10 @@ func Equal(src, dst Object) bool { Debug(src, "Sizes differ") return false } + if Config.SizeOnly { + Debug(src, "Sizes identical") + return true + } var srcModTime time.Time if !Config.CheckSum { diff --git a/fs/operations_test.go b/fs/operations_test.go index 256f734cb..a2c702481 100644 --- a/fs/operations_test.go +++ b/fs/operations_test.go @@ -175,44 +175,100 @@ func TestCopyRedownload(t *testing.T) { func TestSyncBasedOnCheckSum(t *testing.T) { cleanTempDir(t) fs.Config.CheckSum = true + defer func() { fs.Config.CheckSum = false }() WriteFile("check sum", "", t1) + local_items := []fstest.Item{ + {Path: "check sum", Size: 0, ModTime: t1, Md5sum: "d41d8cd98f00b204e9800998ecf8427e"}, + } + fstest.CheckListingWithPrecision(t, flocal, local_items, fs.Config.ModifyWindow) - transfers_before := fs.Stats.GetTransfers() + fs.Stats.ResetCounters() err := fs.Sync(fremote, flocal, true) if err != nil { t.Fatalf("Initial sync failed: %v", err) } - transfers_after := fs.Stats.GetTransfers() - //We should have transferred exactly one file. - if transfers_after-1 != transfers_before { - t.Fatalf("Initial sync didn't do what we wanted.") + // We should have transferred exactly one file. 
+ if fs.Stats.GetTransfers() != 1 { + t.Fatalf("Sync 1: want 1 transfer, got %d", fs.Stats.GetTransfers()) } + remote_items := local_items + fstest.CheckListingWithPrecision(t, fremote, remote_items, fs.Config.ModifyWindow) + err = os.Chtimes(localName+"/check sum", t2, t2) if err != nil { t.Fatalf("Chtimes failed: %v", err) } + local_items = []fstest.Item{ + {Path: "check sum", Size: 0, ModTime: t2, Md5sum: "d41d8cd98f00b204e9800998ecf8427e"}, + } + fstest.CheckListingWithPrecision(t, flocal, local_items, fs.Config.ModifyWindow) - transfers_before = fs.Stats.GetTransfers() + fs.Stats.ResetCounters() err = fs.Sync(fremote, flocal, true) if err != nil { t.Fatalf("Sync failed: %v", err) } - transfers_after = fs.Stats.GetTransfers() - //We should have transferred no files - if transfers_after != transfers_before { - t.Fatalf("We synced, though we shouldn't have.") + // We should have transferred no files + if fs.Stats.GetTransfers() != 0 { + t.Fatalf("Sync 2: want 0 transfers, got %d", fs.Stats.GetTransfers()) } - remote_items := []fstest.Item{ - {Path: "check sum", Size: 0, ModTime: t1, Md5sum: "d41d8cd98f00b204e9800998ecf8427e"}, - } + fstest.CheckListingWithPrecision(t, flocal, local_items, fs.Config.ModifyWindow) + fstest.CheckListingWithPrecision(t, fremote, remote_items, fs.Config.ModifyWindow) + + cleanTempDir(t) +} + +// Create a file and sync it. Change the last modified date and the +// file contents but not the size. If we're doing sync by size +// only, we expect nothing to be transferred on the second sync. 
+func TestSyncSizeOnly(t *testing.T) { + cleanTempDir(t) + fs.Config.SizeOnly = true + defer func() { fs.Config.SizeOnly = false }() + + WriteFile("sizeonly", "potato", t1) local_items := []fstest.Item{ - {Path: "check sum", Size: 0, ModTime: t2, Md5sum: "d41d8cd98f00b204e9800998ecf8427e"}, + {Path: "sizeonly", Size: 6, ModTime: t1, Md5sum: "8ee2027983915ec78acc45027d874316"}, } + fstest.CheckListingWithPrecision(t, flocal, local_items, fs.Config.ModifyWindow) + + fs.Stats.ResetCounters() + err := fs.Sync(fremote, flocal, true) + if err != nil { + t.Fatalf("Initial sync failed: %v", err) + } + + // We should have transferred exactly one file. + if fs.Stats.GetTransfers() != 1 { + t.Fatalf("Sync 1: want 1 transfer, got %d", fs.Stats.GetTransfers()) + } + + remote_items := local_items + fstest.CheckListingWithPrecision(t, fremote, remote_items, fs.Config.ModifyWindow) + + // Update mtime, md5sum but not length of file + WriteFile("sizeonly", "POTATO", t2) + local_items = []fstest.Item{ + {Path: "sizeonly", Size: 6, ModTime: t2, Md5sum: "8ac6f27a282e4938125482607ccfb55f"}, + } + fstest.CheckListingWithPrecision(t, flocal, local_items, fs.Config.ModifyWindow) + + fs.Stats.ResetCounters() + err = fs.Sync(fremote, flocal, true) + if err != nil { + t.Fatalf("Sync failed: %v", err) + } + + // We should have transferred no files + if fs.Stats.GetTransfers() != 0 { + t.Fatalf("Sync 2: want 0 transfers, got %d", fs.Stats.GetTransfers()) + } + fstest.CheckListingWithPrecision(t, flocal, local_items, fs.Config.ModifyWindow) fstest.CheckListingWithPrecision(t, fremote, remote_items, fs.Config.ModifyWindow)