From 7af128237542d4f9a1339139a12c421a729a2964 Mon Sep 17 00:00:00 2001 From: Alex Couper Date: Wed, 3 Jun 2015 14:08:27 +0000 Subject: [PATCH] Add --checksum flag to only discard transfers by MD5SUM - fixes #61 Useful for copying between backends where checksum fetching is fast, ie any of s3, swift, drive or googlecloudstorage. --- README.md | 5 +++-- docs/content/docs.md | 5 +++-- fs/accounting.go | 7 ++++++ fs/config.go | 3 +++ fs/operations.go | 32 ++++++++++++++++----------- fs/operations_test.go | 50 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 85 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 01c9c4c5f..613d34409 100644 --- a/README.md +++ b/README.md @@ -123,11 +123,11 @@ Checks the files in the source and destination match. It compares sizes and MD5SUMs and prints a report of files which don't match. It doesn't alter the source or destination. - rclone config + rclone config Enter an interactive configuration session. - rclone help + rclone help This help. @@ -136,6 +136,7 @@ General options: ``` --bwlimit=0: Bandwidth limit in kBytes/s, or use suffix k|M|G --checkers=8: Number of checkers to run in parallel. + -c, --checksum=false: Skip based on checksum, not mod-time & size --config="~/.rclone.conf": Config file. --contimeout=1m0s: Connect timeout -n, --dry-run=false: Do a trial run with no permanent changes diff --git a/docs/content/docs.md b/docs/content/docs.md index 8d2cca5f4..d9d670e54 100644 --- a/docs/content/docs.md +++ b/docs/content/docs.md @@ -106,17 +106,18 @@ Checks the files in the source and destination match. It compares sizes and MD5SUMs and prints a report of files which don't match. It doesn't alter the source or destination. - rclone config + rclone config Enter an interactive configuration session. - rclone help + rclone help This help. ``` --bwlimit=0: Bandwidth limit in kBytes/s, or use suffix k|M|G --checkers=8: Number of checkers to run in parallel. 
+ -c, --checksum=false: Skip based on checksum, not mod-time & size --config="~/.rclone.conf": Config file. --contimeout=1m0s: Connect timeout -n, --dry-run=false: Do a trial run with no permanent changes diff --git a/fs/accounting.go b/fs/accounting.go index 93f38c613..772864717 100644 --- a/fs/accounting.go +++ b/fs/accounting.go @@ -153,6 +153,13 @@ func (s *StatsInfo) DoneChecking(o Object) { s.checks += 1 } +// GetTransfers reads the number of transfers +func (s *StatsInfo) GetTransfers() int64 { + s.lock.RLock() + defer s.lock.RUnlock() + return s.transfers +} + // Transferring adds a transfer into the stats func (s *StatsInfo) Transferring(o Object) { s.lock.Lock() diff --git a/fs/config.go b/fs/config.go index 38a4cb6ad..8acc89417 100644 --- a/fs/config.go +++ b/fs/config.go @@ -44,6 +44,7 @@ var ( checkers = pflag.IntP("checkers", "", 8, "Number of checkers to run in parallel.") transfers = pflag.IntP("transfers", "", 4, "Number of file transfers to run in parallel.") configFile = pflag.StringP("config", "", ConfigPath, "Config file.") + checkSum = pflag.BoolP("checksum", "c", false, "Skip based on checksum, not mod-time & size") dryRun = pflag.BoolP("dry-run", "n", false, "Do a trial run with no permanent changes") connectTimeout = pflag.DurationP("contimeout", "", 60*time.Second, "Connect timeout") timeout = pflag.DurationP("timeout", "", 5*60*time.Second, "IO idle timeout") @@ -119,6 +120,7 @@ type ConfigInfo struct { Verbose bool Quiet bool DryRun bool + CheckSum bool ModifyWindow time.Duration Checkers int Transfers int @@ -194,6 +196,7 @@ func LoadConfig() { Config.DryRun = *dryRun Config.Timeout = *timeout Config.ConnectTimeout = *connectTimeout + Config.CheckSum = *checkSum ConfigPath = *configFile diff --git a/fs/operations.go b/fs/operations.go index 768328255..456a32dc4 100644 --- a/fs/operations.go +++ b/fs/operations.go @@ -8,6 +8,7 @@ import ( "mime" "path" "sync" + "time" ) // Work out modify window for fses passed in - sets 
Config.ModifyWindow @@ -71,16 +72,19 @@ func Equal(src, dst Object) bool { return false } - // Size the same so check the mtime - srcModTime := src.ModTime() - dstModTime := dst.ModTime() - dt := dstModTime.Sub(srcModTime) - ModifyWindow := Config.ModifyWindow - if dt >= ModifyWindow || dt <= -ModifyWindow { - Debug(src, "Modification times differ by %s: %v, %v", dt, srcModTime, dstModTime) - } else { - Debug(src, "Size and modification time the same (differ by %s, within tolerance %s)", dt, ModifyWindow) - return true + var srcModTime time.Time + if !Config.CheckSum { + // Size the same so check the mtime + srcModTime = src.ModTime() + dstModTime := dst.ModTime() + dt := dstModTime.Sub(srcModTime) + ModifyWindow := Config.ModifyWindow + if dt >= ModifyWindow || dt <= -ModifyWindow { + Debug(src, "Modification times differ by %s: %v, %v", dt, srcModTime, dstModTime) + } else { + Debug(src, "Size and modification time the same (differ by %s, within tolerance %s)", dt, ModifyWindow) + return true + } } // mtime is unreadable or different but size is the same so @@ -91,9 +95,11 @@ func Equal(src, dst Object) bool { return false } - // Size and MD5 the same but mtime different so update the - // mtime of the dst object here - dst.SetModTime(srcModTime) + if !Config.CheckSum { + // Size and MD5 the same but mtime different so update the + // mtime of the dst object here + dst.SetModTime(srcModTime) + } Debug(src, "Size and MD5SUM of src and dst objects identical") return true diff --git a/fs/operations_test.go b/fs/operations_test.go index 99d7776d9..256f734cb 100644 --- a/fs/operations_test.go +++ b/fs/operations_test.go @@ -169,6 +169,56 @@ func TestCopyRedownload(t *testing.T) { cleanTempDir(t) } +// Create a file and sync it. Change the last modified date and resync. +// If we're only doing sync by size and checksum, we expect nothing +// to be transferred on the second sync. 
+func TestSyncBasedOnCheckSum(t *testing.T) { + cleanTempDir(t) + fs.Config.CheckSum = true + + WriteFile("check sum", "", t1) + + transfers_before := fs.Stats.GetTransfers() + err := fs.Sync(fremote, flocal, true) + if err != nil { + t.Fatalf("Initial sync failed: %v", err) + } + transfers_after := fs.Stats.GetTransfers() + + //We should have transferred exactly one file. + if transfers_after-1 != transfers_before { + t.Fatalf("Initial sync didn't do what we wanted.") + } + + err = os.Chtimes(localName+"/check sum", t2, t2) + if err != nil { + t.Fatalf("Chtimes failed: %v", err) + } + + transfers_before = fs.Stats.GetTransfers() + err = fs.Sync(fremote, flocal, true) + if err != nil { + t.Fatalf("Sync failed: %v", err) + } + transfers_after = fs.Stats.GetTransfers() + + //We should have transferred no files + if transfers_after != transfers_before { + t.Fatalf("We synced, though we shouldn't have.") + } + + remote_items := []fstest.Item{ + {Path: "check sum", Size: 0, ModTime: t1, Md5sum: "d41d8cd98f00b204e9800998ecf8427e"}, + } + local_items := []fstest.Item{ + {Path: "check sum", Size: 0, ModTime: t2, Md5sum: "d41d8cd98f00b204e9800998ecf8427e"}, + } + fstest.CheckListingWithPrecision(t, flocal, local_items, fs.Config.ModifyWindow) + fstest.CheckListingWithPrecision(t, fremote, remote_items, fs.Config.ModifyWindow) + + cleanTempDir(t) +} + func TestSyncAfterChangingModtimeOnly(t *testing.T) { WriteFile("empty space", "", t1)