Add --checksum flag to only discard transfers by MD5SUM - fixes #61

Useful for copying between backends where checksum fetching is fast,
ie any of s3, swift, drive or googlecloudstorage.
This commit is contained in:
Alex Couper 2015-06-03 14:08:27 +00:00 committed by Nick Craig-Wood
parent d9fcc32f70
commit 7af1282375
6 changed files with 85 additions and 17 deletions

View file

@ -123,11 +123,11 @@ Checks the files in the source and destination match. It
compares sizes and MD5SUMs and prints a report of files which
don't match. It doesn't alter the source or destination.
rclone config
rclone config
Enter an interactive configuration session.
rclone help
rclone help
This help.
@ -136,6 +136,7 @@ General options:
```
--bwlimit=0: Bandwidth limit in kBytes/s, or use suffix k|M|G
--checkers=8: Number of checkers to run in parallel.
-c, --checksum=false: Skip based on checksum, not mod-time & size
--config="~/.rclone.conf": Config file.
--contimeout=1m0s: Connect timeout
-n, --dry-run=false: Do a trial run with no permanent changes

View file

@ -106,17 +106,18 @@ Checks the files in the source and destination match. It
compares sizes and MD5SUMs and prints a report of files which
don't match. It doesn't alter the source or destination.
rclone config
rclone config
Enter an interactive configuration session.
rclone help
rclone help
This help.
```
--bwlimit=0: Bandwidth limit in kBytes/s, or use suffix k|M|G
--checkers=8: Number of checkers to run in parallel.
-c, --checksum=false: Skip based on checksum, not mod-time & size
--config="~/.rclone.conf": Config file.
--contimeout=1m0s: Connect timeout
-n, --dry-run=false: Do a trial run with no permanent changes

View file

@ -153,6 +153,13 @@ func (s *StatsInfo) DoneChecking(o Object) {
s.checks += 1
}
// GetTransfers reads the number of transfers
func (s *StatsInfo) GetTransfers() int64 {
s.lock.RLock()
defer s.lock.RUnlock()
return s.transfers
}
// Transferring adds a transfer into the stats
func (s *StatsInfo) Transferring(o Object) {
s.lock.Lock()

View file

@ -44,6 +44,7 @@ var (
checkers = pflag.IntP("checkers", "", 8, "Number of checkers to run in parallel.")
transfers = pflag.IntP("transfers", "", 4, "Number of file transfers to run in parallel.")
configFile = pflag.StringP("config", "", ConfigPath, "Config file.")
checkSum = pflag.BoolP("checksum", "c", false, "Skip based on checksum, not mod-time & size")
dryRun = pflag.BoolP("dry-run", "n", false, "Do a trial run with no permanent changes")
connectTimeout = pflag.DurationP("contimeout", "", 60*time.Second, "Connect timeout")
timeout = pflag.DurationP("timeout", "", 5*60*time.Second, "IO idle timeout")
@ -119,6 +120,7 @@ type ConfigInfo struct {
Verbose bool
Quiet bool
DryRun bool
CheckSum bool
ModifyWindow time.Duration
Checkers int
Transfers int
@ -194,6 +196,7 @@ func LoadConfig() {
Config.DryRun = *dryRun
Config.Timeout = *timeout
Config.ConnectTimeout = *connectTimeout
Config.CheckSum = *checkSum
ConfigPath = *configFile

View file

@ -8,6 +8,7 @@ import (
"mime"
"path"
"sync"
"time"
)
// Work out modify window for fses passed in - sets Config.ModifyWindow
@ -71,16 +72,19 @@ func Equal(src, dst Object) bool {
return false
}
// Size the same so check the mtime
srcModTime := src.ModTime()
dstModTime := dst.ModTime()
dt := dstModTime.Sub(srcModTime)
ModifyWindow := Config.ModifyWindow
if dt >= ModifyWindow || dt <= -ModifyWindow {
Debug(src, "Modification times differ by %s: %v, %v", dt, srcModTime, dstModTime)
} else {
Debug(src, "Size and modification time the same (differ by %s, within tolerance %s)", dt, ModifyWindow)
return true
var srcModTime time.Time
if !Config.CheckSum {
// Size the same so check the mtime
srcModTime = src.ModTime()
dstModTime := dst.ModTime()
dt := dstModTime.Sub(srcModTime)
ModifyWindow := Config.ModifyWindow
if dt >= ModifyWindow || dt <= -ModifyWindow {
Debug(src, "Modification times differ by %s: %v, %v", dt, srcModTime, dstModTime)
} else {
Debug(src, "Size and modification time the same (differ by %s, within tolerance %s)", dt, ModifyWindow)
return true
}
}
// mtime is unreadable or different but size is the same so
@ -91,9 +95,11 @@ func Equal(src, dst Object) bool {
return false
}
// Size and MD5 the same but mtime different so update the
// mtime of the dst object here
dst.SetModTime(srcModTime)
if !Config.CheckSum {
// Size and MD5 the same but mtime different so update the
// mtime of the dst object here
dst.SetModTime(srcModTime)
}
Debug(src, "Size and MD5SUM of src and dst objects identical")
return true

View file

@ -169,6 +169,56 @@ func TestCopyRedownload(t *testing.T) {
cleanTempDir(t)
}
// Create a file and sync it. Change the last modified date and resync.
// If we're only doing sync by size and checksum, we expect nothing to
// to be transferred on the second sync.
func TestSyncBasedOnCheckSum(t *testing.T) {
cleanTempDir(t)
fs.Config.CheckSum = true
WriteFile("check sum", "", t1)
transfers_before := fs.Stats.GetTransfers()
err := fs.Sync(fremote, flocal, true)
if err != nil {
t.Fatalf("Initial sync failed: %v", err)
}
transfers_after := fs.Stats.GetTransfers()
//We should have transferred exactly one file.
if transfers_after-1 != transfers_before {
t.Fatalf("Initial sync didn't do what we wanted.")
}
err = os.Chtimes(localName+"/check sum", t2, t2)
if err != nil {
t.Fatalf("Chtimes failed: %v", err)
}
transfers_before = fs.Stats.GetTransfers()
err = fs.Sync(fremote, flocal, true)
if err != nil {
t.Fatalf("Sync failed: %v", err)
}
transfers_after = fs.Stats.GetTransfers()
//We should have transferred no files
if transfers_after != transfers_before {
t.Fatalf("We synced, though we shouldn't have.")
}
remote_items := []fstest.Item{
{Path: "check sum", Size: 0, ModTime: t1, Md5sum: "d41d8cd98f00b204e9800998ecf8427e"},
}
local_items := []fstest.Item{
{Path: "check sum", Size: 0, ModTime: t2, Md5sum: "d41d8cd98f00b204e9800998ecf8427e"},
}
fstest.CheckListingWithPrecision(t, flocal, local_items, fs.Config.ModifyWindow)
fstest.CheckListingWithPrecision(t, fremote, remote_items, fs.Config.ModifyWindow)
cleanTempDir(t)
}
func TestSyncAfterChangingModtimeOnly(t *testing.T) {
WriteFile("empty space", "", t1)