Implement --size-only flag to sync on size not checksum & modtime - fixes #75

This commit is contained in:
Nick Craig-Wood 2015-06-06 08:38:45 +01:00
parent 5ab7970e18
commit e22b445cff
7 changed files with 105 additions and 22 deletions

View file

@ -136,13 +136,14 @@ General options:
``` ```
--bwlimit=0: Bandwidth limit in kBytes/s, or use suffix k|M|G --bwlimit=0: Bandwidth limit in kBytes/s, or use suffix k|M|G
--checkers=8: Number of checkers to run in parallel. --checkers=8: Number of checkers to run in parallel.
-c, --checksum=false: Skip based on checksum, not mod-time & size -c, --checksum=false: Skip based on checksum & size, not mod-time & size
--config="~/.rclone.conf": Config file. --config="~/.rclone.conf": Config file.
--contimeout=1m0s: Connect timeout --contimeout=1m0s: Connect timeout
-n, --dry-run=false: Do a trial run with no permanent changes -n, --dry-run=false: Do a trial run with no permanent changes
--log-file="": Log everything to this file --log-file="": Log everything to this file
--modify-window=1ns: Max time diff to be considered the same --modify-window=1ns: Max time diff to be considered the same
-q, --quiet=false: Print as little stuff as possible -q, --quiet=false: Print as little stuff as possible
--size-only=false: Skip based on size only, not mod-time or checksum
--stats=1m0s: Interval to print stats (0 to disable) --stats=1m0s: Interval to print stats (0 to disable)
--timeout=5m0s: IO idle timeout --timeout=5m0s: IO idle timeout
--transfers=4: Number of file transfers to run in parallel. --transfers=4: Number of file transfers to run in parallel.

View file

@ -124,6 +124,7 @@ This help.
--log-file="": Log everything to this file --log-file="": Log everything to this file
--modify-window=1ns: Max time diff to be considered the same --modify-window=1ns: Max time diff to be considered the same
-q, --quiet=false: Print as little stuff as possible -q, --quiet=false: Print as little stuff as possible
--size-only=false: Skip based on size only, not mod-time or checksum
--stats=1m0s: Interval to print stats (0 to disable) --stats=1m0s: Interval to print stats (0 to disable)
--timeout=5m0s: IO idle timeout --timeout=5m0s: IO idle timeout
--transfers=4: Number of file transfers to run in parallel. --transfers=4: Number of file transfers to run in parallel.

View file

@ -84,3 +84,10 @@ Dropbox datastores are limited to 100,000 rows so this is the maximum
number of files rclone can manage on Dropbox. number of files rclone can manage on Dropbox.
Dropbox is case sensitive which can sometimes cause duplicated files. Dropbox is case sensitive which can sometimes cause duplicated files.
If you use the desktop sync tool and rclone on the same files then the
md5sums and modification times may get out of sync as far as rclone is
concerned. This will cause a `Corrupted on transfer: md5sums differ`
error message when fetching files. You can work around this by using
the `--size-only` flag to ignore the md5sums and modification times
for these files.

View file

@ -124,6 +124,16 @@ func (s *StatsInfo) GetErrors() int64 {
return s.errors return s.errors
} }
// ResetCounters sets the counters (bytes, checks, errors, transfers) to 0.
//
// It mutates the counters, so it takes the exclusive (write) lock rather
// than the shared read lock: a read lock may be held by several goroutines
// at once and would not protect these writes from concurrent access.
func (s *StatsInfo) ResetCounters() {
	s.lock.Lock()
	defer s.lock.Unlock()
	s.bytes = 0
	s.errors = 0
	s.checks = 0
	s.transfers = 0
}
// Errored returns whether there have been any errors // Errored returns whether there have been any errors
func (s *StatsInfo) Errored() bool { func (s *StatsInfo) Errored() bool {
s.lock.RLock() s.lock.RLock()

View file

@ -44,7 +44,8 @@ var (
checkers = pflag.IntP("checkers", "", 8, "Number of checkers to run in parallel.") checkers = pflag.IntP("checkers", "", 8, "Number of checkers to run in parallel.")
transfers = pflag.IntP("transfers", "", 4, "Number of file transfers to run in parallel.") transfers = pflag.IntP("transfers", "", 4, "Number of file transfers to run in parallel.")
configFile = pflag.StringP("config", "", ConfigPath, "Config file.") configFile = pflag.StringP("config", "", ConfigPath, "Config file.")
checkSum = pflag.BoolP("checksum", "c", false, "Skip based on checksum, not mod-time & size") checkSum = pflag.BoolP("checksum", "c", false, "Skip based on checksum & size, not mod-time & size")
sizeOnly = pflag.BoolP("size-only", "", false, "Skip based on size only, not mod-time or checksum")
dryRun = pflag.BoolP("dry-run", "n", false, "Do a trial run with no permanent changes") dryRun = pflag.BoolP("dry-run", "n", false, "Do a trial run with no permanent changes")
connectTimeout = pflag.DurationP("contimeout", "", 60*time.Second, "Connect timeout") connectTimeout = pflag.DurationP("contimeout", "", 60*time.Second, "Connect timeout")
timeout = pflag.DurationP("timeout", "", 5*60*time.Second, "IO idle timeout") timeout = pflag.DurationP("timeout", "", 5*60*time.Second, "IO idle timeout")
@ -121,6 +122,7 @@ type ConfigInfo struct {
Quiet bool Quiet bool
DryRun bool DryRun bool
CheckSum bool CheckSum bool
SizeOnly bool
ModifyWindow time.Duration ModifyWindow time.Duration
Checkers int Checkers int
Transfers int Transfers int
@ -197,6 +199,7 @@ func LoadConfig() {
Config.Timeout = *timeout Config.Timeout = *timeout
Config.ConnectTimeout = *connectTimeout Config.ConnectTimeout = *connectTimeout
Config.CheckSum = *checkSum Config.CheckSum = *checkSum
Config.SizeOnly = *sizeOnly
ConfigPath = *configFile ConfigPath = *configFile

View file

@ -54,15 +54,16 @@ func CheckMd5sums(src, dst Object) (bool, error) {
// size, mtime and MD5SUM // size, mtime and MD5SUM
// //
// If the src and dst size are different then it is considered to be // If the src and dst size are different then it is considered to be
// not equal. // not equal. If --size-only is in effect then this is the only check
// that is done.
// //
// If the size is the same and the mtime is the same then it is // If the size is the same and the mtime is the same then it is
// considered to be equal. This is the heuristic rsync uses when // considered to be equal. This check is skipped if using --checksum.
// not using --checksum.
// //
// If the size is the same and and mtime is different or unreadable // If the size is the same and mtime is different, unreadable or
// and the MD5SUM is the same then the file is considered to be equal. // --checksum is set and the MD5SUM is the same then the file is
// In this case the mtime on the dst is updated. // considered to be equal. In this case the mtime on the dst is
// updated if --checksum is not set.
// //
// Otherwise the file is considered to be not equal including if there // Otherwise the file is considered to be not equal including if there
// were errors reading info. // were errors reading info.
@ -71,6 +72,10 @@ func Equal(src, dst Object) bool {
Debug(src, "Sizes differ") Debug(src, "Sizes differ")
return false return false
} }
if Config.SizeOnly {
Debug(src, "Sizes identical")
return true
}
var srcModTime time.Time var srcModTime time.Time
if !Config.CheckSum { if !Config.CheckSum {

View file

@ -175,44 +175,100 @@ func TestCopyRedownload(t *testing.T) {
func TestSyncBasedOnCheckSum(t *testing.T) { func TestSyncBasedOnCheckSum(t *testing.T) {
cleanTempDir(t) cleanTempDir(t)
fs.Config.CheckSum = true fs.Config.CheckSum = true
defer func() { fs.Config.CheckSum = false }()
WriteFile("check sum", "", t1) WriteFile("check sum", "", t1)
local_items := []fstest.Item{
{Path: "check sum", Size: 0, ModTime: t1, Md5sum: "d41d8cd98f00b204e9800998ecf8427e"},
}
fstest.CheckListingWithPrecision(t, flocal, local_items, fs.Config.ModifyWindow)
transfers_before := fs.Stats.GetTransfers() fs.Stats.ResetCounters()
err := fs.Sync(fremote, flocal, true) err := fs.Sync(fremote, flocal, true)
if err != nil { if err != nil {
t.Fatalf("Initial sync failed: %v", err) t.Fatalf("Initial sync failed: %v", err)
} }
transfers_after := fs.Stats.GetTransfers()
// We should have transferred exactly one file. // We should have transferred exactly one file.
if transfers_after-1 != transfers_before { if fs.Stats.GetTransfers() != 1 {
t.Fatalf("Initial sync didn't do what we wanted.") t.Fatalf("Sync 1: want 1 transfer, got %d", fs.Stats.GetTransfers())
} }
remote_items := local_items
fstest.CheckListingWithPrecision(t, fremote, remote_items, fs.Config.ModifyWindow)
err = os.Chtimes(localName+"/check sum", t2, t2) err = os.Chtimes(localName+"/check sum", t2, t2)
if err != nil { if err != nil {
t.Fatalf("Chtimes failed: %v", err) t.Fatalf("Chtimes failed: %v", err)
} }
local_items = []fstest.Item{
{Path: "check sum", Size: 0, ModTime: t2, Md5sum: "d41d8cd98f00b204e9800998ecf8427e"},
}
fstest.CheckListingWithPrecision(t, flocal, local_items, fs.Config.ModifyWindow)
transfers_before = fs.Stats.GetTransfers() fs.Stats.ResetCounters()
err = fs.Sync(fremote, flocal, true) err = fs.Sync(fremote, flocal, true)
if err != nil { if err != nil {
t.Fatalf("Sync failed: %v", err) t.Fatalf("Sync failed: %v", err)
} }
transfers_after = fs.Stats.GetTransfers()
// We should have transferred no files // We should have transferred no files
if transfers_after != transfers_before { if fs.Stats.GetTransfers() != 0 {
t.Fatalf("We synced, though we shouldn't have.") t.Fatalf("Sync 2: want 0 transfers, got %d", fs.Stats.GetTransfers())
} }
remote_items := []fstest.Item{ fstest.CheckListingWithPrecision(t, flocal, local_items, fs.Config.ModifyWindow)
{Path: "check sum", Size: 0, ModTime: t1, Md5sum: "d41d8cd98f00b204e9800998ecf8427e"}, fstest.CheckListingWithPrecision(t, fremote, remote_items, fs.Config.ModifyWindow)
cleanTempDir(t)
} }
// Create a file and sync it. Change the last modified date and the
// file contents but not the size. If we're syncing by size only, we
// expect nothing to be transferred on the second sync.
func TestSyncSizeOnly(t *testing.T) {
cleanTempDir(t)
fs.Config.SizeOnly = true
defer func() { fs.Config.SizeOnly = false }()
WriteFile("sizeonly", "potato", t1)
local_items := []fstest.Item{ local_items := []fstest.Item{
{Path: "check sum", Size: 0, ModTime: t2, Md5sum: "d41d8cd98f00b204e9800998ecf8427e"}, {Path: "sizeonly", Size: 6, ModTime: t1, Md5sum: "8ee2027983915ec78acc45027d874316"},
} }
fstest.CheckListingWithPrecision(t, flocal, local_items, fs.Config.ModifyWindow)
fs.Stats.ResetCounters()
err := fs.Sync(fremote, flocal, true)
if err != nil {
t.Fatalf("Initial sync failed: %v", err)
}
// We should have transferred exactly one file.
if fs.Stats.GetTransfers() != 1 {
t.Fatalf("Sync 1: want 1 transfer, got %d", fs.Stats.GetTransfers())
}
remote_items := local_items
fstest.CheckListingWithPrecision(t, fremote, remote_items, fs.Config.ModifyWindow)
// Update mtime, md5sum but not length of file
WriteFile("sizeonly", "POTATO", t2)
local_items = []fstest.Item{
{Path: "sizeonly", Size: 6, ModTime: t2, Md5sum: "8ac6f27a282e4938125482607ccfb55f"},
}
fstest.CheckListingWithPrecision(t, flocal, local_items, fs.Config.ModifyWindow)
fs.Stats.ResetCounters()
err = fs.Sync(fremote, flocal, true)
if err != nil {
t.Fatalf("Sync failed: %v", err)
}
// We should have transferred no files
if fs.Stats.GetTransfers() != 0 {
t.Fatalf("Sync 2: want 0 transfers, got %d", fs.Stats.GetTransfers())
}
fstest.CheckListingWithPrecision(t, flocal, local_items, fs.Config.ModifyWindow) fstest.CheckListingWithPrecision(t, flocal, local_items, fs.Config.ModifyWindow)
fstest.CheckListingWithPrecision(t, fremote, remote_items, fs.Config.ModifyWindow) fstest.CheckListingWithPrecision(t, fremote, remote_items, fs.Config.ModifyWindow)