From e3096508dd839d5e02bb5d0d1cba7625ac435d1f Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Wed, 28 Nov 2012 11:17:31 +0000 Subject: [PATCH] If the remote object doesn't have mtime then check the md5sum to see if changed --- notes.txt | 2 ++ swiftsync.go | 41 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/notes.txt b/notes.txt index 474061330..0e25850d2 100644 --- a/notes.txt +++ b/notes.txt @@ -20,3 +20,5 @@ FIXME progress meter would be nice! Do this by wrapping the Reader with a progre Do bandwidth limit by wrapping the Reader too If length is same but remote has no mtime, then could fall back to checking the checksum and if that was OK then just update the object meta time. + +Could have an integrity check mode where we check the MD5sums of the local vs the remote diff --git a/swiftsync.go b/swiftsync.go index a2bbd0fcf..d53505f69 100644 --- a/swiftsync.go +++ b/swiftsync.go @@ -4,13 +4,16 @@ package main import ( + "crypto/md5" "flag" "fmt" "github.com/ncw/swift" + "io" "log" "os" "path/filepath" "runtime/pprof" + "strings" ) // Globals @@ -39,9 +42,31 @@ type FsObject struct { type FsObjects map[string]FsObject +// md5sum calculates the md5sum of a file returning a lowercase hex string +func md5sum(path string) (string, error) { + in, err := os.Open(path) + if err != nil { + log.Printf("Failed to open %s: %s", path, err) + return "", err + } + defer in.Close() // FIXME ignoring error + hash := md5.New() + _, err = io.Copy(hash, in) + if err != nil { + log.Printf("Failed to read from %s: %s", path, err) + return "", err + } + return fmt.Sprintf("%x", hash.Sum(nil)), nil +} + // Checks to see if an object has changed or not by looking at its size and mtime // // This is the heuristic rsync uses when not using --checksum +// +// If the remote object doesn't have the mtime metadata set then the +// checksum is checked +// +// FIXME should update the mtime of the remote object when the checksums match func 
(fs *FsObject) changed(c *swift.Connection, container string) bool { obj, h, err := c.Object(container, fs.rel) if err != nil { @@ -56,7 +81,20 @@ func (fs *FsObject) changed(c *swift.Connection, container string) bool { t, err := m.GetModTime() if err != nil { log.Printf("Failed to read mtime %s: %s", fs.path, err) - return true + localMd5, err := md5sum(fs.path) + // log.Printf("Local MD5 %s", localMd5) + // log.Printf("Remote MD5 %s", obj.Hash) + if err != nil { + log.Printf("Failed to calculate md5 %s: %s", fs.path, err) + return true + } + if localMd5 != strings.ToLower(obj.Hash) { + log.Printf("Md5sums differ %s", fs.path) + return true + } + log.Printf("Md5sums identical - skipping %s", fs.path) + // FIXME update the mtime of the remote object here + return false } if !t.Equal(fs.info.ModTime()) { log.Printf("mtimes differ: %s", fs.path) @@ -88,7 +126,6 @@ func (fs *FsObject) put(c *swift.Connection, container string) { defer in.Close() m := swift.Metadata{} m.SetModTime(fs.info.ModTime()) - log.Println(m.ObjectHeaders()) _, err = c.ObjectPut(container, fs.rel, in, true, "", "", m.ObjectHeaders()) if err != nil { log.Printf("Failed to upload %s: %s", fs.path, err)