If the remote object doesn't have mtime then check the md5sum to see if changed

This commit is contained in:
Nick Craig-Wood 2012-11-28 11:17:31 +00:00
parent 81b7f41dfb
commit e3096508dd
2 changed files with 41 additions and 2 deletions

View file

@ -20,3 +20,5 @@ FIXME progress meter would be nice! Do this by wrapping the Reader with a progre
Do bandwidth limit by wrapping the Reader too Do bandwidth limit by wrapping the Reader too
If length is same but remote has no mtime, then could fall back to checking the checksum and if that was OK then just update the object meta time. If length is same but remote has no mtime, then could fall back to checking the checksum and if that was OK then just update the object meta time.
Could have an integrity check mode where we check the MD5sums of the local vs the remote

View file

@ -4,13 +4,16 @@
package main package main
import ( import (
"crypto/md5"
"flag" "flag"
"fmt" "fmt"
"github.com/ncw/swift" "github.com/ncw/swift"
"io"
"log" "log"
"os" "os"
"path/filepath" "path/filepath"
"runtime/pprof" "runtime/pprof"
"strings"
) )
// Globals // Globals
@ -39,9 +42,31 @@ type FsObject struct {
type FsObjects map[string]FsObject type FsObjects map[string]FsObject
// md5sum returns the MD5 digest of the file at path, formatted as a
// lowercase hex string.
//
// If the file cannot be opened or read, the failure is logged and the
// error is returned together with an empty string.
func md5sum(path string) (string, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		log.Printf("Failed to open %s: %s", path, openErr)
		return "", openErr
	}
	// The file is only read here, so the error from Close is ignored.
	defer file.Close()

	// Stream the file through the hash rather than loading it into memory.
	digest := md5.New()
	if _, copyErr := io.Copy(digest, file); copyErr != nil {
		log.Printf("Failed to read from %s: %s", path, copyErr)
		return "", copyErr
	}

	sum := digest.Sum(nil)
	return fmt.Sprintf("%x", sum), nil
}
// Checks to see if an object has changed or not by looking at its size and mtime // Checks to see if an object has changed or not by looking at its size and mtime
// //
// This is the heuristic rsync uses when not using --checksum // This is the heuristic rsync uses when not using --checksum
//
// If the remote object doesn't have the mtime metadata set then the
// checksum is checked
//
// FIXME should update the mtime metadata of the remote object when the checksums match
func (fs *FsObject) changed(c *swift.Connection, container string) bool { func (fs *FsObject) changed(c *swift.Connection, container string) bool {
obj, h, err := c.Object(container, fs.rel) obj, h, err := c.Object(container, fs.rel)
if err != nil { if err != nil {
@ -56,7 +81,20 @@ func (fs *FsObject) changed(c *swift.Connection, container string) bool {
t, err := m.GetModTime() t, err := m.GetModTime()
if err != nil { if err != nil {
log.Printf("Failed to read mtime %s: %s", fs.path, err) log.Printf("Failed to read mtime %s: %s", fs.path, err)
return true localMd5, err := md5sum(fs.path)
// log.Printf("Local MD5 %s", localMd5)
// log.Printf("Remote MD5 %s", obj.Hash)
if err != nil {
log.Printf("Failed to calculate md5 %s: %s", fs.path, err)
return true
}
if localMd5 != strings.ToLower(obj.Hash) {
log.Printf("Md5sums differ %s", fs.path)
return true
}
log.Printf("Md5sums identical - skipping %s", fs.path)
// FIXME update the mtime of the remote object here
return false
} }
if !t.Equal(fs.info.ModTime()) { if !t.Equal(fs.info.ModTime()) {
log.Printf("mtimes differ: %s", fs.path) log.Printf("mtimes differ: %s", fs.path)
@ -88,7 +126,6 @@ func (fs *FsObject) put(c *swift.Connection, container string) {
defer in.Close() defer in.Close()
m := swift.Metadata{} m := swift.Metadata{}
m.SetModTime(fs.info.ModTime()) m.SetModTime(fs.info.ModTime())
log.Println(m.ObjectHeaders())
_, err = c.ObjectPut(container, fs.rel, in, true, "", "", m.ObjectHeaders()) _, err = c.ObjectPut(container, fs.rel, in, true, "", "", m.ObjectHeaders())
if err != nil { if err != nil {
log.Printf("Failed to upload %s: %s", fs.path, err) log.Printf("Failed to upload %s: %s", fs.path, err)