From 78c3a5ccfa48effcafb9d1ef7ead6ca37daeb363 Mon Sep 17 00:00:00 2001 From: klauspost Date: Mon, 11 Jan 2016 13:39:33 +0100 Subject: [PATCH] Add support for multiple hash types. Add support for multiple hash types with negotiation of common hash types for comparison. Manually rebased version of #277 (see discussion there) --- amazonclouddrive/amazonclouddrive.go | 12 +- b2/b2.go | 67 ++++-- docs/content/docs.md | 5 + docs/content/filtering.md | 4 +- drive/drive.go | 12 +- dropbox/dropbox.go | 11 +- fs/fs.go | 5 +- fs/hash.go | 235 ++++++++++++++++++++ fs/hash_test.go | 260 +++++++++++++++++++++++ fs/limited.go | 5 + fs/operations.go | 118 ++++++---- fs/operations_test.go | 2 +- fstest/fstest.go | 21 +- fstest/fstests/fstests.go | 8 +- googlecloudstorage/googlecloudstorage.go | 12 +- hubic/hubic.go | 6 + local/local.go | 91 ++++---- onedrive/onedrive.go | 28 ++- rclone.go | 12 ++ s3/s3.go | 12 +- swift/swift.go | 12 +- yandex/yandex.go | 12 +- 22 files changed, 815 insertions(+), 135 deletions(-) create mode 100644 fs/hash.go create mode 100644 fs/hash_test.go diff --git a/amazonclouddrive/amazonclouddrive.go b/amazonclouddrive/amazonclouddrive.go index 78f048b3b..0a8b7e72e 100644 --- a/amazonclouddrive/amazonclouddrive.go +++ b/amazonclouddrive/amazonclouddrive.go @@ -533,6 +533,11 @@ func (f *Fs) Precision() time.Duration { return fs.ModTimeNotSupported } +// Hashes returns the supported hash sets. +func (f *Fs) Hashes() fs.HashSet { + return fs.HashSet(fs.HashMD5) +} + // Copy src to this remote using server side copy operations. 
// // This is stored with the remote path given @@ -585,8 +590,11 @@ func (o *Object) Remote() string { return o.remote } -// Md5sum returns the Md5sum of an object returning a lowercase hex string -func (o *Object) Md5sum() (string, error) { +// Hash returns the Md5sum of an object returning a lowercase hex string +func (o *Object) Hash(t fs.HashType) (string, error) { + if t != fs.HashMD5 { + return "", fs.ErrHashUnsupported + } if o.info.ContentProperties.Md5 != nil { return *o.info.ContentProperties.Md5, nil } diff --git a/b2/b2.go b/b2/b2.go index b5322629c..82dbbbf00 100644 --- a/b2/b2.go +++ b/b2/b2.go @@ -74,6 +74,7 @@ type Object struct { remote string // The remote path info api.File // Info from the b2 object if known modTime time.Time // The modified time of the object if known + sha1 string // SHA-1 hash if known } // ------------------------------------------------------------ @@ -580,6 +581,11 @@ func (f *Fs) Purge() error { return errReturn } +// Hashes returns the supported hash sets. 
+func (f *Fs) Hashes() fs.HashSet { + return fs.HashSet(fs.HashSHA1) +} + // ------------------------------------------------------------ // Fs returns the parent Fs @@ -600,9 +606,16 @@ func (o *Object) Remote() string { return o.remote } -// Md5sum returns the Md5sum of an object returning a lowercase hex string -func (o *Object) Md5sum() (string, error) { - return "", nil +// Hash returns the Sha-1 of an object returning a lowercase hex string +// It returns fs.ErrHashUnsupported if any other hash type is requested +func (o *Object) Hash(t fs.HashType) (string, error) { + if t != fs.HashSHA1 { + return "", fs.ErrHashUnsupported + } + + // Error is logged in readFileMetadata + _ = o.readFileMetadata() + return o.sha1, nil } // Size returns the size of an object in bytes @@ -652,23 +665,40 @@ func parseTimeString(timeString string) (result time.Time, err error) { // // It attempts to read the objects mtime and if that isn't present the // LastModified returned in the http headers +// +// SHA-1 will also be updated once the request has completed. func (o *Object) ModTime() (result time.Time) { - if !o.modTime.IsZero() { - return o.modTime + // The error is logged in readFileMetadata + _ = o.readFileMetadata() + return o.modTime +} + +// readFileMetadata attempts to read the modified time and +// SHA-1 hash of the remote object. +// +// It attempts to read the objects mtime and if that isn't present the +// LastModified returned in the http headers. +// +// It is safe to call this function multiple times, and the +// result is cached between calls. +func (o *Object) readFileMetadata() error { + // Return if already know it + if !o.modTime.IsZero() && o.sha1 != "" { + return nil } - // Return the current time if can't read metadata - result = time.Now() + // Set modtime to now, as default value. 
+ o.modTime = time.Now() - // Read metadata (need ID) + // Read metadata (we need the ID) err := o.readMetaData() if err != nil { - fs.Debug(o, "Failed to read metadata: %v", err) - return result + fs.Debug(o, "Failed to get file metadata: %v", err) + return err } - // Return the UploadTimestamp if can't get file info - result = time.Time(o.info.UploadTimestamp) + // Use the UploadTimestamp if can't get file info + o.modTime = time.Time(o.info.UploadTimestamp) // Now read the metadata for the modified time opts := rest.Opts{ @@ -682,17 +712,20 @@ func (o *Object) ModTime() (result time.Time) { _, err = o.fs.srv.CallJSON(&opts, &request, &response) if err != nil { fs.Debug(o, "Failed to get file info: %v", err) - return result + return err } + o.sha1 = response.SHA1 // Parse the result timeString := response.Info[timeKey] parsed, err := parseTimeString(timeString) if err != nil { fs.Debug(o, "Failed to parse mod time string %q: %v", timeString, err) - return result + return err } - return parsed + o.modTime = parsed + + return nil } // SetModTime sets the modification time of the local fs object @@ -785,6 +818,9 @@ func (o *Object) Open() (in io.ReadCloser, err error) { } else { o.modTime = parsed } + if o.sha1 == "" { + o.sha1 = resp.Header.Get(sha1Header) + } return newOpenFile(o, resp), nil } @@ -939,6 +975,7 @@ func (o *Object) Update(in io.Reader, modTime time.Time, size int64) (err error) o.info.Action = "upload" o.info.Size = response.Size o.info.UploadTimestamp = api.Timestamp(time.Now()) // FIXME not quite right + o.sha1 = response.SHA1 return nil } diff --git a/docs/content/docs.md b/docs/content/docs.md index 19cfbace5..251782638 100644 --- a/docs/content/docs.md +++ b/docs/content/docs.md @@ -84,6 +84,11 @@ size and path. Produces an md5sum file for all the objects in the path. This is in the same format as the standard md5sum tool produces. +### rclone sha1sum remote:path ### + +Produces an sha1sum file for all the objects in the path. 
This +is in the same format as the standard sha1sum tool produces. + ### rclone size remote:path ### Prints the total size of objects in remote:path and the number of diff --git a/docs/content/filtering.md b/docs/content/filtering.md index 12ead6f51..a0cbef7c7 100644 --- a/docs/content/filtering.md +++ b/docs/content/filtering.md @@ -10,8 +10,8 @@ Rclone has a sophisticated set of include and exclude rules. Some of these are based on patterns and some on other things like file size. The filters are applied for the `copy`, `sync`, `move`, `ls`, `lsl`, -`md5sum`, `size` and `check` operations. Note that `purge` does not -obey the filters. +`md5sum`, `sha1sum`, `size` and `check` operations. +Note that `purge` does not obey the filters. Each path as it passes through rclone is matched against the include and exclude rules. The paths are matched without a leading `/`. diff --git a/drive/drive.go b/drive/drive.go index 155a67b37..2d7dd90c1 100644 --- a/drive/drive.go +++ b/drive/drive.go @@ -782,6 +782,11 @@ func (f *Fs) DirMove(src fs.Fs) error { return nil } +// Hashes returns the supported hash sets. +func (f *Fs) Hashes() fs.HashSet { + return fs.HashSet(fs.HashMD5) +} + // ------------------------------------------------------------ // Fs returns the parent Fs @@ -802,8 +807,11 @@ func (o *Object) Remote() string { return o.remote } -// Md5sum returns the Md5sum of an object returning a lowercase hex string -func (o *Object) Md5sum() (string, error) { +// Hash returns the Md5sum of an object returning a lowercase hex string +func (o *Object) Hash(t fs.HashType) (string, error) { + if t != fs.HashMD5 { + return "", fs.ErrHashUnsupported + } return o.md5sum, nil } diff --git a/dropbox/dropbox.go b/dropbox/dropbox.go index 8d3952ee8..9332550c6 100644 --- a/dropbox/dropbox.go +++ b/dropbox/dropbox.go @@ -523,6 +523,11 @@ func (f *Fs) DirMove(src fs.Fs) error { return nil } +// Hashes returns the supported hash sets. 
+func (f *Fs) Hashes() fs.HashSet { + return fs.HashSet(fs.HashNone) +} + // ------------------------------------------------------------ // Fs returns the parent Fs @@ -543,9 +548,9 @@ func (o *Object) Remote() string { return o.remote } -// Md5sum returns the Md5sum of an object returning a lowercase hex string -func (o *Object) Md5sum() (string, error) { - return "", nil +// Hash is unsupported on Dropbox +func (o *Object) Hash(t fs.HashType) (string, error) { + return "", fs.ErrHashUnsupported } // Size returns the size of an object in bytes diff --git a/fs/fs.go b/fs/fs.go index aef392093..cce1a6a98 100644 --- a/fs/fs.go +++ b/fs/fs.go @@ -106,6 +106,9 @@ type Fs interface { // Precision of the ModTimes in this Fs Precision() time.Duration + + // Returns the supported hash types of the filesystem + Hashes() HashSet } // Object is a filesystem like object provided by an Fs @@ -121,7 +124,7 @@ type Object interface { // Md5sum returns the md5 checksum of the file // If no Md5sum is available it returns "" - Md5sum() (string, error) + Hash(HashType) (string, error) // ModTime returns the modification date of the file // It should return a best guess if one isn't available diff --git a/fs/hash.go b/fs/hash.go new file mode 100644 index 000000000..8efbdacf2 --- /dev/null +++ b/fs/hash.go @@ -0,0 +1,235 @@ +package fs + +import ( + "crypto/md5" + "crypto/sha1" + "encoding/hex" + "fmt" + "hash" + "io" + "strings" +) + +// HashType indicates a standard hashing algorithm +type HashType int + +// ErrHashUnsupported should be returned by filesystem, +// if it is requested to deliver an unsupported hash type. +var ErrHashUnsupported = fmt.Errorf("hash type not supported") + +const ( + // HashNone indicates no hashes are supported + HashNone HashType = 0 + + // HashMD5 indicates MD5 support + HashMD5 HashType = 1 << iota + + // HashSHA1 indicates SHA-1 support + HashSHA1 +) + +// SupportedHashes returns a set of all the supported hashes by +// HashStream and MultiHasher. 
+var SupportedHashes = NewHashSet(HashMD5, HashSHA1) + +// HashStream will calculate hashes of all supported hash types. +func HashStream(r io.Reader) (map[HashType]string, error) { + return HashStreamTypes(r, SupportedHashes) +} + +// HashStreamTypes will calculate hashes of the requested hash types. +func HashStreamTypes(r io.Reader, set HashSet) (map[HashType]string, error) { + hashers, err := hashFromTypes(set) + if err != nil { + return nil, err + } + + _, err = io.Copy(hashToMultiWriter(hashers), r) + if err != nil { + return nil, err + } + var ret = make(map[HashType]string) + for k, v := range hashers { + ret[k] = hex.EncodeToString(v.Sum(nil)) + } + return ret, nil +} + +// String returns a string representation of the hash type. +// The function will panic if the hash type is unknown. +func (h HashType) String() string { + switch h { + case HashNone: + return "None" + case HashMD5: + return "MD5" + case HashSHA1: + return "SHA-1" + default: + err := fmt.Sprintf("internal error: unknown hash type: 0x%x", int(h)) + panic(err) + } +} + +// hashFromTypes will return hashers for all the requested types. +// The types must be a subset of SupportedHashes, +// and this function must support all types. +func hashFromTypes(set HashSet) (map[HashType]hash.Hash, error) { + if !set.SubsetOf(SupportedHashes) { + return nil, fmt.Errorf("Requested set %08x contains unknown hash types", int(set)) + } + var hashers = make(map[HashType]hash.Hash) + types := set.Array() + for _, t := range types { + switch t { + case HashMD5: + hashers[t] = md5.New() + case HashSHA1: + hashers[t] = sha1.New() + default: + err := fmt.Sprintf("internal error: Unsupported hash type %v", t) + panic(err) + } + } + return hashers, nil +} + +// hashToMultiWriter will return a set of hashers into a +// single multiwriter, where one write will update all +// the hashers. 
+func hashToMultiWriter(h map[HashType]hash.Hash) io.Writer { + // Convert to slice + var w = make([]io.Writer, 0, len(h)) + for _, v := range h { + w = append(w, v) + } + return io.MultiWriter(w...) +} + +// A MultiHasher will construct various hashes on +// all incoming writes. +type MultiHasher struct { + io.Writer + h map[HashType]hash.Hash // Hashes +} + +// NewMultiHasher will return a hash writer that will write all +// supported hash types. +func NewMultiHasher() *MultiHasher { + h, err := NewMultiHasherTypes(SupportedHashes) + if err != nil { + panic("internal error: could not create multihasher") + } + return h +} + +// NewMultiHasherTypes will return a hash writer that will write +// the requested hash types. +func NewMultiHasherTypes(set HashSet) (*MultiHasher, error) { + hashers, err := hashFromTypes(set) + if err != nil { + return nil, err + } + m := MultiHasher{h: hashers, Writer: hashToMultiWriter(hashers)} + return &m, nil +} + +// Sums returns the sums of all accumulated hashes as hex encoded +// strings. +func (m *MultiHasher) Sums() map[HashType]string { + dst := make(map[HashType]string) + for k, v := range m.h { + dst[k] = hex.EncodeToString(v.Sum(nil)) + } + return dst +} + +// A HashSet Indicates one or more hash types. +type HashSet int + +// NewHashSet will create a new hash set with the hash types supplied +func NewHashSet(t ...HashType) HashSet { + h := HashSet(HashNone) + return h.Add(t...) +} + +// Add one or more hash types to the set. +// Returns the modified hash set. 
+func (h *HashSet) Add(t ...HashType) HashSet { + for _, v := range t { + *h |= HashSet(v) + } + return *h +} + +// Contains returns true if the set contains the hash type t +func (h HashSet) Contains(t HashType) bool { + return int(h)&int(t) != 0 +} + +// Overlap returns the overlapping hash types +func (h HashSet) Overlap(t HashSet) HashSet { + return HashSet(int(h) & int(t)) +} + +// SubsetOf will return true if all types of h +// are present in the set c +func (h HashSet) SubsetOf(c HashSet) bool { + return int(h)|int(c) == int(c) +} + +// GetOne will return a hash type. +// Currently the first is returned, but it could be +// improved to return the strongest. +func (h HashSet) GetOne() HashType { + v := int(h) + i := uint(0) + for v != 0 { + if v&1 != 0 { + return HashType(1 << i) + } + i++ + v >>= 1 + } + return HashType(HashNone) +} + +// Array returns an array of all hash types in the set +func (h HashSet) Array() (ht []HashType) { + v := int(h) + i := uint(0) + for v != 0 { + if v&1 != 0 { + ht = append(ht, HashType(1<<i)) + } + i++ + v >>= 1 + } + return ht +} + +// Count returns the number of hash types in the set +func (h HashSet) Count() int { + if int(h) == 0 { + return 0 + } + // credit: https://code.google.com/u/arnehormann/ + x := uint64(h) + x -= (x >> 1) & 0x5555555555555555 + x = (x>>2)&0x3333333333333333 + x&0x3333333333333333 + x += x >> 4 + x &= 0x0f0f0f0f0f0f0f0f + x *= 0x0101010101010101 + return int(x >> 56) +} + +// String returns a string representation of the hash set. +// The function will panic if it contains an unknown type. 
+func (h HashSet) String() string { + a := h.Array() + var r []string + for _, v := range a { + r = append(r, v.String()) + } + return "[" + strings.Join(r, ", ") + "]" +} diff --git a/fs/hash_test.go b/fs/hash_test.go new file mode 100644 index 000000000..a171ced67 --- /dev/null +++ b/fs/hash_test.go @@ -0,0 +1,260 @@ +package fs_test + +import ( + "bytes" + "io" + "testing" + + "github.com/ncw/rclone/fs" +) + +func TestHashSet(t *testing.T) { + var h fs.HashSet + + if h.Count() != 0 { + t.Fatalf("expected empty set to have 0 elements, got %d", h.Count()) + } + a := h.Array() + if len(a) != 0 { + t.Fatalf("expected empty slice, got %d", len(a)) + } + + h = h.Add(fs.HashMD5) + if h.Count() != 1 { + t.Fatalf("expected 1 element, got %d", h.Count()) + } + if h.GetOne() != fs.HashMD5 { + t.Fatalf("expected HashMD5, got %v", h.GetOne()) + } + a = h.Array() + if len(a) != 1 { + t.Fatalf("expected 1 element, got %d", len(a)) + } + if a[0] != fs.HashMD5 { + t.Fatalf("expected HashMD5, got %v", a[0]) + } + + // Test overlap, with all hashes + h = h.Overlap(fs.SupportedHashes) + if h.Count() != 1 { + t.Fatalf("expected 1 element, got %d", h.Count()) + } + if h.GetOne() != fs.HashMD5 { + t.Fatalf("expected HashMD5, got %v", h.GetOne()) + } + if !h.SubsetOf(fs.SupportedHashes) { + t.Fatalf("expected to be subset of all hashes") + } + if !h.SubsetOf(fs.NewHashSet(fs.HashMD5)) { + t.Fatalf("expected to be subset of itself") + } + + h = h.Add(fs.HashSHA1) + if h.Count() != 2 { + t.Fatalf("expected 2 elements, got %d", h.Count()) + } + one := h.GetOne() + if !(one == fs.HashMD5 || one == fs.HashSHA1) { + t.Fatalf("expected to be either MD5 or SHA1, got %v", one) + } + if !h.SubsetOf(fs.SupportedHashes) { + t.Fatalf("expected to be subset of all hashes") + } + if h.SubsetOf(fs.NewHashSet(fs.HashMD5)) { + t.Fatalf("did not expect to be subset of only MD5") + } + if h.SubsetOf(fs.NewHashSet(fs.HashSHA1)) { + t.Fatalf("did not expect to be subset of only SHA1") + } + if 
!h.SubsetOf(fs.NewHashSet(fs.HashMD5, fs.HashSHA1)) { + t.Fatalf("expected to be subset of MD5/SHA1") + } + a = h.Array() + if len(a) != 2 { + t.Fatalf("expected 2 elements, got %d", len(a)) + } + + ol := h.Overlap(fs.NewHashSet(fs.HashMD5)) + if ol.Count() != 1 { + t.Fatalf("expected 1 element overlap, got %d", ol.Count()) + } + if !ol.Contains(fs.HashMD5) { + t.Fatalf("expected overlap to be MD5, got %v", ol) + } + if ol.Contains(fs.HashSHA1) { + t.Fatalf("expected overlap NOT to contain SHA1, got %v", ol) + } + + ol = h.Overlap(fs.NewHashSet(fs.HashMD5, fs.HashSHA1)) + if ol.Count() != 2 { + t.Fatalf("expected 2 element overlap, got %d", ol.Count()) + } + if !ol.Contains(fs.HashMD5) { + t.Fatalf("expected overlap to contain MD5, got %v", ol) + } + if !ol.Contains(fs.HashSHA1) { + t.Fatalf("expected overlap to contain SHA1, got %v", ol) + } +} + +type hashTest struct { + input []byte + output map[fs.HashType]string +} + +var hashTestSet = []hashTest{ + hashTest{ + input: []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, + output: map[fs.HashType]string{ + fs.HashMD5: "bf13fc19e5151ac57d4252e0e0f87abe", + fs.HashSHA1: "3ab6543c08a75f292a5ecedac87ec41642d12166", + }, + }, + // Empty data set + hashTest{ + input: []byte{}, + output: map[fs.HashType]string{ + fs.HashMD5: "d41d8cd98f00b204e9800998ecf8427e", + fs.HashSHA1: "da39a3ee5e6b4b0d3255bfef95601890afd80709", + }, + }, +} + +func TestMultiHasher(t *testing.T) { + for _, test := range hashTestSet { + mh := fs.NewMultiHasher() + n, err := io.Copy(mh, bytes.NewBuffer(test.input)) + if err != nil { + t.Fatal(err) + } + if int(n) != len(test.input) { + t.Fatalf("copy mismatch: %d != %d", n, len(test.input)) + } + sums := mh.Sums() + for k, v := range sums { + expect, ok := test.output[k] + if !ok { + t.Errorf("Unknown hash type %v, sum: %q", k, v) + } + if expect != v { + t.Errorf("hash %v mismatch %q != %q", k, v, expect) + } + } + // Test that all are present + for k, v := range test.output { + expect, ok := 
sums[k] + if !ok { + t.Errorf("did not calculate hash type %v, sum: %q", k, v) + } + if expect != v { + t.Errorf("hash %d mismatch %q != %q", k, v, expect) + } + } + } +} + +func TestMultiHasherTypes(t *testing.T) { + h := fs.HashSHA1 + for _, test := range hashTestSet { + mh, err := fs.NewMultiHasherTypes(fs.NewHashSet(h)) + if err != nil { + t.Fatal(err) + } + n, err := io.Copy(mh, bytes.NewBuffer(test.input)) + if err != nil { + t.Fatal(err) + } + if int(n) != len(test.input) { + t.Fatalf("copy mismatch: %d != %d", n, len(test.input)) + } + sums := mh.Sums() + if len(sums) != 1 { + t.Fatalf("expected 1 sum, got %d", len(sums)) + } + expect := test.output[h] + if expect != sums[h] { + t.Errorf("hash %v mismatch %q != %q", h, sums[h], expect) + } + } +} + +func TestHashStream(t *testing.T) { + for _, test := range hashTestSet { + sums, err := fs.HashStream(bytes.NewBuffer(test.input)) + if err != nil { + t.Fatal(err) + } + for k, v := range sums { + expect, ok := test.output[k] + if !ok { + t.Errorf("Unknown hash type %v, sum: %q", k, v) + } + if expect != v { + t.Errorf("hash %v mismatch %q != %q", k, v, expect) + } + } + // Test that all are present + for k, v := range test.output { + expect, ok := sums[k] + if !ok { + t.Errorf("did not calculate hash type %v, sum: %q", k, v) + } + if expect != v { + t.Errorf("hash %v mismatch %q != %q", k, v, expect) + } + } + } +} + +func TestHashStreamTypes(t *testing.T) { + h := fs.HashSHA1 + for _, test := range hashTestSet { + sums, err := fs.HashStreamTypes(bytes.NewBuffer(test.input), fs.NewHashSet(h)) + if err != nil { + t.Fatal(err) + } + if len(sums) != 1 { + t.Fatalf("expected 1 sum, got %d", len(sums)) + } + expect := test.output[h] + if expect != sums[h] { + t.Errorf("hash %d mismatch %q != %q", h, sums[h], expect) + } + } +} + +func TestHashSetStringer(t *testing.T) { + h := fs.NewHashSet(fs.HashSHA1, fs.HashMD5) + s := h.String() + expect := "[MD5, SHA-1]" + if s != expect { + t.Errorf("unexpected stringer: was 
%q, expected %q", s, expect) + } + h = fs.NewHashSet(fs.HashSHA1) + s = h.String() + expect = "[SHA-1]" + if s != expect { + t.Errorf("unexpected stringer: was %q, expected %q", s, expect) + } + h = fs.NewHashSet() + s = h.String() + expect = "[]" + if s != expect { + t.Errorf("unexpected stringer: was %q, expected %q", s, expect) + } +} + +func TestHashStringer(t *testing.T) { + h := fs.HashMD5 + s := h.String() + expect := "MD5" + if s != expect { + t.Errorf("unexpected stringer: was %q, expected %q", s, expect) + } + h = fs.HashNone + s = h.String() + expect = "None" + if s != expect { + t.Errorf("unexpected stringer: was %q, expected %q", s, expect) + } +} diff --git a/fs/limited.go b/fs/limited.go index c37d4954d..fe9768ecb 100644 --- a/fs/limited.go +++ b/fs/limited.go @@ -96,6 +96,11 @@ func (f *Limited) Precision() time.Duration { return f.fs.Precision() } +// Hashes returns the supported hash sets. +func (f *Limited) Hashes() HashSet { + return f.fs.Hashes() +} + // Copy src to this remote using server side copy operations. // // This is stored with the remote path given diff --git a/fs/operations.go b/fs/operations.go index bd3739996..ba8464022 100644 --- a/fs/operations.go +++ b/fs/operations.go @@ -33,48 +33,54 @@ func CalculateModifyWindow(fs ...Fs) { Debug(fs[0], "Modify window is %s", Config.ModifyWindow) } -// Md5sumsEqual checks to see if src == dst, but ignores empty strings -func Md5sumsEqual(src, dst string) bool { +// HashEquals checks to see if src == dst, but ignores empty strings +// and returns true if either is empty. +func HashEquals(src, dst string) bool { if src == "" || dst == "" { return true } return src == dst } -// CheckMd5sums checks the two files to see if the MD5sums are the same +// CheckHashes checks the two files to see if they have common +// known hash types and compares them // // Returns two bools, the first of which is equality and the second of -// which is true if either of the MD5SUMs were unset. 
+// which is true if either of the hashes were unset. // // May return an error which will already have been logged // // If an error is returned it will return equal as false -func CheckMd5sums(src, dst Object) (equal bool, unset bool, err error) { - srcMd5, err := src.Md5sum() - if err != nil { - Stats.Error() - ErrorLog(src, "Failed to calculate src md5: %s", err) - return false, false, err - } - if srcMd5 == "" { +func CheckHashes(src, dst Object) (equal bool, unset bool, err error) { + common := src.Fs().Hashes().Overlap(dst.Fs().Hashes()) + Debug(nil, "Shared hashes: %v", common) + if common.Count() == 0 { return true, true, nil } - dstMd5, err := dst.Md5sum() + usehash := common.GetOne() + srcHash, err := src.Hash(usehash) if err != nil { Stats.Error() - ErrorLog(dst, "Failed to calculate dst md5: %s", err) + ErrorLog(src, "Failed to calculate src hash: %s", err) return false, false, err } - if dstMd5 == "" { + if srcHash == "" { return true, true, nil } - // Debug("Src MD5 %s", srcMd5) - // Debug("Dst MD5 %s", obj.Hash) - return Md5sumsEqual(srcMd5, dstMd5), false, nil + dstHash, err := dst.Hash(usehash) + if err != nil { + Stats.Error() + ErrorLog(dst, "Failed to calculate dst hash: %s", err) + return false, false, err + } + if dstHash == "" { + return true, true, nil + } + return srcHash == dstHash, false, nil } // Equal checks to see if the src and dst objects are equal by looking at -// size, mtime and MD5SUM +// size, mtime and hash // // If the src and dst size are different then it is considered to be // not equal. If --size-only is in effect then this is the only check @@ -84,7 +90,7 @@ func CheckMd5sums(src, dst Object) (equal bool, unset bool, err error) { // considered to be equal. This check is skipped if using --checksum. // // If the size is the same and mtime is different, unreadable or -// --checksum is set and the MD5SUM is the same then the file is +// --checksum is set and the hash is the same then the file is // considered to be equal. 
In this case the mtime on the dst is // updated if --checksum is not set. // @@ -120,23 +126,23 @@ func Equal(src, dst Object) bool { } // mtime is unreadable or different but size is the same so - // check the MD5SUM - same, md5unset, _ := CheckMd5sums(src, dst) + // check the hash + same, hashunset, _ := CheckHashes(src, dst) if !same { - Debug(src, "Md5sums differ") + Debug(src, "Hash differ") return false } if !Config.CheckSum { - // Size and MD5 the same but mtime different so update the + // Size and hash the same but mtime different so update the // mtime of the dst object here dst.SetModTime(srcModTime) } - if md5unset { + if hashunset { Debug(src, "Size of src and dst objects identical") } else { - Debug(src, "Size and MD5SUM of src and dst objects identical") + Debug(src, "Size and hash of src and dst objects identical") } return true } @@ -245,20 +251,27 @@ tryAgain: return } - // Verify md5sums are the same after transfer - ignoring blank md5sums - if !Config.SizeOnly { - srcMd5sum, md5sumErr := src.Md5sum() - if md5sumErr != nil { + // Verify hashes are the same after transfer - ignoring blank hashes + // TODO(klauspost): This could be extended, so we always create a hash type matching + // the destination, and calculate it while sending. 
+ common := src.Fs().Hashes().Overlap(dst.Fs().Hashes()) + Debug(src, "common hashes: %v", common) + if !Config.SizeOnly && common.Count() > 0 { + // Get common hash type + hashType := common.GetOne() + + srcSum, err := src.Hash(hashType) + if err != nil { Stats.Error() - ErrorLog(src, "Failed to read md5sum: %s", md5sumErr) - } else if srcMd5sum != "" { - dstMd5sum, md5sumErr := dst.Md5sum() - if md5sumErr != nil { + ErrorLog(src, "Failed to read src hash: %s", err) + } else if srcSum != "" { + dstSum, err := dst.Hash(hashType) + if err != nil { Stats.Error() - ErrorLog(dst, "Failed to read md5sum: %s", md5sumErr) - } else if !Md5sumsEqual(srcMd5sum, dstMd5sum) { + ErrorLog(dst, "Failed to read hash: %s", err) + } else if !HashEquals(srcSum, dstSum) { Stats.Error() - err = fmt.Errorf("Corrupted on transfer: md5sums differ %q vs %q", srcMd5sum, dstMd5sum) + err = fmt.Errorf("Corrupted on transfer: %v hash differ %q vs %q", hashType, srcSum, dstSum) ErrorLog(dst, "%s", err) removeFailedCopy(dst) return @@ -296,7 +309,7 @@ func checkOne(pair ObjectPair, out ObjectPairChan) { // PairChecker reads Objects~s on in send to out if they need transferring. 
// -// FIXME potentially doing lots of MD5SUMS at once +// FIXME potentially doing lots of hashes at once func PairChecker(in ObjectPairChan, out ObjectPairChan, wg *sync.WaitGroup) { defer wg.Done() for pair := range in { @@ -540,7 +553,7 @@ func MoveDir(fdst, fsrc Fs) error { return Purge(fsrc) } -// Check the files in fsrc and fdst according to Size and MD5SUM +// Check the files in fsrc and fdst according to Size and hash func Check(fdst, fsrc Fs) error { var ( wg sync.WaitGroup @@ -614,7 +627,7 @@ func Check(fdst, fsrc Fs) error { ErrorLog(src, "Sizes differ") continue } - same, _, err := CheckMd5sums(src, dst) + same, _, err := CheckHashes(src, dst) Stats.DoneChecking(src) if err != nil { continue @@ -702,15 +715,30 @@ func ListLong(f Fs, w io.Writer) error { // // Lists in parallel which may get them out of order func Md5sum(f Fs, w io.Writer) error { + return hashLister(HashMD5, f, w) +} + +// Sha1sum list the Fs to the supplied writer +// +// Obeys includes and excludes +// +// Lists in parallel which may get them out of order +func Sha1sum(f Fs, w io.Writer) error { + return hashLister(HashSHA1, f, w) +} + +func hashLister(ht HashType, f Fs, w io.Writer) error { return ListFn(f, func(o Object) { Stats.Checking(o) - md5sum, err := o.Md5sum() + sum, err := o.Hash(ht) Stats.DoneChecking(o) - if err != nil { - Debug(o, "Failed to read MD5: %v", err) - md5sum = "ERROR" + if err == ErrHashUnsupported { + sum = "UNSUPPORTED" + } else if err != nil { + Debug(o, "Failed to read %v: %v", ht, err) + sum = "ERROR" } - syncFprintf(w, "%32s %s\n", md5sum, o.Remote()) + syncFprintf(w, "%32s %s\n", sum, o.Remote()) }) } diff --git a/fs/operations_test.go b/fs/operations_test.go index 661829134..7b2b059af 100644 --- a/fs/operations_test.go +++ b/fs/operations_test.go @@ -365,7 +365,7 @@ func TestSyncAfterChangingFilesSizeOnly(t *testing.T) { fstest.CheckListingWithPrecision(t, fremote, items, fs.Config.ModifyWindow) } -// Sync after changing a file's contents, modtime but 
not length +// Sync after changing a file's contents, maintaining modtime and length func TestSyncAfterChangingContentsOnly(t *testing.T) { if fremote.Precision() == fs.ModTimeNotSupported { t.Logf("ModTimeNotSupported so forcing file to be a different size") diff --git a/fstest/fstest.go b/fstest/fstest.go index 82f53a288..26a020803 100644 --- a/fstest/fstest.go +++ b/fstest/fstest.go @@ -54,16 +54,21 @@ func (i *Item) Check(t *testing.T, obj fs.Object, precision time.Duration) { if obj == nil { t.Fatalf("Object is nil") } - // Check attributes - Md5sum, err := obj.Md5sum() - if err != nil { - t.Fatalf("Failed to read md5sum for %q: %v", obj.Remote(), err) - } - if !fs.Md5sumsEqual(i.Md5sum, Md5sum) { - t.Errorf("%s: Md5sum incorrect - expecting %q got %q", obj.Remote(), i.Md5sum, Md5sum) + types := obj.Fs().Hashes().Array() + for _, hash := range types { + // Check attributes + sum, err := obj.Hash(hash) + if err != nil { + t.Fatalf("%s: Failed to read hash %v for %q: %v", obj.Fs().String(), hash, obj.Remote(), err) + } + if hash == fs.HashMD5 { + if !fs.HashEquals(i.Md5sum, sum) { + t.Errorf("%s/%s: md5 hash incorrect - expecting %q got %q", obj.Fs().String(), obj.Remote(), i.Md5sum, sum) + } + } } if i.Size != obj.Size() { - t.Errorf("%s: Size incorrect - expecting %d got %d", obj.Remote(), i.Size, obj.Size()) + t.Errorf("%s/%s: Size incorrect - expecting %d got %d", obj.Fs().String(), obj.Remote(), i.Size, obj.Size()) } i.CheckModTime(t, obj, obj.ModTime(), precision) } diff --git a/fstest/fstests/fstests.go b/fstest/fstests/fstests.go index 165bfffb2..d114b771a 100644 --- a/fstest/fstests/fstests.go +++ b/fstest/fstests/fstests.go @@ -469,11 +469,11 @@ func TestObjectRemote(t *testing.T) { func TestObjectMd5sum(t *testing.T) { skipIfNotOk(t) obj := findObject(t, file1.Path) - Md5sum, err := obj.Md5sum() - if err != nil { + Md5sum, err := obj.Hash(fs.HashMD5) + if err != nil && err != fs.ErrHashUnsupported { t.Errorf("Error in Md5sum: %v", err) } - if 
!fs.Md5sumsEqual(Md5sum, file1.Md5sum) { + if !fs.HashEquals(Md5sum, file1.Md5sum) { t.Errorf("Md5sum is wrong %v != %v", Md5sum, file1.Md5sum) } } @@ -527,7 +527,7 @@ func TestObjectOpen(t *testing.T) { t.Fatalf("in.Close() return error: %v", err) } Md5sum := hex.EncodeToString(hash.Sum(nil)) - if !fs.Md5sumsEqual(Md5sum, file1.Md5sum) { + if !fs.HashEquals(Md5sum, file1.Md5sum) { t.Errorf("Md5sum is wrong %v != %v", Md5sum, file1.Md5sum) } } diff --git a/googlecloudstorage/googlecloudstorage.go b/googlecloudstorage/googlecloudstorage.go index 84514c8a1..e81f8b96d 100644 --- a/googlecloudstorage/googlecloudstorage.go +++ b/googlecloudstorage/googlecloudstorage.go @@ -458,6 +458,11 @@ func (f *Fs) Copy(src fs.Object, remote string) (fs.Object, error) { return dstObj, nil } +// Hashes returns the supported hash sets. +func (f *Fs) Hashes() fs.HashSet { + return fs.HashSet(fs.HashMD5) +} + // ------------------------------------------------------------ // Fs returns the parent Fs @@ -478,8 +483,11 @@ func (o *Object) Remote() string { return o.remote } -// Md5sum returns the Md5sum of an object returning a lowercase hex string -func (o *Object) Md5sum() (string, error) { +// Hash returns the Md5sum of an object returning a lowercase hex string +func (o *Object) Hash(t fs.HashType) (string, error) { + if t != fs.HashMD5 { + return "", fs.ErrHashUnsupported + } return o.md5sum, nil } diff --git a/hubic/hubic.go b/hubic/hubic.go index 379a6a425..aff747587 100644 --- a/hubic/hubic.go +++ b/hubic/hubic.go @@ -207,6 +207,12 @@ func (f *Fs) UnWrap() fs.Fs { return f.Fs } +// Hashes returns the supported hash sets. 
+// Inherited from swift +func (f *Fs) Hashes() fs.HashSet { + return fs.HashSet(fs.HashMD5) +} + // Check the interfaces are satisfied var ( _ fs.Fs = (*Fs)(nil) diff --git a/local/local.go b/local/local.go index cd2892ec6..127a2579f 100644 --- a/local/local.go +++ b/local/local.go @@ -2,10 +2,7 @@ package local import ( - "crypto/md5" - "encoding/hex" "fmt" - "hash" "io" "io/ioutil" "os" @@ -50,11 +47,11 @@ type Fs struct { // Object represents a local filesystem object type Object struct { - fs *Fs // The Fs this object is part of - remote string // The remote path - path string // The local path - info os.FileInfo // Interface for file info (always present) - md5sum string // the md5sum of the object or "" if not calculated + fs *Fs // The Fs this object is part of + remote string // The remote path + path string // The local path + info os.FileInfo // Interface for file info (always present) + hashes map[fs.HashType]string // Hashes } // ------------------------------------------------------------ @@ -417,6 +414,11 @@ func (f *Fs) DirMove(src fs.Fs) error { return os.Rename(srcFs.root, f.root) } +// Hashes returns the supported hash sets. 
+func (f *Fs) Hashes() fs.HashSet { + return fs.SupportedHashes +} + // ------------------------------------------------------------ // Fs returns the parent Fs @@ -437,32 +439,39 @@ func (o *Object) Remote() string { return o.fs.cleanUtf8(o.remote) } -// Md5sum calculates the Md5sum of a file returning a lowercase hex string -func (o *Object) Md5sum() (string, error) { - if o.md5sum != "" { - return o.md5sum, nil +// Hash returns the requested hash of a file as a lowercase hex string +func (o *Object) Hash(r fs.HashType) (string, error) { + // Check that the underlying file hasn't changed + oldtime := o.info.ModTime() + oldsize := o.info.Size() + _ = o.lstat() + + if !o.info.ModTime().Equal(oldtime) || oldsize != o.info.Size() { + o.hashes = nil } - in, err := os.Open(o.path) - if err != nil { - fs.Stats.Error() - fs.ErrorLog(o, "Failed to open: %s", err) - return "", err + + if o.hashes == nil { + o.hashes = make(map[fs.HashType]string) + in, err := os.Open(o.path) + if err != nil { + fs.Stats.Error() + fs.ErrorLog(o, "Failed to open: %s", err) + return "", err + } + o.hashes, err = fs.HashStream(in) + closeErr := in.Close() + if err != nil { + fs.Stats.Error() + fs.ErrorLog(o, "Failed to read: %s", err) + return "", err + } + if closeErr != nil { + fs.Stats.Error() + fs.ErrorLog(o, "Failed to close: %s", closeErr) + return "", closeErr + } } - hash := md5.New() - _, err = io.Copy(hash, in) - closeErr := in.Close() - if err != nil { - fs.Stats.Error() - fs.ErrorLog(o, "Failed to read: %s", err) - return "", err - } - if closeErr != nil { - fs.Stats.Error() - fs.ErrorLog(o, "Failed to close: %s", closeErr) - return "", closeErr - } - o.md5sum = hex.EncodeToString(hash.Sum(nil)) - return o.md5sum, nil + return o.hashes[r], nil } // Size returns the size of an object in bytes @@ -506,9 +515,9 @@ func (o *Object) Storable() bool { // localOpenFile wraps an io.ReadCloser and updates the md5sum of the // object that is read type localOpenFile struct { - o *Object // 
object that is open - in io.ReadCloser // handle we are wrapping - hash hash.Hash // currently accumulating MD5 + o *Object // object that is open + in io.ReadCloser // handle we are wrapping + hash *fs.MultiHasher // currently accumulating hashes } // Read bytes from the object - see io.Reader @@ -525,9 +534,9 @@ func (file *localOpenFile) Read(p []byte) (n int, err error) { func (file *localOpenFile) Close() (err error) { err = file.in.Close() if err == nil { - file.o.md5sum = hex.EncodeToString(file.hash.Sum(nil)) + file.o.hashes = file.hash.Sums() } else { - file.o.md5sum = "" + file.o.hashes = nil } return err } @@ -542,7 +551,7 @@ func (o *Object) Open() (in io.ReadCloser, err error) { in = &localOpenFile{ o: o, in: in, - hash: md5.New(), + hash: fs.NewMultiHasher(), } return } @@ -566,7 +575,7 @@ func (o *Object) Update(in io.Reader, modTime time.Time, size int64) error { } // Calculate the md5sum of the object we are reading as we go along - hash := md5.New() + hash := fs.NewMultiHasher() in = io.TeeReader(in, hash) _, err = io.Copy(out, in) @@ -578,8 +587,8 @@ func (o *Object) Update(in io.Reader, modTime time.Time, size int64) error { return outErr } - // All successful so update the md5sum - o.md5sum = hex.EncodeToString(hash.Sum(nil)) + // All successful so update the hashes + o.hashes = hash.Sums() // Set the mtime o.SetModTime(modTime) diff --git a/onedrive/onedrive.go b/onedrive/onedrive.go index c2fe62aae..e1c4ada4c 100644 --- a/onedrive/onedrive.go +++ b/onedrive/onedrive.go @@ -4,6 +4,7 @@ package onedrive import ( "bytes" + "encoding/base64" "fmt" "io" "log" @@ -13,6 +14,8 @@ import ( "sync" "time" + "encoding/hex" + "github.com/ncw/rclone/dircache" "github.com/ncw/rclone/fs" "github.com/ncw/rclone/oauthutil" @@ -95,6 +98,7 @@ type Object struct { size int64 // size of the object modTime time.Time // modification time of the object id string // ID of the object + sha1 string // SHA-1 of the object content } // 
------------------------------------------------------------ @@ -670,6 +674,11 @@ func (f *Fs) Purge() error { return f.purgeCheck(false) } +// Hashes returns the supported hash sets. +func (f *Fs) Hashes() fs.HashSet { + return fs.HashSet(fs.HashSHA1) +} + // ------------------------------------------------------------ // Fs returns the parent Fs @@ -695,9 +704,12 @@ func (o *Object) srvPath() string { return replaceReservedChars(o.fs.rootSlash() + o.remote) } -// Md5sum returns the Md5sum of an object returning a lowercase hex string -func (o *Object) Md5sum() (string, error) { - return "", nil // not supported by one drive +// Hash returns the SHA-1 of an object returning a lowercase hex string +func (o *Object) Hash(t fs.HashType) (string, error) { + if t != fs.HashSHA1 { + return "", fs.ErrHashUnsupported + } + return o.sha1, nil } // Size returns the size of an object in bytes @@ -714,6 +726,16 @@ func (o *Object) Size() int64 { func (o *Object) setMetaData(info *api.Item) { o.hasMetaData = true o.size = info.Size + + // In OneDrive for Business, SHA1 and CRC32 hash values are not returned for files. + if info.File != nil && info.File.Hashes.Sha1Hash != "" { + sha1sumData, err := base64.StdEncoding.DecodeString(info.File.Hashes.Sha1Hash) + if err != nil { + fs.Log(o, "Bad SHA1 decode: %v", err) + } else { + o.sha1 = hex.EncodeToString(sha1sumData) + } + } if info.FileSystemInfo != nil { o.modTime = time.Time(info.FileSystemInfo.LastModifiedDateTime) } else { diff --git a/rclone.go b/rclone.go index 579de5960..16a8d4631 100644 --- a/rclone.go +++ b/rclone.go @@ -147,6 +147,18 @@ var Commands = []Command{ MinArgs: 1, MaxArgs: 1, }, + { + Name: "sha1sum", + ArgsHelp: "remote:path", + Help: ` + Produces an sha1sum file for all the objects in the path. 
This + is in the same format as the standard sha1sum tool produces.`, + Run: func(fdst, fsrc fs.Fs) error { + return fs.Sha1sum(fdst, os.Stdout) + }, + MinArgs: 1, + MaxArgs: 1, + }, { Name: "size", ArgsHelp: "remote:path", diff --git a/s3/s3.go b/s3/s3.go index de2e3e416..48c37d8a6 100644 --- a/s3/s3.go +++ b/s3/s3.go @@ -537,6 +537,11 @@ func (f *Fs) Copy(src fs.Object, remote string) (fs.Object, error) { return f.NewFsObject(remote), err } +// Hashes returns the supported hash sets. +func (f *Fs) Hashes() fs.HashSet { + return fs.HashSet(fs.HashMD5) +} + // ------------------------------------------------------------ // Fs returns the parent Fs @@ -559,8 +564,11 @@ func (o *Object) Remote() string { var matchMd5 = regexp.MustCompile(`^[0-9a-f]{32}$`) -// Md5sum returns the Md5sum of an object returning a lowercase hex string -func (o *Object) Md5sum() (string, error) { +// Hash returns the Md5sum of an object returning a lowercase hex string +func (o *Object) Hash(t fs.HashType) (string, error) { + if t != fs.HashMD5 { + return "", fs.ErrHashUnsupported + } etag := strings.Trim(strings.ToLower(o.etag), `"`) // Check the etag is a valid md5sum if !matchMd5.MatchString(etag) { diff --git a/swift/swift.go b/swift/swift.go index 4e232e138..1241b70db 100644 --- a/swift/swift.go +++ b/swift/swift.go @@ -431,6 +431,11 @@ func (f *Fs) Copy(src fs.Object, remote string) (fs.Object, error) { return f.NewFsObject(remote), nil } +// Hashes returns the supported hash sets. 
+func (f *Fs) Hashes() fs.HashSet { + return fs.HashSet(fs.HashMD5) +} + // ------------------------------------------------------------ // Fs returns the parent Fs @@ -451,8 +456,11 @@ func (o *Object) Remote() string { return o.remote } -// Md5sum returns the Md5sum of an object returning a lowercase hex string -func (o *Object) Md5sum() (string, error) { +// Hash returns the Md5sum of an object returning a lowercase hex string +func (o *Object) Hash(t fs.HashType) (string, error) { + if t != fs.HashMD5 { + return "", fs.ErrHashUnsupported + } isManifest, err := o.isManifestFile() if err != nil { return "", err diff --git a/yandex/yandex.go b/yandex/yandex.go index a1782a228..98f0e13ec 100644 --- a/yandex/yandex.go +++ b/yandex/yandex.go @@ -382,6 +382,11 @@ func (f *Fs) Purge() error { return f.purgeCheck(false) } +// Hashes returns the supported hash sets. +func (f *Fs) Hashes() fs.HashSet { + return fs.HashSet(fs.HashMD5) +} + // ------------------------------------------------------------ // Fs returns the parent Fs @@ -402,8 +407,11 @@ func (o *Object) Remote() string { return o.remote } -// Md5sum returns the Md5sum of an object returning a lowercase hex string -func (o *Object) Md5sum() (string, error) { +// Hash returns the Md5sum of an object returning a lowercase hex string +func (o *Object) Hash(t fs.HashType) (string, error) { + if t != fs.HashMD5 { + return "", fs.ErrHashUnsupported + } return o.md5sum, nil }