From 63819598505cf9a3346a37a553c31381afc71bb9 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Fri, 26 May 2017 15:09:31 +0100 Subject: [PATCH] dropbox: support Dropbox content hashing scheme - fixes #1302 * add support to hashing module * add dbhashsum to list the hashes * add support to dropbox module This means objects up and downloaded to/from Dropbox will have their hashes checked. Note after this change local objects are calculating MD5, SHA1 and DBHASH which is excessive and needs to be fixed. --- cmd/all/all.go | 1 + docs/content/dropbox.md | 6 +++++- docs/content/overview.md | 8 ++++++-- dropbox/dropbox.go | 30 +++++++++++++++++++----------- fs/hash.go | 16 +++++++++++++--- fs/hash_test.go | 18 ++++++++++-------- fs/operations.go | 9 +++++++++ fs/operations_test.go | 34 ++++++++++++++++++++++------------ 8 files changed, 85 insertions(+), 37 deletions(-) diff --git a/cmd/all/all.go b/cmd/all/all.go index 0c2d2e3b0..a355348f5 100644 --- a/cmd/all/all.go +++ b/cmd/all/all.go @@ -13,6 +13,7 @@ import ( _ "github.com/ncw/rclone/cmd/copy" _ "github.com/ncw/rclone/cmd/copyto" _ "github.com/ncw/rclone/cmd/cryptcheck" + _ "github.com/ncw/rclone/cmd/dbhashsum" _ "github.com/ncw/rclone/cmd/dedupe" _ "github.com/ncw/rclone/cmd/delete" _ "github.com/ncw/rclone/cmd/genautocomplete" diff --git a/docs/content/dropbox.md b/docs/content/dropbox.md index 03b21dad2..49a3d7a75 100644 --- a/docs/content/dropbox.md +++ b/docs/content/dropbox.md @@ -91,7 +91,7 @@ To copy a local directory to a dropbox directory called backup rclone copy /home/source remote:backup -### Modified time and MD5SUMs ### +### Modified time and Hashes ### Dropbox supports modified times, but the only way to set a modification time is to re-upload the file. @@ -102,6 +102,10 @@ decide to upload all your old data to fix the modification times. If you don't want this to happen use `--size-only` or `--checksum` flag to stop it. 
+Dropbox supports [its own hash +type](https://www.dropbox.com/developers/reference/content-hash) which +is checked for all transfers. + ### Specific options ### Here are the command line options specific to this cloud storage diff --git a/docs/content/overview.md b/docs/content/overview.md index 02ed89cc5..f88d1415d 100644 --- a/docs/content/overview.md +++ b/docs/content/overview.md @@ -20,7 +20,7 @@ Here is an overview of the major features of each cloud storage system. | Google Drive | MD5 | Yes | No | Yes | R/W | | Amazon S3 | MD5 | Yes | No | No | R/W | | Openstack Swift | MD5 | Yes | No | No | R/W | -| Dropbox | - | Yes | Yes | No | - | +| Dropbox | DBHASH †| Yes | Yes | No | - | | Google Cloud Storage | MD5 | Yes | No | No | R/W | | Amazon Drive | MD5 | No | Yes | No | R | | Microsoft OneDrive | SHA1 | Yes | Yes | No | R | @@ -28,7 +28,7 @@ Here is an overview of the major features of each cloud storage system. | Backblaze B2 | SHA1 | Yes | No | No | R/W | | Yandex Disk | MD5 | Yes | No | No | R/W | | SFTP | - | Yes | Depends | No | - | -| FTP | None | No | Yes | No | - | +| FTP | - | No | Yes | No | - | | The local filesystem | All | Yes | Depends | No | - | ### Hash ### @@ -41,6 +41,10 @@ the `check` command. To use the checksum checks between filesystems they must support a common hash type. +† Note that Dropbox supports [its own custom +hash](https://www.dropbox.com/developers/reference/content-hash). +This is an SHA256 sum of the concatenated SHA256 sums of each 4MB block. 
+ ### ModTime ### The cloud storage system supports setting modification times on diff --git a/dropbox/dropbox.go b/dropbox/dropbox.go index 394be939b..e005cf5ba 100644 --- a/dropbox/dropbox.go +++ b/dropbox/dropbox.go @@ -2,7 +2,6 @@ package dropbox // FIXME put low level retries in -// FIXME add dropbox style hashes // FIXME dropbox for business would be quite easy to add /* @@ -99,12 +98,14 @@ type Fs struct { } // Object describes a dropbox object +// +// Dropbox Objects always have full metadata type Object struct { - fs *Fs // what this object is part of - remote string // The remote path - bytes int64 // size of the object - modTime time.Time // time it was last modified - hasMetadata bool // metadata is valid + fs *Fs // what this object is part of + remote string // The remote path + bytes int64 // size of the object + modTime time.Time // time it was last modified + hash string // content_hash of the object } // ------------------------------------------------------------ @@ -640,7 +641,7 @@ func (f *Fs) DirMove(src fs.Fs, srcRemote, dstRemote string) error { // Hashes returns the supported hash sets. 
func (f *Fs) Hashes() fs.HashSet { - return fs.HashSet(fs.HashNone) + return fs.HashSet(fs.HashDropbox) } // ------------------------------------------------------------ @@ -663,9 +664,16 @@ func (o *Object) Remote() string { return o.remote } -// Hash is unsupported on Dropbox +// Hash returns the dropbox special hash func (o *Object) Hash(t fs.HashType) (string, error) { - return "", fs.ErrHashUnsupported + if t != fs.HashDropbox { + return "", fs.ErrHashUnsupported + } + err := o.readMetaData() + if err != nil { + return "", errors.Wrap(err, "failed to read hash from metadata") + } + return o.hash, nil } // Size returns the size of an object in bytes @@ -679,7 +687,7 @@ func (o *Object) Size() int64 { func (o *Object) setMetadataFromEntry(info *files.FileMetadata) error { o.bytes = int64(info.Size) o.modTime = info.ClientModified - o.hasMetadata = true + o.hash = info.ContentHash return nil } @@ -722,7 +730,7 @@ func (o *Object) metadataKey() string { // readMetaData gets the info if it hasn't already been fetched func (o *Object) readMetaData() (err error) { - if o.hasMetadata { + if !o.modTime.IsZero() { return nil } // Last resort diff --git a/fs/hash.go b/fs/hash.go index e8d9d8e1e..cd808fbd0 100644 --- a/fs/hash.go +++ b/fs/hash.go @@ -9,6 +9,7 @@ import ( "io" "strings" + "github.com/ncw/rclone/dropbox/dbhash" "github.com/pkg/errors" ) @@ -26,18 +27,23 @@ const ( // HashSHA1 indicates SHA-1 support HashSHA1 + // HashDropbox indicates Dropbox special hash + // https://www.dropbox.com/developers/reference/content-hash + HashDropbox + // HashNone indicates no hashes are supported HashNone HashType = 0 ) // SupportedHashes returns a set of all the supported hashes by // HashStream and MultiHasher. 
-var SupportedHashes = NewHashSet(HashMD5, HashSHA1) +var SupportedHashes = NewHashSet(HashMD5, HashSHA1, HashDropbox) // HashWidth returns the width in characters for any HashType var HashWidth = map[HashType]int{ - HashMD5: 32, - HashSHA1: 40, + HashMD5: 32, + HashSHA1: 40, + HashDropbox: 64, } // HashStream will calculate hashes of all supported hash types. @@ -73,6 +79,8 @@ func (h HashType) String() string { return "MD5" case HashSHA1: return "SHA-1" + case HashDropbox: + return "DropboxHash" default: err := fmt.Sprintf("internal error: unknown hash type: 0x%x", int(h)) panic(err) @@ -94,6 +102,8 @@ func hashFromTypes(set HashSet) (map[HashType]hash.Hash, error) { hashers[t] = md5.New() case HashSHA1: hashers[t] = sha1.New() + case HashDropbox: + hashers[t] = dbhash.New() default: err := fmt.Sprintf("internal error: Unsupported hash type %v", t) panic(err) diff --git a/fs/hash_test.go b/fs/hash_test.go index cfe4791a2..b3cc8d07e 100644 --- a/fs/hash_test.go +++ b/fs/hash_test.go @@ -65,16 +65,18 @@ var hashTestSet = []hashTest{ { input: []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, output: map[fs.HashType]string{ - fs.HashMD5: "bf13fc19e5151ac57d4252e0e0f87abe", - fs.HashSHA1: "3ab6543c08a75f292a5ecedac87ec41642d12166", + fs.HashMD5: "bf13fc19e5151ac57d4252e0e0f87abe", + fs.HashSHA1: "3ab6543c08a75f292a5ecedac87ec41642d12166", + fs.HashDropbox: "214d2fcf3566e94c99ad2f59bd993daca46d8521a0c447adf4b324f53fddc0c7", }, }, // Empty data set { input: []byte{}, output: map[fs.HashType]string{ - fs.HashMD5: "d41d8cd98f00b204e9800998ecf8427e", - fs.HashSHA1: "da39a3ee5e6b4b0d3255bfef95601890afd80709", + fs.HashMD5: "d41d8cd98f00b204e9800998ecf8427e", + fs.HashSHA1: "da39a3ee5e6b4b0d3255bfef95601890afd80709", + fs.HashDropbox: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", }, }, } @@ -88,13 +90,13 @@ func TestMultiHasher(t *testing.T) { sums := mh.Sums() for k, v := range sums { expect, ok := test.output[k] - require.True(t, ok) + 
require.True(t, ok, "test output for hash not found") assert.Equal(t, v, expect) } // Test that all are present for k, v := range test.output { expect, ok := sums[k] - require.True(t, ok) + require.True(t, ok, "test output for hash not found") assert.Equal(t, v, expect) } } @@ -145,8 +147,8 @@ func TestHashStreamTypes(t *testing.T) { } func TestHashSetStringer(t *testing.T) { - h := fs.NewHashSet(fs.HashSHA1, fs.HashMD5) - assert.Equal(t, h.String(), "[MD5, SHA-1]") + h := fs.NewHashSet(fs.HashSHA1, fs.HashMD5, fs.HashDropbox) + assert.Equal(t, h.String(), "[MD5, SHA-1, DropboxHash]") h = fs.NewHashSet(fs.HashSHA1) assert.Equal(t, h.String(), "[SHA-1]") h = fs.NewHashSet() diff --git a/fs/operations.go b/fs/operations.go index 752203106..923f53fc6 100644 --- a/fs/operations.go +++ b/fs/operations.go @@ -978,6 +978,15 @@ func Sha1sum(f Fs, w io.Writer) error { return hashLister(HashSHA1, f, w) } +// DropboxHashSum lists the Dropbox hash of each object in the Fs to the supplied writer +// +// Obeys includes and excludes +// +// Lists in parallel which may get them out of order +func DropboxHashSum(f Fs, w io.Writer) error { + return hashLister(HashDropbox, f, w) +} + func hashLister(ht HashType, f Fs, w io.Writer) error { return ListFn(f, func(o Object) { Stats.Checking(o.Remote()) diff --git a/fs/operations_test.go b/fs/operations_test.go index 40a1af16a..0a0d84bb6 100644 --- a/fs/operations_test.go +++ b/fs/operations_test.go @@ -386,7 +386,7 @@ func TestLsLong(t *testing.T) { } } -func TestMd5sum(t *testing.T) { +func TestHashSums(t *testing.T) { r := NewRun(t) defer r.Finalise() file1 := r.WriteBoth("potato2", "------------------------------------------------------------", t1) @@ -394,6 +394,8 @@ func TestMd5sum(t *testing.T) { fstest.CheckItems(t, r.fremote, file1, file2) + // MD5 Sum + var buf bytes.Buffer err := fs.Md5sum(r.fremote, &buf) require.NoError(t, err) @@ -408,20 +410,12 @@ func TestMd5sum(t *testing.T) { !strings.Contains(res, " potato2\n") { t.Errorf("potato2 missing: %q", res) } -} 
-func TestSha1sum(t *testing.T) { - r := NewRun(t) - defer r.Finalise() - file1 := r.WriteBoth("potato2", "------------------------------------------------------------", t1) - file2 := r.WriteBoth("empty space", "", t2) + // SHA1 Sum - fstest.CheckItems(t, r.fremote, file1, file2) - - var buf bytes.Buffer - err := fs.Sha1sum(r.fremote, &buf) + err = fs.Sha1sum(r.fremote, &buf) require.NoError(t, err) - res := buf.String() + res = buf.String() if !strings.Contains(res, "da39a3ee5e6b4b0d3255bfef95601890afd80709 empty space\n") && !strings.Contains(res, " UNSUPPORTED empty space\n") && !strings.Contains(res, " empty space\n") { @@ -432,6 +426,22 @@ func TestSha1sum(t *testing.T) { !strings.Contains(res, " potato2\n") { t.Errorf("potato2 missing: %q", res) } + + // Dropbox Hash Sum + + err = fs.DropboxHashSum(r.fremote, &buf) + require.NoError(t, err) + res = buf.String() + if !strings.Contains(res, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 empty space\n") && + !strings.Contains(res, " UNSUPPORTED empty space\n") && + !strings.Contains(res, " empty space\n") { + t.Errorf("empty space missing: %q", res) + } + if !strings.Contains(res, "a979481df794fed9c3990a6a422e0b1044ac802c15fab13af9c687f8bdbee01a potato2\n") && + !strings.Contains(res, " UNSUPPORTED potato2\n") && + !strings.Contains(res, " potato2\n") { + t.Errorf("potato2 missing: %q", res) + } } func TestCount(t *testing.T) {