dropbox: support Dropbox content hashing scheme - fixes #1302

* add support to hashing module
  * add dbhashsum to list the hashes
  * add support to dropbox module

This means objects uploaded to or downloaded from Dropbox will have
their hashes checked.

Note that after this change local objects calculate MD5, SHA1 and
DBHASH, which is excessive and needs to be fixed.
This commit is contained in:
Nick Craig-Wood 2017-05-26 15:09:31 +01:00
parent 8916455e4f
commit 6381959850
8 changed files with 85 additions and 37 deletions

View file

@ -13,6 +13,7 @@ import (
_ "github.com/ncw/rclone/cmd/copy" _ "github.com/ncw/rclone/cmd/copy"
_ "github.com/ncw/rclone/cmd/copyto" _ "github.com/ncw/rclone/cmd/copyto"
_ "github.com/ncw/rclone/cmd/cryptcheck" _ "github.com/ncw/rclone/cmd/cryptcheck"
_ "github.com/ncw/rclone/cmd/dbhashsum"
_ "github.com/ncw/rclone/cmd/dedupe" _ "github.com/ncw/rclone/cmd/dedupe"
_ "github.com/ncw/rclone/cmd/delete" _ "github.com/ncw/rclone/cmd/delete"
_ "github.com/ncw/rclone/cmd/genautocomplete" _ "github.com/ncw/rclone/cmd/genautocomplete"

View file

@ -91,7 +91,7 @@ To copy a local directory to a dropbox directory called backup
rclone copy /home/source remote:backup rclone copy /home/source remote:backup
### Modified time and MD5SUMs ### ### Modified time and Hashes ###
Dropbox supports modified times, but the only way to set a Dropbox supports modified times, but the only way to set a
modification time is to re-upload the file. modification time is to re-upload the file.
@ -102,6 +102,10 @@ decide to upload all your old data to fix the modification times. If
you don't want this to happen use `--size-only` or `--checksum` flag you don't want this to happen use `--size-only` or `--checksum` flag
to stop it. to stop it.
Dropbox supports [its own hash
type](https://www.dropbox.com/developers/reference/content-hash) which
is checked for all transfers.
### Specific options ### ### Specific options ###
Here are the command line options specific to this cloud storage Here are the command line options specific to this cloud storage

View file

@ -20,7 +20,7 @@ Here is an overview of the major features of each cloud storage system.
| Google Drive | MD5 | Yes | No | Yes | R/W | | Google Drive | MD5 | Yes | No | Yes | R/W |
| Amazon S3 | MD5 | Yes | No | No | R/W | | Amazon S3 | MD5 | Yes | No | No | R/W |
| Openstack Swift | MD5 | Yes | No | No | R/W | | Openstack Swift | MD5 | Yes | No | No | R/W |
| Dropbox | - | Yes | Yes | No | - | | Dropbox | DBHASH †| Yes | Yes | No | - |
| Google Cloud Storage | MD5 | Yes | No | No | R/W | | Google Cloud Storage | MD5 | Yes | No | No | R/W |
| Amazon Drive | MD5 | No | Yes | No | R | | Amazon Drive | MD5 | No | Yes | No | R |
| Microsoft OneDrive | SHA1 | Yes | Yes | No | R | | Microsoft OneDrive | SHA1 | Yes | Yes | No | R |
@ -28,7 +28,7 @@ Here is an overview of the major features of each cloud storage system.
| Backblaze B2 | SHA1 | Yes | No | No | R/W | | Backblaze B2 | SHA1 | Yes | No | No | R/W |
| Yandex Disk | MD5 | Yes | No | No | R/W | | Yandex Disk | MD5 | Yes | No | No | R/W |
| SFTP | - | Yes | Depends | No | - | | SFTP | - | Yes | Depends | No | - |
| FTP | None | No | Yes | No | - | | FTP | - | No | Yes | No | - |
| The local filesystem | All | Yes | Depends | No | - | | The local filesystem | All | Yes | Depends | No | - |
### Hash ### ### Hash ###
@ -41,6 +41,10 @@ the `check` command.
To use the checksum checks between filesystems they must support a To use the checksum checks between filesystems they must support a
common hash type. common hash type.
† Note that Dropbox supports [its own custom
hash](https://www.dropbox.com/developers/reference/content-hash).
This is the SHA256 sum of the concatenated SHA256 sums of each 4MB block of the file.
### ModTime ### ### ModTime ###
The cloud storage system supports setting modification times on The cloud storage system supports setting modification times on

View file

@ -2,7 +2,6 @@
package dropbox package dropbox
// FIXME put low level retries in // FIXME put low level retries in
// FIXME add dropbox style hashes
// FIXME dropbox for business would be quite easy to add // FIXME dropbox for business would be quite easy to add
/* /*
@ -99,12 +98,14 @@ type Fs struct {
} }
// Object describes a dropbox object // Object describes a dropbox object
//
// Dropbox Objects always have full metadata
type Object struct { type Object struct {
fs *Fs // what this object is part of fs *Fs // what this object is part of
remote string // The remote path remote string // The remote path
bytes int64 // size of the object bytes int64 // size of the object
modTime time.Time // time it was last modified modTime time.Time // time it was last modified
hasMetadata bool // metadata is valid hash string // content_hash of the object
} }
// ------------------------------------------------------------ // ------------------------------------------------------------
@ -640,7 +641,7 @@ func (f *Fs) DirMove(src fs.Fs, srcRemote, dstRemote string) error {
// Hashes returns the supported hash sets. // Hashes returns the supported hash sets.
func (f *Fs) Hashes() fs.HashSet { func (f *Fs) Hashes() fs.HashSet {
return fs.HashSet(fs.HashNone) return fs.HashSet(fs.HashDropbox)
} }
// ------------------------------------------------------------ // ------------------------------------------------------------
@ -663,9 +664,16 @@ func (o *Object) Remote() string {
return o.remote return o.remote
} }
// Hash is unsupported on Dropbox // Hash returns the dropbox special hash
func (o *Object) Hash(t fs.HashType) (string, error) { func (o *Object) Hash(t fs.HashType) (string, error) {
return "", fs.ErrHashUnsupported if t != fs.HashDropbox {
return "", fs.ErrHashUnsupported
}
err := o.readMetaData()
if err != nil {
return "", errors.Wrap(err, "failed to read hash from metadata")
}
return o.hash, nil
} }
// Size returns the size of an object in bytes // Size returns the size of an object in bytes
@ -679,7 +687,7 @@ func (o *Object) Size() int64 {
func (o *Object) setMetadataFromEntry(info *files.FileMetadata) error { func (o *Object) setMetadataFromEntry(info *files.FileMetadata) error {
o.bytes = int64(info.Size) o.bytes = int64(info.Size)
o.modTime = info.ClientModified o.modTime = info.ClientModified
o.hasMetadata = true o.hash = info.ContentHash
return nil return nil
} }
@ -722,7 +730,7 @@ func (o *Object) metadataKey() string {
// readMetaData gets the info if it hasn't already been fetched // readMetaData gets the info if it hasn't already been fetched
func (o *Object) readMetaData() (err error) { func (o *Object) readMetaData() (err error) {
if o.hasMetadata { if !o.modTime.IsZero() {
return nil return nil
} }
// Last resort // Last resort

View file

@ -9,6 +9,7 @@ import (
"io" "io"
"strings" "strings"
"github.com/ncw/rclone/dropbox/dbhash"
"github.com/pkg/errors" "github.com/pkg/errors"
) )
@ -26,18 +27,23 @@ const (
// HashSHA1 indicates SHA-1 support // HashSHA1 indicates SHA-1 support
HashSHA1 HashSHA1
// HashDropbox indicates Dropbox special hash
// https://www.dropbox.com/developers/reference/content-hash
HashDropbox
// HashNone indicates no hashes are supported // HashNone indicates no hashes are supported
HashNone HashType = 0 HashNone HashType = 0
) )
// SupportedHashes returns a set of all the supported hashes by // SupportedHashes returns a set of all the supported hashes by
// HashStream and MultiHasher. // HashStream and MultiHasher.
var SupportedHashes = NewHashSet(HashMD5, HashSHA1) var SupportedHashes = NewHashSet(HashMD5, HashSHA1, HashDropbox)
// HashWidth returns the width in characters for any HashType // HashWidth returns the width in characters for any HashType
var HashWidth = map[HashType]int{ var HashWidth = map[HashType]int{
HashMD5: 32, HashMD5: 32,
HashSHA1: 40, HashSHA1: 40,
HashDropbox: 64,
} }
// HashStream will calculate hashes of all supported hash types. // HashStream will calculate hashes of all supported hash types.
@ -73,6 +79,8 @@ func (h HashType) String() string {
return "MD5" return "MD5"
case HashSHA1: case HashSHA1:
return "SHA-1" return "SHA-1"
case HashDropbox:
return "DropboxHash"
default: default:
err := fmt.Sprintf("internal error: unknown hash type: 0x%x", int(h)) err := fmt.Sprintf("internal error: unknown hash type: 0x%x", int(h))
panic(err) panic(err)
@ -94,6 +102,8 @@ func hashFromTypes(set HashSet) (map[HashType]hash.Hash, error) {
hashers[t] = md5.New() hashers[t] = md5.New()
case HashSHA1: case HashSHA1:
hashers[t] = sha1.New() hashers[t] = sha1.New()
case HashDropbox:
hashers[t] = dbhash.New()
default: default:
err := fmt.Sprintf("internal error: Unsupported hash type %v", t) err := fmt.Sprintf("internal error: Unsupported hash type %v", t)
panic(err) panic(err)

View file

@ -65,16 +65,18 @@ var hashTestSet = []hashTest{
{ {
input: []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, input: []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
output: map[fs.HashType]string{ output: map[fs.HashType]string{
fs.HashMD5: "bf13fc19e5151ac57d4252e0e0f87abe", fs.HashMD5: "bf13fc19e5151ac57d4252e0e0f87abe",
fs.HashSHA1: "3ab6543c08a75f292a5ecedac87ec41642d12166", fs.HashSHA1: "3ab6543c08a75f292a5ecedac87ec41642d12166",
fs.HashDropbox: "214d2fcf3566e94c99ad2f59bd993daca46d8521a0c447adf4b324f53fddc0c7",
}, },
}, },
// Empty data set // Empty data set
{ {
input: []byte{}, input: []byte{},
output: map[fs.HashType]string{ output: map[fs.HashType]string{
fs.HashMD5: "d41d8cd98f00b204e9800998ecf8427e", fs.HashMD5: "d41d8cd98f00b204e9800998ecf8427e",
fs.HashSHA1: "da39a3ee5e6b4b0d3255bfef95601890afd80709", fs.HashSHA1: "da39a3ee5e6b4b0d3255bfef95601890afd80709",
fs.HashDropbox: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
}, },
}, },
} }
@ -88,13 +90,13 @@ func TestMultiHasher(t *testing.T) {
sums := mh.Sums() sums := mh.Sums()
for k, v := range sums { for k, v := range sums {
expect, ok := test.output[k] expect, ok := test.output[k]
require.True(t, ok) require.True(t, ok, "test output for hash not found")
assert.Equal(t, v, expect) assert.Equal(t, v, expect)
} }
// Test that all are present // Test that all are present
for k, v := range test.output { for k, v := range test.output {
expect, ok := sums[k] expect, ok := sums[k]
require.True(t, ok) require.True(t, ok, "test output for hash not found")
assert.Equal(t, v, expect) assert.Equal(t, v, expect)
} }
} }
@ -145,8 +147,8 @@ func TestHashStreamTypes(t *testing.T) {
} }
func TestHashSetStringer(t *testing.T) { func TestHashSetStringer(t *testing.T) {
h := fs.NewHashSet(fs.HashSHA1, fs.HashMD5) h := fs.NewHashSet(fs.HashSHA1, fs.HashMD5, fs.HashDropbox)
assert.Equal(t, h.String(), "[MD5, SHA-1]") assert.Equal(t, h.String(), "[MD5, SHA-1, DropboxHash]")
h = fs.NewHashSet(fs.HashSHA1) h = fs.NewHashSet(fs.HashSHA1)
assert.Equal(t, h.String(), "[SHA-1]") assert.Equal(t, h.String(), "[SHA-1]")
h = fs.NewHashSet() h = fs.NewHashSet()

View file

@ -978,6 +978,15 @@ func Sha1sum(f Fs, w io.Writer) error {
return hashLister(HashSHA1, f, w) return hashLister(HashSHA1, f, w)
} }
// DropboxHashSum list the Fs to the supplied writer
//
// Obeys includes and excludes
//
// Lists in parallel which may get them out of order
func DropboxHashSum(f Fs, w io.Writer) error {
return hashLister(HashDropbox, f, w)
}
func hashLister(ht HashType, f Fs, w io.Writer) error { func hashLister(ht HashType, f Fs, w io.Writer) error {
return ListFn(f, func(o Object) { return ListFn(f, func(o Object) {
Stats.Checking(o.Remote()) Stats.Checking(o.Remote())

View file

@ -386,7 +386,7 @@ func TestLsLong(t *testing.T) {
} }
} }
func TestMd5sum(t *testing.T) { func TestHashSums(t *testing.T) {
r := NewRun(t) r := NewRun(t)
defer r.Finalise() defer r.Finalise()
file1 := r.WriteBoth("potato2", "------------------------------------------------------------", t1) file1 := r.WriteBoth("potato2", "------------------------------------------------------------", t1)
@ -394,6 +394,8 @@ func TestMd5sum(t *testing.T) {
fstest.CheckItems(t, r.fremote, file1, file2) fstest.CheckItems(t, r.fremote, file1, file2)
// MD5 Sum
var buf bytes.Buffer var buf bytes.Buffer
err := fs.Md5sum(r.fremote, &buf) err := fs.Md5sum(r.fremote, &buf)
require.NoError(t, err) require.NoError(t, err)
@ -408,20 +410,12 @@ func TestMd5sum(t *testing.T) {
!strings.Contains(res, " potato2\n") { !strings.Contains(res, " potato2\n") {
t.Errorf("potato2 missing: %q", res) t.Errorf("potato2 missing: %q", res)
} }
}
func TestSha1sum(t *testing.T) { // SHA1 Sum
r := NewRun(t)
defer r.Finalise()
file1 := r.WriteBoth("potato2", "------------------------------------------------------------", t1)
file2 := r.WriteBoth("empty space", "", t2)
fstest.CheckItems(t, r.fremote, file1, file2) err = fs.Sha1sum(r.fremote, &buf)
var buf bytes.Buffer
err := fs.Sha1sum(r.fremote, &buf)
require.NoError(t, err) require.NoError(t, err)
res := buf.String() res = buf.String()
if !strings.Contains(res, "da39a3ee5e6b4b0d3255bfef95601890afd80709 empty space\n") && if !strings.Contains(res, "da39a3ee5e6b4b0d3255bfef95601890afd80709 empty space\n") &&
!strings.Contains(res, " UNSUPPORTED empty space\n") && !strings.Contains(res, " UNSUPPORTED empty space\n") &&
!strings.Contains(res, " empty space\n") { !strings.Contains(res, " empty space\n") {
@ -432,6 +426,22 @@ func TestSha1sum(t *testing.T) {
!strings.Contains(res, " potato2\n") { !strings.Contains(res, " potato2\n") {
t.Errorf("potato2 missing: %q", res) t.Errorf("potato2 missing: %q", res)
} }
// Dropbox Hash Sum
err = fs.DropboxHashSum(r.fremote, &buf)
require.NoError(t, err)
res = buf.String()
if !strings.Contains(res, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 empty space\n") &&
!strings.Contains(res, " UNSUPPORTED empty space\n") &&
!strings.Contains(res, " empty space\n") {
t.Errorf("empty space missing: %q", res)
}
if !strings.Contains(res, "a979481df794fed9c3990a6a422e0b1044ac802c15fab13af9c687f8bdbee01a potato2\n") &&
!strings.Contains(res, " UNSUPPORTED potato2\n") &&
!strings.Contains(res, " potato2\n") {
t.Errorf("potato2 missing: %q", res)
}
} }
func TestCount(t *testing.T) { func TestCount(t *testing.T) {