dropbox: support Dropbox content hashing scheme - fixes #1302

* add support to hashing module
  * add dbhashsum to list the hashes
  * add support to dropbox module

This means objects uploaded to or downloaded from Dropbox will have
their hashes checked.

Note that after this change local objects calculate MD5, SHA1 and
DBHASH, which is excessive and needs to be fixed.
This commit is contained in:
Nick Craig-Wood 2017-05-26 15:09:31 +01:00
parent 8916455e4f
commit 6381959850
8 changed files with 85 additions and 37 deletions

View file

@ -13,6 +13,7 @@ import (
_ "github.com/ncw/rclone/cmd/copy"
_ "github.com/ncw/rclone/cmd/copyto"
_ "github.com/ncw/rclone/cmd/cryptcheck"
_ "github.com/ncw/rclone/cmd/dbhashsum"
_ "github.com/ncw/rclone/cmd/dedupe"
_ "github.com/ncw/rclone/cmd/delete"
_ "github.com/ncw/rclone/cmd/genautocomplete"

View file

@ -91,7 +91,7 @@ To copy a local directory to a dropbox directory called backup
rclone copy /home/source remote:backup
### Modified time and MD5SUMs ###
### Modified time and Hashes ###
Dropbox supports modified times, but the only way to set a
modification time is to re-upload the file.
@ -102,6 +102,10 @@ decide to upload all your old data to fix the modification times. If
you don't want this to happen use `--size-only` or `--checksum` flag
to stop it.
Dropbox supports [its own hash
type](https://www.dropbox.com/developers/reference/content-hash) which
is checked for all transfers.
### Specific options ###
Here are the command line options specific to this cloud storage

View file

@ -20,7 +20,7 @@ Here is an overview of the major features of each cloud storage system.
| Google Drive | MD5 | Yes | No | Yes | R/W |
| Amazon S3 | MD5 | Yes | No | No | R/W |
| Openstack Swift | MD5 | Yes | No | No | R/W |
| Dropbox | - | Yes | Yes | No | - |
| Dropbox | DBHASH †| Yes | Yes | No | - |
| Google Cloud Storage | MD5 | Yes | No | No | R/W |
| Amazon Drive | MD5 | No | Yes | No | R |
| Microsoft OneDrive | SHA1 | Yes | Yes | No | R |
@ -28,7 +28,7 @@ Here is an overview of the major features of each cloud storage system.
| Backblaze B2 | SHA1 | Yes | No | No | R/W |
| Yandex Disk | MD5 | Yes | No | No | R/W |
| SFTP | - | Yes | Depends | No | - |
| FTP | None | No | Yes | No | - |
| FTP | - | No | Yes | No | - |
| The local filesystem | All | Yes | Depends | No | - |
### Hash ###
@ -41,6 +41,10 @@ the `check` command.
To use the checksum checks between filesystems they must support a
common hash type.
† Note that Dropbox supports [its own custom
hash](https://www.dropbox.com/developers/reference/content-hash).
This is the SHA256 sum of the concatenated SHA256 sums of each 4MB block of the file.
### ModTime ###
The cloud storage system supports setting modification times on

View file

@ -2,7 +2,6 @@
package dropbox
// FIXME put low level retries in
// FIXME add dropbox style hashes
// FIXME dropbox for business would be quite easy to add
/*
@ -99,12 +98,14 @@ type Fs struct {
}
// Object describes a dropbox object
//
// Dropbox Objects always have full metadata
type Object struct {
fs *Fs // what this object is part of
remote string // The remote path
bytes int64 // size of the object
modTime time.Time // time it was last modified
hasMetadata bool // metadata is valid
fs *Fs // what this object is part of
remote string // The remote path
bytes int64 // size of the object
modTime time.Time // time it was last modified
hash string // content_hash of the object
}
// ------------------------------------------------------------
@ -640,7 +641,7 @@ func (f *Fs) DirMove(src fs.Fs, srcRemote, dstRemote string) error {
// Hashes returns the supported hash sets.
func (f *Fs) Hashes() fs.HashSet {
return fs.HashSet(fs.HashNone)
return fs.HashSet(fs.HashDropbox)
}
// ------------------------------------------------------------
@ -663,9 +664,16 @@ func (o *Object) Remote() string {
return o.remote
}
// Hash is unsupported on Dropbox
// Hash returns the dropbox special hash
func (o *Object) Hash(t fs.HashType) (string, error) {
return "", fs.ErrHashUnsupported
if t != fs.HashDropbox {
return "", fs.ErrHashUnsupported
}
err := o.readMetaData()
if err != nil {
return "", errors.Wrap(err, "failed to read hash from metadata")
}
return o.hash, nil
}
// Size returns the size of an object in bytes
@ -679,7 +687,7 @@ func (o *Object) Size() int64 {
func (o *Object) setMetadataFromEntry(info *files.FileMetadata) error {
o.bytes = int64(info.Size)
o.modTime = info.ClientModified
o.hasMetadata = true
o.hash = info.ContentHash
return nil
}
@ -722,7 +730,7 @@ func (o *Object) metadataKey() string {
// readMetaData gets the info if it hasn't already been fetched
func (o *Object) readMetaData() (err error) {
if o.hasMetadata {
if !o.modTime.IsZero() {
return nil
}
// Last resort

View file

@ -9,6 +9,7 @@ import (
"io"
"strings"
"github.com/ncw/rclone/dropbox/dbhash"
"github.com/pkg/errors"
)
@ -26,18 +27,23 @@ const (
// HashSHA1 indicates SHA-1 support
HashSHA1
// HashDropbox indicates Dropbox special hash
// https://www.dropbox.com/developers/reference/content-hash
HashDropbox
// HashNone indicates no hashes are supported
HashNone HashType = 0
)
// SupportedHashes is the set of all the hash types supported by
// HashStream and MultiHasher.
var SupportedHashes = NewHashSet(HashMD5, HashSHA1)
var SupportedHashes = NewHashSet(HashMD5, HashSHA1, HashDropbox)
// HashWidth maps each HashType to the width in characters of its hash string
var HashWidth = map[HashType]int{
HashMD5: 32,
HashSHA1: 40,
HashMD5: 32,
HashSHA1: 40,
HashDropbox: 64,
}
// HashStream will calculate hashes of all supported hash types.
@ -73,6 +79,8 @@ func (h HashType) String() string {
return "MD5"
case HashSHA1:
return "SHA-1"
case HashDropbox:
return "DropboxHash"
default:
err := fmt.Sprintf("internal error: unknown hash type: 0x%x", int(h))
panic(err)
@ -94,6 +102,8 @@ func hashFromTypes(set HashSet) (map[HashType]hash.Hash, error) {
hashers[t] = md5.New()
case HashSHA1:
hashers[t] = sha1.New()
case HashDropbox:
hashers[t] = dbhash.New()
default:
err := fmt.Sprintf("internal error: Unsupported hash type %v", t)
panic(err)

View file

@ -65,16 +65,18 @@ var hashTestSet = []hashTest{
{
input: []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
output: map[fs.HashType]string{
fs.HashMD5: "bf13fc19e5151ac57d4252e0e0f87abe",
fs.HashSHA1: "3ab6543c08a75f292a5ecedac87ec41642d12166",
fs.HashMD5: "bf13fc19e5151ac57d4252e0e0f87abe",
fs.HashSHA1: "3ab6543c08a75f292a5ecedac87ec41642d12166",
fs.HashDropbox: "214d2fcf3566e94c99ad2f59bd993daca46d8521a0c447adf4b324f53fddc0c7",
},
},
// Empty data set
{
input: []byte{},
output: map[fs.HashType]string{
fs.HashMD5: "d41d8cd98f00b204e9800998ecf8427e",
fs.HashSHA1: "da39a3ee5e6b4b0d3255bfef95601890afd80709",
fs.HashMD5: "d41d8cd98f00b204e9800998ecf8427e",
fs.HashSHA1: "da39a3ee5e6b4b0d3255bfef95601890afd80709",
fs.HashDropbox: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
},
},
}
@ -88,13 +90,13 @@ func TestMultiHasher(t *testing.T) {
sums := mh.Sums()
for k, v := range sums {
expect, ok := test.output[k]
require.True(t, ok)
require.True(t, ok, "test output for hash not found")
assert.Equal(t, v, expect)
}
// Test that all are present
for k, v := range test.output {
expect, ok := sums[k]
require.True(t, ok)
require.True(t, ok, "test output for hash not found")
assert.Equal(t, v, expect)
}
}
@ -145,8 +147,8 @@ func TestHashStreamTypes(t *testing.T) {
}
func TestHashSetStringer(t *testing.T) {
h := fs.NewHashSet(fs.HashSHA1, fs.HashMD5)
assert.Equal(t, h.String(), "[MD5, SHA-1]")
h := fs.NewHashSet(fs.HashSHA1, fs.HashMD5, fs.HashDropbox)
assert.Equal(t, h.String(), "[MD5, SHA-1, DropboxHash]")
h = fs.NewHashSet(fs.HashSHA1)
assert.Equal(t, h.String(), "[SHA-1]")
h = fs.NewHashSet()

View file

@ -978,6 +978,15 @@ func Sha1sum(f Fs, w io.Writer) error {
return hashLister(HashSHA1, f, w)
}
// DropboxHashSum lists the Dropbox content hashes of the objects in
// f to the supplied writer w.
//
// Obeys includes and excludes.
//
// Lists in parallel, so the output may be out of order.
//
// It returns whatever error hashLister returns.
func DropboxHashSum(f Fs, w io.Writer) error {
return hashLister(HashDropbox, f, w)
}
func hashLister(ht HashType, f Fs, w io.Writer) error {
return ListFn(f, func(o Object) {
Stats.Checking(o.Remote())

View file

@ -386,7 +386,7 @@ func TestLsLong(t *testing.T) {
}
}
func TestMd5sum(t *testing.T) {
func TestHashSums(t *testing.T) {
r := NewRun(t)
defer r.Finalise()
file1 := r.WriteBoth("potato2", "------------------------------------------------------------", t1)
@ -394,6 +394,8 @@ func TestMd5sum(t *testing.T) {
fstest.CheckItems(t, r.fremote, file1, file2)
// MD5 Sum
var buf bytes.Buffer
err := fs.Md5sum(r.fremote, &buf)
require.NoError(t, err)
@ -408,20 +410,12 @@ func TestMd5sum(t *testing.T) {
!strings.Contains(res, " potato2\n") {
t.Errorf("potato2 missing: %q", res)
}
}
func TestSha1sum(t *testing.T) {
r := NewRun(t)
defer r.Finalise()
file1 := r.WriteBoth("potato2", "------------------------------------------------------------", t1)
file2 := r.WriteBoth("empty space", "", t2)
// SHA1 Sum
fstest.CheckItems(t, r.fremote, file1, file2)
var buf bytes.Buffer
err := fs.Sha1sum(r.fremote, &buf)
err = fs.Sha1sum(r.fremote, &buf)
require.NoError(t, err)
res := buf.String()
res = buf.String()
if !strings.Contains(res, "da39a3ee5e6b4b0d3255bfef95601890afd80709 empty space\n") &&
!strings.Contains(res, " UNSUPPORTED empty space\n") &&
!strings.Contains(res, " empty space\n") {
@ -432,6 +426,22 @@ func TestSha1sum(t *testing.T) {
!strings.Contains(res, " potato2\n") {
t.Errorf("potato2 missing: %q", res)
}
// Dropbox Hash Sum
err = fs.DropboxHashSum(r.fremote, &buf)
require.NoError(t, err)
res = buf.String()
if !strings.Contains(res, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 empty space\n") &&
!strings.Contains(res, " UNSUPPORTED empty space\n") &&
!strings.Contains(res, " empty space\n") {
t.Errorf("empty space missing: %q", res)
}
if !strings.Contains(res, "a979481df794fed9c3990a6a422e0b1044ac802c15fab13af9c687f8bdbee01a potato2\n") &&
!strings.Contains(res, " UNSUPPORTED potato2\n") &&
!strings.Contains(res, " potato2\n") {
t.Errorf("potato2 missing: %q", res)
}
}
func TestCount(t *testing.T) {