forked from TrueCloudLab/rclone
dropbox: support Dropbox content hashing scheme - fixes #1302
* add support to hashing module * add dbhashsum to list the hashes * add support to dropbox module. This means objects uploaded to and downloaded from Dropbox will have their hashes checked. Note that after this change local objects calculate MD5, SHA1 and DBHASH, which is excessive and needs to be fixed.
This commit is contained in:
parent
8916455e4f
commit
6381959850
8 changed files with 85 additions and 37 deletions
|
@ -13,6 +13,7 @@ import (
|
||||||
_ "github.com/ncw/rclone/cmd/copy"
|
_ "github.com/ncw/rclone/cmd/copy"
|
||||||
_ "github.com/ncw/rclone/cmd/copyto"
|
_ "github.com/ncw/rclone/cmd/copyto"
|
||||||
_ "github.com/ncw/rclone/cmd/cryptcheck"
|
_ "github.com/ncw/rclone/cmd/cryptcheck"
|
||||||
|
_ "github.com/ncw/rclone/cmd/dbhashsum"
|
||||||
_ "github.com/ncw/rclone/cmd/dedupe"
|
_ "github.com/ncw/rclone/cmd/dedupe"
|
||||||
_ "github.com/ncw/rclone/cmd/delete"
|
_ "github.com/ncw/rclone/cmd/delete"
|
||||||
_ "github.com/ncw/rclone/cmd/genautocomplete"
|
_ "github.com/ncw/rclone/cmd/genautocomplete"
|
||||||
|
|
|
@ -91,7 +91,7 @@ To copy a local directory to a dropbox directory called backup
|
||||||
|
|
||||||
rclone copy /home/source remote:backup
|
rclone copy /home/source remote:backup
|
||||||
|
|
||||||
### Modified time and MD5SUMs ###
|
### Modified time and Hashes ###
|
||||||
|
|
||||||
Dropbox supports modified times, but the only way to set a
|
Dropbox supports modified times, but the only way to set a
|
||||||
modification time is to re-upload the file.
|
modification time is to re-upload the file.
|
||||||
|
@ -102,6 +102,10 @@ decide to upload all your old data to fix the modification times. If
|
||||||
you don't want this to happen use `--size-only` or `--checksum` flag
|
you don't want this to happen use `--size-only` or `--checksum` flag
|
||||||
to stop it.
|
to stop it.
|
||||||
|
|
||||||
|
Dropbox supports [its own hash
|
||||||
|
type](https://www.dropbox.com/developers/reference/content-hash) which
|
||||||
|
is checked for all transfers.
|
||||||
|
|
||||||
### Specific options ###
|
### Specific options ###
|
||||||
|
|
||||||
Here are the command line options specific to this cloud storage
|
Here are the command line options specific to this cloud storage
|
||||||
|
|
|
@ -20,7 +20,7 @@ Here is an overview of the major features of each cloud storage system.
|
||||||
| Google Drive | MD5 | Yes | No | Yes | R/W |
|
| Google Drive | MD5 | Yes | No | Yes | R/W |
|
||||||
| Amazon S3 | MD5 | Yes | No | No | R/W |
|
| Amazon S3 | MD5 | Yes | No | No | R/W |
|
||||||
| Openstack Swift | MD5 | Yes | No | No | R/W |
|
| Openstack Swift | MD5 | Yes | No | No | R/W |
|
||||||
| Dropbox | - | Yes | Yes | No | - |
|
| Dropbox | DBHASH †| Yes | Yes | No | - |
|
||||||
| Google Cloud Storage | MD5 | Yes | No | No | R/W |
|
| Google Cloud Storage | MD5 | Yes | No | No | R/W |
|
||||||
| Amazon Drive | MD5 | No | Yes | No | R |
|
| Amazon Drive | MD5 | No | Yes | No | R |
|
||||||
| Microsoft OneDrive | SHA1 | Yes | Yes | No | R |
|
| Microsoft OneDrive | SHA1 | Yes | Yes | No | R |
|
||||||
|
@ -28,7 +28,7 @@ Here is an overview of the major features of each cloud storage system.
|
||||||
| Backblaze B2 | SHA1 | Yes | No | No | R/W |
|
| Backblaze B2 | SHA1 | Yes | No | No | R/W |
|
||||||
| Yandex Disk | MD5 | Yes | No | No | R/W |
|
| Yandex Disk | MD5 | Yes | No | No | R/W |
|
||||||
| SFTP | - | Yes | Depends | No | - |
|
| SFTP | - | Yes | Depends | No | - |
|
||||||
| FTP | None | No | Yes | No | - |
|
| FTP | - | No | Yes | No | - |
|
||||||
| The local filesystem | All | Yes | Depends | No | - |
|
| The local filesystem | All | Yes | Depends | No | - |
|
||||||
|
|
||||||
### Hash ###
|
### Hash ###
|
||||||
|
@ -41,6 +41,10 @@ the `check` command.
|
||||||
To use the checksum checks between filesystems they must support a
|
To use the checksum checks between filesystems they must support a
|
||||||
common hash type.
|
common hash type.
|
||||||
|
|
||||||
|
† Note that Dropbox supports [its own custom
|
||||||
|
hash](https://www.dropbox.com/developers/reference/content-hash).
|
||||||
|
This is a SHA256 sum of the concatenated SHA256 sums of each 4MB block of the file.
|
||||||
|
|
||||||
### ModTime ###
|
### ModTime ###
|
||||||
|
|
||||||
The cloud storage system supports setting modification times on
|
The cloud storage system supports setting modification times on
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
package dropbox
|
package dropbox
|
||||||
|
|
||||||
// FIXME put low level retries in
|
// FIXME put low level retries in
|
||||||
// FIXME add dropbox style hashes
|
|
||||||
// FIXME dropbox for business would be quite easy to add
|
// FIXME dropbox for business would be quite easy to add
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -99,12 +98,14 @@ type Fs struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Object describes a dropbox object
|
// Object describes a dropbox object
|
||||||
|
//
|
||||||
|
// Dropbox Objects always have full metadata
|
||||||
type Object struct {
|
type Object struct {
|
||||||
fs *Fs // what this object is part of
|
fs *Fs // what this object is part of
|
||||||
remote string // The remote path
|
remote string // The remote path
|
||||||
bytes int64 // size of the object
|
bytes int64 // size of the object
|
||||||
modTime time.Time // time it was last modified
|
modTime time.Time // time it was last modified
|
||||||
hasMetadata bool // metadata is valid
|
hash string // content_hash of the object
|
||||||
}
|
}
|
||||||
|
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
|
@ -640,7 +641,7 @@ func (f *Fs) DirMove(src fs.Fs, srcRemote, dstRemote string) error {
|
||||||
|
|
||||||
// Hashes returns the supported hash sets.
|
// Hashes returns the supported hash sets.
|
||||||
func (f *Fs) Hashes() fs.HashSet {
|
func (f *Fs) Hashes() fs.HashSet {
|
||||||
return fs.HashSet(fs.HashNone)
|
return fs.HashSet(fs.HashDropbox)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
|
@ -663,9 +664,16 @@ func (o *Object) Remote() string {
|
||||||
return o.remote
|
return o.remote
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hash is unsupported on Dropbox
|
// Hash returns the dropbox special hash
|
||||||
func (o *Object) Hash(t fs.HashType) (string, error) {
|
func (o *Object) Hash(t fs.HashType) (string, error) {
|
||||||
return "", fs.ErrHashUnsupported
|
if t != fs.HashDropbox {
|
||||||
|
return "", fs.ErrHashUnsupported
|
||||||
|
}
|
||||||
|
err := o.readMetaData()
|
||||||
|
if err != nil {
|
||||||
|
return "", errors.Wrap(err, "failed to read hash from metadata")
|
||||||
|
}
|
||||||
|
return o.hash, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Size returns the size of an object in bytes
|
// Size returns the size of an object in bytes
|
||||||
|
@ -679,7 +687,7 @@ func (o *Object) Size() int64 {
|
||||||
func (o *Object) setMetadataFromEntry(info *files.FileMetadata) error {
|
func (o *Object) setMetadataFromEntry(info *files.FileMetadata) error {
|
||||||
o.bytes = int64(info.Size)
|
o.bytes = int64(info.Size)
|
||||||
o.modTime = info.ClientModified
|
o.modTime = info.ClientModified
|
||||||
o.hasMetadata = true
|
o.hash = info.ContentHash
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -722,7 +730,7 @@ func (o *Object) metadataKey() string {
|
||||||
|
|
||||||
// readMetaData gets the info if it hasn't already been fetched
|
// readMetaData gets the info if it hasn't already been fetched
|
||||||
func (o *Object) readMetaData() (err error) {
|
func (o *Object) readMetaData() (err error) {
|
||||||
if o.hasMetadata {
|
if !o.modTime.IsZero() {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// Last resort
|
// Last resort
|
||||||
|
|
16
fs/hash.go
16
fs/hash.go
|
@ -9,6 +9,7 @@ import (
|
||||||
"io"
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ncw/rclone/dropbox/dbhash"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -26,18 +27,23 @@ const (
|
||||||
// HashSHA1 indicates SHA-1 support
|
// HashSHA1 indicates SHA-1 support
|
||||||
HashSHA1
|
HashSHA1
|
||||||
|
|
||||||
|
// HashDropbox indicates Dropbox special hash
|
||||||
|
// https://www.dropbox.com/developers/reference/content-hash
|
||||||
|
HashDropbox
|
||||||
|
|
||||||
// HashNone indicates no hashes are supported
|
// HashNone indicates no hashes are supported
|
||||||
HashNone HashType = 0
|
HashNone HashType = 0
|
||||||
)
|
)
|
||||||
|
|
||||||
// SupportedHashes returns a set of all the supported hashes by
|
// SupportedHashes returns a set of all the supported hashes by
|
||||||
// HashStream and MultiHasher.
|
// HashStream and MultiHasher.
|
||||||
var SupportedHashes = NewHashSet(HashMD5, HashSHA1)
|
var SupportedHashes = NewHashSet(HashMD5, HashSHA1, HashDropbox)
|
||||||
|
|
||||||
// HashWidth returns the width in characters for any HashType
|
// HashWidth returns the width in characters for any HashType
|
||||||
var HashWidth = map[HashType]int{
|
var HashWidth = map[HashType]int{
|
||||||
HashMD5: 32,
|
HashMD5: 32,
|
||||||
HashSHA1: 40,
|
HashSHA1: 40,
|
||||||
|
HashDropbox: 64,
|
||||||
}
|
}
|
||||||
|
|
||||||
// HashStream will calculate hashes of all supported hash types.
|
// HashStream will calculate hashes of all supported hash types.
|
||||||
|
@ -73,6 +79,8 @@ func (h HashType) String() string {
|
||||||
return "MD5"
|
return "MD5"
|
||||||
case HashSHA1:
|
case HashSHA1:
|
||||||
return "SHA-1"
|
return "SHA-1"
|
||||||
|
case HashDropbox:
|
||||||
|
return "DropboxHash"
|
||||||
default:
|
default:
|
||||||
err := fmt.Sprintf("internal error: unknown hash type: 0x%x", int(h))
|
err := fmt.Sprintf("internal error: unknown hash type: 0x%x", int(h))
|
||||||
panic(err)
|
panic(err)
|
||||||
|
@ -94,6 +102,8 @@ func hashFromTypes(set HashSet) (map[HashType]hash.Hash, error) {
|
||||||
hashers[t] = md5.New()
|
hashers[t] = md5.New()
|
||||||
case HashSHA1:
|
case HashSHA1:
|
||||||
hashers[t] = sha1.New()
|
hashers[t] = sha1.New()
|
||||||
|
case HashDropbox:
|
||||||
|
hashers[t] = dbhash.New()
|
||||||
default:
|
default:
|
||||||
err := fmt.Sprintf("internal error: Unsupported hash type %v", t)
|
err := fmt.Sprintf("internal error: Unsupported hash type %v", t)
|
||||||
panic(err)
|
panic(err)
|
||||||
|
|
|
@ -65,16 +65,18 @@ var hashTestSet = []hashTest{
|
||||||
{
|
{
|
||||||
input: []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
|
input: []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
|
||||||
output: map[fs.HashType]string{
|
output: map[fs.HashType]string{
|
||||||
fs.HashMD5: "bf13fc19e5151ac57d4252e0e0f87abe",
|
fs.HashMD5: "bf13fc19e5151ac57d4252e0e0f87abe",
|
||||||
fs.HashSHA1: "3ab6543c08a75f292a5ecedac87ec41642d12166",
|
fs.HashSHA1: "3ab6543c08a75f292a5ecedac87ec41642d12166",
|
||||||
|
fs.HashDropbox: "214d2fcf3566e94c99ad2f59bd993daca46d8521a0c447adf4b324f53fddc0c7",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
// Empty data set
|
// Empty data set
|
||||||
{
|
{
|
||||||
input: []byte{},
|
input: []byte{},
|
||||||
output: map[fs.HashType]string{
|
output: map[fs.HashType]string{
|
||||||
fs.HashMD5: "d41d8cd98f00b204e9800998ecf8427e",
|
fs.HashMD5: "d41d8cd98f00b204e9800998ecf8427e",
|
||||||
fs.HashSHA1: "da39a3ee5e6b4b0d3255bfef95601890afd80709",
|
fs.HashSHA1: "da39a3ee5e6b4b0d3255bfef95601890afd80709",
|
||||||
|
fs.HashDropbox: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -88,13 +90,13 @@ func TestMultiHasher(t *testing.T) {
|
||||||
sums := mh.Sums()
|
sums := mh.Sums()
|
||||||
for k, v := range sums {
|
for k, v := range sums {
|
||||||
expect, ok := test.output[k]
|
expect, ok := test.output[k]
|
||||||
require.True(t, ok)
|
require.True(t, ok, "test output for hash not found")
|
||||||
assert.Equal(t, v, expect)
|
assert.Equal(t, v, expect)
|
||||||
}
|
}
|
||||||
// Test that all are present
|
// Test that all are present
|
||||||
for k, v := range test.output {
|
for k, v := range test.output {
|
||||||
expect, ok := sums[k]
|
expect, ok := sums[k]
|
||||||
require.True(t, ok)
|
require.True(t, ok, "test output for hash not found")
|
||||||
assert.Equal(t, v, expect)
|
assert.Equal(t, v, expect)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -145,8 +147,8 @@ func TestHashStreamTypes(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestHashSetStringer(t *testing.T) {
|
func TestHashSetStringer(t *testing.T) {
|
||||||
h := fs.NewHashSet(fs.HashSHA1, fs.HashMD5)
|
h := fs.NewHashSet(fs.HashSHA1, fs.HashMD5, fs.HashDropbox)
|
||||||
assert.Equal(t, h.String(), "[MD5, SHA-1]")
|
assert.Equal(t, h.String(), "[MD5, SHA-1, DropboxHash]")
|
||||||
h = fs.NewHashSet(fs.HashSHA1)
|
h = fs.NewHashSet(fs.HashSHA1)
|
||||||
assert.Equal(t, h.String(), "[SHA-1]")
|
assert.Equal(t, h.String(), "[SHA-1]")
|
||||||
h = fs.NewHashSet()
|
h = fs.NewHashSet()
|
||||||
|
|
|
@ -978,6 +978,15 @@ func Sha1sum(f Fs, w io.Writer) error {
|
||||||
return hashLister(HashSHA1, f, w)
|
return hashLister(HashSHA1, f, w)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DropboxHashSum lists the Fs to the supplied writer
|
||||||
|
//
|
||||||
|
// Obeys includes and excludes
|
||||||
|
//
|
||||||
|
// Lists in parallel which may get them out of order
|
||||||
|
func DropboxHashSum(f Fs, w io.Writer) error {
|
||||||
|
return hashLister(HashDropbox, f, w)
|
||||||
|
}
|
||||||
|
|
||||||
func hashLister(ht HashType, f Fs, w io.Writer) error {
|
func hashLister(ht HashType, f Fs, w io.Writer) error {
|
||||||
return ListFn(f, func(o Object) {
|
return ListFn(f, func(o Object) {
|
||||||
Stats.Checking(o.Remote())
|
Stats.Checking(o.Remote())
|
||||||
|
|
|
@ -386,7 +386,7 @@ func TestLsLong(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMd5sum(t *testing.T) {
|
func TestHashSums(t *testing.T) {
|
||||||
r := NewRun(t)
|
r := NewRun(t)
|
||||||
defer r.Finalise()
|
defer r.Finalise()
|
||||||
file1 := r.WriteBoth("potato2", "------------------------------------------------------------", t1)
|
file1 := r.WriteBoth("potato2", "------------------------------------------------------------", t1)
|
||||||
|
@ -394,6 +394,8 @@ func TestMd5sum(t *testing.T) {
|
||||||
|
|
||||||
fstest.CheckItems(t, r.fremote, file1, file2)
|
fstest.CheckItems(t, r.fremote, file1, file2)
|
||||||
|
|
||||||
|
// MD5 Sum
|
||||||
|
|
||||||
var buf bytes.Buffer
|
var buf bytes.Buffer
|
||||||
err := fs.Md5sum(r.fremote, &buf)
|
err := fs.Md5sum(r.fremote, &buf)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
@ -408,20 +410,12 @@ func TestMd5sum(t *testing.T) {
|
||||||
!strings.Contains(res, " potato2\n") {
|
!strings.Contains(res, " potato2\n") {
|
||||||
t.Errorf("potato2 missing: %q", res)
|
t.Errorf("potato2 missing: %q", res)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
func TestSha1sum(t *testing.T) {
|
// SHA1 Sum
|
||||||
r := NewRun(t)
|
|
||||||
defer r.Finalise()
|
|
||||||
file1 := r.WriteBoth("potato2", "------------------------------------------------------------", t1)
|
|
||||||
file2 := r.WriteBoth("empty space", "", t2)
|
|
||||||
|
|
||||||
fstest.CheckItems(t, r.fremote, file1, file2)
|
err = fs.Sha1sum(r.fremote, &buf)
|
||||||
|
|
||||||
var buf bytes.Buffer
|
|
||||||
err := fs.Sha1sum(r.fremote, &buf)
|
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
res := buf.String()
|
res = buf.String()
|
||||||
if !strings.Contains(res, "da39a3ee5e6b4b0d3255bfef95601890afd80709 empty space\n") &&
|
if !strings.Contains(res, "da39a3ee5e6b4b0d3255bfef95601890afd80709 empty space\n") &&
|
||||||
!strings.Contains(res, " UNSUPPORTED empty space\n") &&
|
!strings.Contains(res, " UNSUPPORTED empty space\n") &&
|
||||||
!strings.Contains(res, " empty space\n") {
|
!strings.Contains(res, " empty space\n") {
|
||||||
|
@ -432,6 +426,22 @@ func TestSha1sum(t *testing.T) {
|
||||||
!strings.Contains(res, " potato2\n") {
|
!strings.Contains(res, " potato2\n") {
|
||||||
t.Errorf("potato2 missing: %q", res)
|
t.Errorf("potato2 missing: %q", res)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Dropbox Hash Sum
|
||||||
|
|
||||||
|
err = fs.DropboxHashSum(r.fremote, &buf)
|
||||||
|
require.NoError(t, err)
|
||||||
|
res = buf.String()
|
||||||
|
if !strings.Contains(res, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 empty space\n") &&
|
||||||
|
!strings.Contains(res, " UNSUPPORTED empty space\n") &&
|
||||||
|
!strings.Contains(res, " empty space\n") {
|
||||||
|
t.Errorf("empty space missing: %q", res)
|
||||||
|
}
|
||||||
|
if !strings.Contains(res, "a979481df794fed9c3990a6a422e0b1044ac802c15fab13af9c687f8bdbee01a potato2\n") &&
|
||||||
|
!strings.Contains(res, " UNSUPPORTED potato2\n") &&
|
||||||
|
!strings.Contains(res, " potato2\n") {
|
||||||
|
t.Errorf("potato2 missing: %q", res)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCount(t *testing.T) {
|
func TestCount(t *testing.T) {
|
||||||
|
|
Loading…
Reference in a new issue