From 1d9e76bb0f5ca68ed79ea0c7eb05b97f0b7b361b Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Sun, 16 Aug 2015 23:24:34 +0100 Subject: [PATCH] dropbox: remove datastore - Fixes #55 #84 This means that dropbox no longer stores MD5SUMs and modified times. Fixup the tests so that blank MD5SUMs are ignored, and that if Precision is set to a fs.ModTimeNotSupported, ModTimes can be ignored too. This opens the door for other FSs which don't support metadata easily. --- docs/content/dropbox.md | 16 +-- dropbox/dropbox.go | 293 +++----------------------------------- fs/fs.go | 4 + fs/operations.go | 14 +- fs/operations_test.go | 41 ++++-- fstest/fstest.go | 15 +- fstest/fstests/fstests.go | 7 +- 7 files changed, 91 insertions(+), 299 deletions(-) diff --git a/docs/content/dropbox.md b/docs/content/dropbox.md index 728b43270..fa2e41cdf 100644 --- a/docs/content/dropbox.md +++ b/docs/content/dropbox.md @@ -71,22 +71,12 @@ To copy a local directory to a dropbox directory called backup rclone copy /home/source remote:backup -### Modified time ### +### Modified time and MD5SUMs ### -Md5sums and timestamps in RFC3339 format accurate to 1ns are stored in -a Dropbox datastore called "rclone". +Dropbox doesn't have the capability of storing modification times or +MD5SUMs so syncs will effectively have the `--size-only` flag set. ### Limitations ### -Dropbox datastores are limited to 100,000 rows so this is the maximum -number of files rclone can manage on Dropbox. - Note that Dropbox is case sensitive so you can't have a file called "Hello.doc" and one called "hello.doc". - -If you use the desktop sync tool and rclone on the same files then the -md5sums and modification times may get out of sync as far as rclone is -concerned. This will cause `Corrupted on transfer: md5sums differ` -error message when fetching files. You can work around this by using -the `--size-only` flag to ignore the md5sums and modification times -for these files. diff --git a/dropbox/dropbox.go b/dropbox/dropbox.go index 547eb2906..f327bba62 100644 --- a/dropbox/dropbox.go +++ b/dropbox/dropbox.go @@ -6,11 +6,6 @@ Limitations of dropbox File system is case insensitive -The datastore is limited to 100,000 records which therefore is the -limit of the number of files that rclone can use on dropbox. - -FIXME only open datastore if we need it? - FIXME Getting this sometimes Failed to copy: Upload failed: invalid character '<' looking for beginning of value This is a JSON decode error - from Update / UploadByChunk @@ -38,10 +33,10 @@ import ( "errors" "fmt" "io" + "io/ioutil" "log" "path" "strings" - "sync" "time" "github.com/ncw/rclone/fs" @@ -50,17 +45,10 @@ import ( // Constants const ( - rcloneAppKey = "5jcck7diasz0rqy" - rcloneAppSecret = "1n9m04y2zx7bf26" - uploadChunkSize = 64 * 1024 // chunk size for upload - metadataLimit = dropbox.MetadataLimitDefault // max items to fetch at once - datastoreName = "rclone" - tableName = "metadata" - md5sumField = "md5sum" - mtimeField = "mtime" - maxCommitRetries = 5 - timeFormatIn = time.RFC3339 - timeFormatOut = "2006-01-02T15:04:05.000000000Z07:00" + rcloneAppKey = "5jcck7diasz0rqy" + rcloneAppSecret = "1n9m04y2zx7bf26" + uploadChunkSize = 64 * 1024 // chunk size for upload + metadataLimit = dropbox.MetadataLimitDefault // max items to fetch at once ) // Register with Fs @@ -111,24 +99,19 @@ func configHelper(name string) { // FsDropbox represents a remote dropbox server type FsDropbox struct { - db *dropbox.Dropbox // the connection to the dropbox server - root string // the path we are working on - slashRoot string // root with "/" prefix, lowercase - slashRootSlash string // root with "/" prefix and postfix, lowercase - datastoreManager *dropbox.DatastoreManager - datastore *dropbox.Datastore - table *dropbox.Table - datastoreMutex sync.Mutex // lock this when using the datastore - datastoreErr error // pending errors on the datastore + db *dropbox.Dropbox // the connection to the dropbox server + root string // the path we are working on + slashRoot string // root with "/" prefix, lowercase + slashRootSlash string // root with "/" prefix and postfix, lowercase } // FsObjectDropbox describes a dropbox object type FsObjectDropbox struct { - dropbox *FsDropbox // what this object is part of - remote string // The remote path - md5sum string // md5sum of the object - bytes int64 // size of the object - modTime time.Time // time it was last modified + dropbox *FsDropbox // what this object is part of + remote string // The remote path + bytes int64 // size of the object + modTime time.Time // time it was last modified + hasMetadata bool // metadata is valid } // ------------------------------------------------------------ @@ -170,12 +153,6 @@ func NewFs(name, root string) (fs.Fs, error) { // Authorize the client db.SetAccessToken(token) - // Make a db to store rclone metadata in - f.datastoreManager = db.NewDatastoreManager() - - // Open the datastore in the background - go f.openDataStore() - // See if the root is actually an object entry, err := f.db.Metadata(f.slashRoot, false, false, "", "", metadataLimit) if err == nil && !entry.IsDir { @@ -205,30 +182,6 @@ func (f *FsDropbox) setRoot(root string) { } } -// Opens the datastore in f -func (f *FsDropbox) openDataStore() { - f.datastoreMutex.Lock() - defer f.datastoreMutex.Unlock() - fs.Debug(f, "Open rclone datastore") - // Open the rclone datastore - var err error - f.datastore, err = f.datastoreManager.OpenDatastore(datastoreName) - if err != nil { - fs.Log(f, "Failed to open datastore: %v", err) - f.datastoreErr = err - return - } - - // Get the table we are using - f.table, err = f.datastore.GetTable(tableName) - if err != nil { - fs.Log(f, "Failed to open datastore table: %v", err) - f.datastoreErr = err - return - } - fs.Debug(f, "Open rclone datastore finished") -} - // Return an FsObject from a path // // May return nil if an error occurred @@ -295,13 +248,6 @@ func (f *FsDropbox) list(out fs.ObjectsChan) { entry := deltaEntry.Entry if entry == nil { // This notifies of a deleted object - fs.Debug(f, "Deleting metadata for %q", deltaEntry.Path) - key := metadataKey(deltaEntry.Path) // Path is lowercased - err := f.deleteMetadata(key) - if err != nil { - fs.Debug(f, "Failed to delete metadata for %q", deltaEntry.Path) - // Don't accumulate Error here - } } else { if len(entry.Path) <= 1 || entry.Path[0] != '/' { fs.Stats.Error() @@ -452,8 +398,8 @@ func (f *FsDropbox) Rmdir() error { } // Return the precision -func (fs *FsDropbox) Precision() time.Duration { - return time.Nanosecond +func (f *FsDropbox) Precision() time.Duration { + return fs.ModTimeNotSupported } // Purge deletes all the files and the container @@ -462,81 +408,11 @@ func (fs *FsDropbox) Precision() time.Duration { // deleting all the files quicker than just running Remove() on the // result of List() func (f *FsDropbox) Purge() error { - // Delete metadata first - var wg sync.WaitGroup - to_be_deleted := f.List() - wg.Add(fs.Config.Transfers) - for i := 0; i < fs.Config.Transfers; i++ { - go func() { - defer wg.Done() - for dst := range to_be_deleted { - o := dst.(*FsObjectDropbox) - o.deleteMetadata() - } - }() - } - wg.Wait() - // Let dropbox delete the filesystem tree _, err := f.db.Delete(f.slashRoot) return err } -// Tries the transaction in fn then calls commit, repeating until retry limit -// -// Holds datastore mutex while in progress -func (f *FsDropbox) transaction(fn func() error) error { - f.datastoreMutex.Lock() - defer f.datastoreMutex.Unlock() - if f.datastoreErr != nil { - return f.datastoreErr - } - var err error - for i := 1; i <= maxCommitRetries; i++ { - err = fn() - if err != nil { - return err - } - - err = f.datastore.Commit() - if err == nil { - break - } - fs.Debug(f, "Retrying transaction %d/%d", i, maxCommitRetries) - } - if err != nil { - return fmt.Errorf("Failed to commit metadata changes: %s", err) - } - return nil -} - -// Deletes the medadata associated with this key -func (f *FsDropbox) deleteMetadata(key string) error { - return f.transaction(func() error { - record, err := f.table.Get(key) - if err != nil { - return fmt.Errorf("Couldn't get record: %s", err) - } - if record == nil { - return nil - } - record.DeleteRecord() - return nil - }) -} - -// Reads the record attached to key -// -// Holds datastore mutex while in progress -func (f *FsDropbox) readRecord(key string) (*dropbox.Record, error) { - f.datastoreMutex.Lock() - defer f.datastoreMutex.Unlock() - if f.datastoreErr != nil { - return nil, f.datastoreErr - } - return f.table.Get(key) -} - // ------------------------------------------------------------ // Return the parent Fs @@ -558,33 +434,8 @@ func (o *FsObjectDropbox) Remote() string { } // Md5sum returns the Md5sum of an object returning a lowercase hex string -// -// FIXME has to download the file! func (o *FsObjectDropbox) Md5sum() (string, error) { - if o.md5sum != "" { - return o.md5sum, nil - } - err := o.readMetaData() - if err != nil { - fs.Log(o, "Failed to read metadata: %s", err) - return "", fmt.Errorf("Failed to read metadata: %s", err) - - } - - // For pre-existing files which have no md5sum can read it and set it? - - // in, err := o.Open() - // if err != nil { - // return "", err - // } - // defer in.Close() - // hash := md5.New() - // _, err = io.Copy(hash, in) - // if err != nil { - // return "", err - // } - // o.md5sum = fmt.Sprintf("%x", hash.Sum(nil)) - return o.md5sum, nil + return "", nil } // Size returns the size of an object in bytes @@ -598,6 +449,7 @@ func (o *FsObjectDropbox) Size() int64 { func (o *FsObjectDropbox) setMetadataFromEntry(info *dropbox.Entry) { o.bytes = info.Bytes o.modTime = time.Time(info.ClientMtime) + o.hasMetadata = true } // Reads the entry from dropbox @@ -645,55 +497,9 @@ func (o *FsObjectDropbox) metadataKey() string { // readMetaData gets the info if it hasn't already been fetched func (o *FsObjectDropbox) readMetaData() (err error) { - if o.md5sum != "" { + if o.hasMetadata { return nil } - - // fs.Debug(o, "Reading metadata from datastore") - record, err := o.dropbox.readRecord(o.metadataKey()) - if err != nil { - fs.Debug(o, "Couldn't read metadata: %s", err) - record = nil - } - - if record != nil { - // Read md5sum - md5sumInterface, ok, err := record.Get(md5sumField) - if err != nil { - return err - } - if !ok { - fs.Debug(o, "Couldn't find md5sum in record") - } else { - md5sum, ok := md5sumInterface.(string) - if !ok { - fs.Debug(o, "md5sum not a string") - } else { - o.md5sum = md5sum - } - } - - // read mtime - mtimeInterface, ok, err := record.Get(mtimeField) - if err != nil { - return err - } - if !ok { - fs.Debug(o, "Couldn't find mtime in record") - } else { - mtime, ok := mtimeInterface.(string) - if !ok { - fs.Debug(o, "mtime not a string") - } else { - modTime, err := time.Parse(timeFormatIn, mtime) - if err != nil { - return err - } - o.modTime = modTime - } - } - } - // Last resort return o.readEntryAndSetMetadata() } @@ -711,59 +517,12 @@ func (o *FsObjectDropbox) ModTime() time.Time { return o.modTime } -// Sets the modification time of the local fs object into the record -// FIXME if we don't set md5sum what will that do? -func (o *FsObjectDropbox) setModTimeAndMd5sum(modTime time.Time, md5sum string) error { - key := o.metadataKey() - // fs.Debug(o, "Writing metadata to datastore") - return o.dropbox.transaction(func() error { - record, err := o.dropbox.table.GetOrInsert(key) - if err != nil { - return fmt.Errorf("Couldn't read record: %s", err) - } - - if md5sum != "" { - err = record.Set(md5sumField, md5sum) - if err != nil { - return fmt.Errorf("Couldn't set md5sum record: %s", err) - } - o.md5sum = md5sum - } - - if !modTime.IsZero() { - mtime := modTime.Format(timeFormatOut) - err := record.Set(mtimeField, mtime) - if err != nil { - return fmt.Errorf("Couldn't set mtime record: %s", err) - } - o.modTime = modTime - } - - return nil - }) -} - -// Deletes the medadata associated with this file -// -// It logs any errors -func (o *FsObjectDropbox) deleteMetadata() { - fs.Debug(o, "Deleting metadata from datastore") - err := o.dropbox.deleteMetadata(o.metadataKey()) - if err != nil { - fs.ErrorLog(o, "Error deleting metadata: %v", err) - fs.Stats.Error() - } -} - // Sets the modification time of the local fs object // // Commits the datastore func (o *FsObjectDropbox) SetModTime(modTime time.Time) { - err := o.setModTimeAndMd5sum(modTime, "") - if err != nil { - fs.Stats.Error() - fs.ErrorLog(o, err.Error()) - } + // FIXME not implemented + return } // Is this object storable @@ -783,22 +542,16 @@ func (o *FsObjectDropbox) Open() (in io.ReadCloser, err error) { // // The new object may have been created if an error is returned func (o *FsObjectDropbox) Update(in io.Reader, modTime time.Time, size int64) error { - // Calculate md5sum as we upload it - hash := md5.New() - rc := &readCloser{in: io.TeeReader(in, hash)} - entry, err := o.dropbox.db.UploadByChunk(rc, uploadChunkSize, o.remotePath(), true, "") + entry, err := o.dropbox.db.UploadByChunk(ioutil.NopCloser(in), uploadChunkSize, o.remotePath(), true, "") if err != nil { return fmt.Errorf("Upload failed: %s", err) } o.setMetadataFromEntry(entry) - - md5sum := fmt.Sprintf("%x", hash.Sum(nil)) - return o.setModTimeAndMd5sum(modTime, md5sum) + return nil } // Remove an object func (o *FsObjectDropbox) Remove() error { - o.deleteMetadata() _, err := o.dropbox.db.Delete(o.remotePath()) return err } diff --git a/fs/fs.go b/fs/fs.go index 2180a06f7..72aab51cf 100644 --- a/fs/fs.go +++ b/fs/fs.go @@ -14,6 +14,8 @@ import ( const ( // User agent for Fs which can set it UserAgent = "rclone/" + Version + // Very large precision value to show mod time isn't supported + ModTimeNotSupported = 100 * 365 * 24 * time.Hour ) // Globals @@ -107,9 +109,11 @@ type Object interface { Remote() string // Md5sum returns the md5 checksum of the file + // If no Md5sum is available it returns "" Md5sum() (string, error) // ModTime returns the modification date of the file + // It should return a best guess if one isn't available ModTime() time.Time // SetModTime sets the metadata on the object to set the modification date diff --git a/fs/operations.go b/fs/operations.go index dd697b7bb..b058b151b 100644 --- a/fs/operations.go +++ b/fs/operations.go @@ -27,6 +27,14 @@ func CalculateModifyWindow(fs ...Fs) { Debug(fs[0], "Modify window is %s\n", Config.ModifyWindow) } +// Md5sumsEqual checks to see if src == dst, but ignores empty strings +func Md5sumsEqual(src, dst string) bool { + if src == "" || dst == "" { + return true + } + return src == dst +} + // Check the two files to see if the MD5sums are the same // // May return an error which will already have been logged @@ -47,7 +55,7 @@ func CheckMd5sums(src, dst Object) (bool, error) { } // Debug("Src MD5 %s", srcMd5) // Debug("Dst MD5 %s", obj.Hash) - return srcMd5 == dstMd5, nil + return Md5sumsEqual(srcMd5, dstMd5), nil } // Checks to see if the src and dst objects are equal by looking at @@ -203,7 +211,7 @@ tryAgain: if md5sumErr != nil { Stats.Error() ErrorLog(dst, "Failed to read md5sum: %s", md5sumErr) - } else if dstMd5sum != "" && srcMd5sum != dstMd5sum { + } else if !Md5sumsEqual(srcMd5sum, dstMd5sum) { Stats.Error() err = fmt.Errorf("Corrupted on transfer: md5sums differ %q vs %q", srcMd5sum, dstMd5sum) ErrorLog(dst, "%s", err) @@ -525,7 +533,7 @@ func Md5sum(f Fs, w io.Writer) error { Stats.DoneChecking(o) if err != nil { Debug(o, "Failed to read MD5: %v", err) - md5sum = "UNKNOWN" + md5sum = "ERROR" } syncFprintf(w, "%32s %s\n", md5sum, o.Remote()) }) diff --git a/fs/operations_test.go b/fs/operations_test.go index 44a7894c7..1a60779ef 100644 --- a/fs/operations_test.go +++ b/fs/operations_test.go @@ -398,14 +398,37 @@ func TestLsLong(t *testing.T) { t.Fatalf("List failed: %v", err) } res := buf.String() - timeFormat := "2006-01-02 15:04:05" - m1 := regexp.MustCompile(`(?m)^ 0 ` + t2.Local().Format(timeFormat) + `\.\d{9} empty space$`) - if !m1.MatchString(res) { - t.Errorf("empty space missing: %q", res) + lines := strings.Split(strings.Trim(res, "\n"), "\n") + if len(lines) != 2 { + t.Fatalf("Wrong number of lines in list: %q", lines) } - m2 := regexp.MustCompile(`(?m)^ 60 ` + t1.Local().Format(timeFormat) + `\.\d{9} potato2$`) - if !m2.MatchString(res) { + + timeFormat := "2006-01-02 15:04:05.000000000" + precision := fremote.Precision() + checkTime := func(m, filename string, expected time.Time) { + modTime, err := time.Parse(timeFormat, m) + if err != nil { + t.Errorf("Error parsing %q: %v", m, err) + } else { + dt, ok := fstest.CheckTimeEqualWithPrecision(expected, modTime, precision) + if !ok { + t.Errorf("%s: Modification time difference too big |%s| > %s (%s vs %s) (precision %s)", filename, dt, precision, modTime, expected, precision) + } + } + } + + m1 := regexp.MustCompile(`(?m)^ 0 (\d{4}-\d\d-\d\d \d\d:\d\d:\d\d\.\d{9}) empty space$`) + if ms := m1.FindStringSubmatch(res); ms == nil { + t.Errorf("empty space missing: %q", res) + } else { + checkTime(ms[1], "empty space", t2.Local()) + } + + m2 := regexp.MustCompile(`(?m)^ 60 (\d{4}-\d\d-\d\d \d\d:\d\d:\d\d\.\d{9}) potato2$`) + if ms := m2.FindStringSubmatch(res); ms == nil { t.Errorf("potato2 missing: %q", res) + } else { + checkTime(ms[1], "potato2", t1.Local()) } } @@ -416,10 +439,12 @@ func TestMd5sum(t *testing.T) { t.Fatalf("List failed: %v", err) } res := buf.String() - if !strings.Contains(res, "d41d8cd98f00b204e9800998ecf8427e empty space\n") { + if !strings.Contains(res, "d41d8cd98f00b204e9800998ecf8427e empty space\n") && + !strings.Contains(res, " empty space\n") { t.Errorf("empty space missing: %q", res) } - if !strings.Contains(res, "6548b156ea68a4e003e786df99eee76 potato2\n") { + if !strings.Contains(res, "6548b156ea68a4e003e786df99eee76 potato2\n") && + !strings.Contains(res, " potato2\n") { t.Errorf("potato2 missing: %q", res) } } diff --git a/fstest/fstest.go b/fstest/fstest.go index b2b2c65a8..06686bda3 100644 --- a/fstest/fstest.go +++ b/fstest/fstest.go @@ -30,10 +30,19 @@ type Item struct { Size int64 } +// Checks the times are equal within the precision, returns the delta and a flag +func CheckTimeEqualWithPrecision(t0, t1 time.Time, precision time.Duration) (time.Duration, bool) { + dt := t0.Sub(t1) + if dt >= precision || dt <= -precision { + return dt, false + } + return dt, true +} + // check the mod time to the given precision func (i *Item) CheckModTime(t *testing.T, obj fs.Object, modTime time.Time, precision time.Duration) { - dt := modTime.Sub(i.ModTime) - if dt >= precision || dt <= -precision { + dt, ok := CheckTimeEqualWithPrecision(modTime, i.ModTime, precision) + if !ok { t.Errorf("%s: Modification time difference too big |%s| > %s (%s vs %s) (precision %s)", obj.Remote(), dt, precision, modTime, i.ModTime, precision) } } @@ -47,7 +56,7 @@ func (i *Item) Check(t *testing.T, obj fs.Object, precision time.Duration) { if err != nil { t.Fatalf("Failed to read md5sum for %q: %v", obj.Remote(), err) } - if i.Md5sum != Md5sum { + if !fs.Md5sumsEqual(i.Md5sum, Md5sum) { t.Errorf("%s: Md5sum incorrect - expecting %q got %q", obj.Remote(), i.Md5sum, Md5sum) } if i.Size != obj.Size() { diff --git a/fstest/fstests/fstests.go b/fstest/fstests/fstests.go index dcae74546..134426911 100644 --- a/fstest/fstests/fstests.go +++ b/fstest/fstests/fstests.go @@ -258,6 +258,9 @@ func TestFsRmdirFull(t *testing.T) { func TestFsPrecision(t *testing.T) { skipIfNotOk(t) precision := remote.Precision() + if precision == fs.ModTimeNotSupported { + return + } if precision > time.Second || precision < 0 { t.Fatalf("Precision out of range %v", precision) } @@ -301,7 +304,7 @@ func TestObjectMd5sum(t *testing.T) { if err != nil { t.Errorf("Error in Md5sum: %v", err) } - if Md5sum != file1.Md5sum { + if !fs.Md5sumsEqual(Md5sum, file1.Md5sum) { t.Errorf("Md5sum is wrong %v != %v", Md5sum, file1.Md5sum) } } @@ -351,7 +354,7 @@ func TestObjectOpen(t *testing.T) { t.Fatalf("in.Close() return error: %v", err) } Md5sum := hex.EncodeToString(hash.Sum(nil)) - if Md5sum != file1.Md5sum { + if !fs.Md5sumsEqual(Md5sum, file1.Md5sum) { t.Errorf("Md5sum is wrong %v != %v", Md5sum, file1.Md5sum) } }