From cc628717d80db5bc76a35c0afd30107ddf6ada50 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Tue, 5 Jul 2016 11:26:02 +0100 Subject: [PATCH] b2: Add --b2-versions flag so old versions can be listed and retreived. #420 --- b2/api/types.go | 59 ++++++++++++++++++++++++- b2/api/types_test.go | 87 ++++++++++++++++++++++++++++++++++++ b2/b2.go | 51 ++++++++++++++++++---- docs/content/b2.md | 102 ++++++++++++++++++++++++++++++++++++------- 4 files changed, 274 insertions(+), 25 deletions(-) create mode 100644 b2/api/types_test.go diff --git a/b2/api/types.go b/b2/api/types.go index e7da9e8ee..05acc6c27 100644 --- a/b2/api/types.go +++ b/b2/api/types.go @@ -2,7 +2,9 @@ package api import ( "fmt" + "path" "strconv" + "strings" "time" "github.com/ncw/rclone/fs" @@ -62,10 +64,65 @@ func (t *Timestamp) UnmarshalJSON(data []byte) error { if err != nil { return err } - *t = Timestamp(time.Unix(timestamp/1E3, (timestamp%1E3)*1E6)) + *t = Timestamp(time.Unix(timestamp/1E3, (timestamp%1E3)*1E6).UTC()) return nil } +const versionFormat = "-v2006-01-02-150405.000" + +// AddVersion adds the timestamp as a version string into the filename passed in. +func (t Timestamp) AddVersion(remote string) string { + ext := path.Ext(remote) + base := remote[:len(remote)-len(ext)] + s := (time.Time)(t).Format(versionFormat) + // Replace the '.' with a '-' + s = strings.Replace(s, ".", "-", -1) + return base + s + ext +} + +// RemoveVersion removes the timestamp from a filename as a version string. +// +// It returns the new file name and a timestamp, or the old filename +// and a zero timestamp. +func RemoveVersion(remote string) (t Timestamp, newRemote string) { + newRemote = remote + ext := path.Ext(remote) + base := remote[:len(remote)-len(ext)] + if len(base) < len(versionFormat) { + return + } + versionStart := len(base) - len(versionFormat) + // Check it ends in -xxx + if base[len(base)-4] != '-' { + return + } + // Replace with .xxx for parsing + base = base[:len(base)-4] + "." 
+ base[len(base)-3:] + newT, err := time.Parse(versionFormat, base[versionStart:]) + if err != nil { + return + } + return Timestamp(newT), base[:versionStart] + ext +} + +// IsZero returns true if the timestamp is unitialised +func (t Timestamp) IsZero() bool { + return (time.Time)(t).IsZero() +} + +// Equal compares two timestamps +// +// If either are !IsZero then it returns false +func (t Timestamp) Equal(s Timestamp) bool { + if (time.Time)(t).IsZero() { + return false + } + if (time.Time)(s).IsZero() { + return false + } + return (time.Time)(t).Equal((time.Time)(s)) +} + // File is info about a file type File struct { ID string `json:"fileId"` // The unique identifier for this version of this file. Used with b2_get_file_info, b2_download_file_by_id, and b2_delete_file_version. diff --git a/b2/api/types_test.go b/b2/api/types_test.go new file mode 100644 index 000000000..dd1c23208 --- /dev/null +++ b/b2/api/types_test.go @@ -0,0 +1,87 @@ +package api_test + +import ( + "testing" + "time" + + "github.com/ncw/rclone/b2/api" + "github.com/ncw/rclone/fstest" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ( + emptyT api.Timestamp + t0 = api.Timestamp(fstest.Time("1970-01-01T01:01:01.123456789Z")) + t0r = api.Timestamp(fstest.Time("1970-01-01T01:01:01.123000000Z")) + t1 = api.Timestamp(fstest.Time("2001-02-03T04:05:06.123000000Z")) +) + +func TestTimestampMarshalJSON(t *testing.T) { + resB, err := t0.MarshalJSON() + res := string(resB) + require.NoError(t, err) + assert.Equal(t, "3661123", res) + + resB, err = t1.MarshalJSON() + res = string(resB) + require.NoError(t, err) + assert.Equal(t, "981173106123", res) +} + +func TestTimestampUnmarshalJSON(t *testing.T) { + var tActual api.Timestamp + err := tActual.UnmarshalJSON([]byte("981173106123")) + require.NoError(t, err) + assert.Equal(t, (time.Time)(t1), (time.Time)(tActual)) +} + +func TestTimestampAddVersion(t *testing.T) { + for _, test := range []struct { + t 
api.Timestamp + in string + expected string + }{ + {t0, "potato.txt", "potato-v1970-01-01-010101-123.txt"}, + {t1, "potato", "potato-v2001-02-03-040506-123"}, + {t1, "", "-v2001-02-03-040506-123"}, + } { + actual := test.t.AddVersion(test.in) + assert.Equal(t, test.expected, actual, test.in) + } +} + +func TestTimestampRemoveVersion(t *testing.T) { + for _, test := range []struct { + in string + expectedT api.Timestamp + expectedRemote string + }{ + {"potato.txt", emptyT, "potato.txt"}, + {"potato-v1970-01-01-010101-123.txt", t0r, "potato.txt"}, + {"potato-v2001-02-03-040506-123", t1, "potato"}, + {"-v2001-02-03-040506-123", t1, ""}, + {"potato-v2A01-02-03-040506-123", emptyT, "potato-v2A01-02-03-040506-123"}, + {"potato-v2001-02-03-040506=123", emptyT, "potato-v2001-02-03-040506=123"}, + } { + actualT, actualRemote := api.RemoveVersion(test.in) + assert.Equal(t, test.expectedT, actualT, test.in) + assert.Equal(t, test.expectedRemote, actualRemote, test.in) + } +} + +func TestTimestampIsZero(t *testing.T) { + assert.True(t, emptyT.IsZero()) + assert.False(t, t0.IsZero()) + assert.False(t, t1.IsZero()) +} + +func TestTimestampEqual(t *testing.T) { + assert.False(t, emptyT.Equal(emptyT)) + assert.False(t, t0.Equal(emptyT)) + assert.False(t, emptyT.Equal(t0)) + assert.False(t, t0.Equal(t1)) + assert.False(t, t1.Equal(t0)) + assert.True(t, t0.Equal(t0)) + assert.True(t, t1.Equal(t1)) +} diff --git a/b2/b2.go b/b2/b2.go index ee5920972..cd2ecef91 100644 --- a/b2/b2.go +++ b/b2/b2.go @@ -41,14 +41,17 @@ const ( maxSleep = 2 * time.Second decayConstant = 1 // bigger for slower decay, exponential maxParts = 10000 + maxVersions = 100 // maximum number of versions we search in --b2-versions mode ) // Globals var ( - minChunkSize = fs.SizeSuffix(100E6) - chunkSize = fs.SizeSuffix(96 * 1024 * 1024) - uploadCutoff = fs.SizeSuffix(200E6) - b2TestMode = pflag.StringP("b2-test-mode", "", "", "A flag string for X-Bz-Test-Mode header.") + minChunkSize = fs.SizeSuffix(100E6) + 
chunkSize = fs.SizeSuffix(96 * 1024 * 1024) + uploadCutoff = fs.SizeSuffix(200E6) + b2TestMode = pflag.StringP("b2-test-mode", "", "", "A flag string for X-Bz-Test-Mode header.") + b2Versions = pflag.BoolP("b2-versions", "", false, "Include old versions in directory listings.") + errNotWithVersions = errors.New("can't modify or delete files in --b2-versions mode") ) // Register with Fs @@ -528,7 +531,8 @@ func (f *Fs) list(dir string, level int, prefix string, limit int, hidden bool, func (f *Fs) listFiles(out fs.ListOpts, dir string) { defer out.Finished() // List the objects - err := f.list(dir, out.Level(), "", 0, false, func(remote string, object *api.File, isDirectory bool) error { + last := "" + err := f.list(dir, out.Level(), "", 0, *b2Versions, func(remote string, object *api.File, isDirectory bool) error { if isDirectory { dir := &fs.Dir{ Name: remote, @@ -539,6 +543,15 @@ func (f *Fs) listFiles(out fs.ListOpts, dir string) { return fs.ErrorListAborted } } else { + if remote == last { + remote = object.UploadTimestamp.AddVersion(remote) + } else { + last = remote + } + // hide objects represent deleted files which we don't list + if object.Action == "hide" { + return nil + } o, err := f.newObjectWithInfo(remote, object) if err != nil { return err @@ -914,12 +927,22 @@ func (o *Object) readMetaData() (err error) { if o.id != "" { return nil } + maxSearched := 1 + var timestamp api.Timestamp + baseRemote := o.remote + if *b2Versions { + timestamp, baseRemote = api.RemoveVersion(baseRemote) + maxSearched = maxVersions + } var info *api.File - err = o.fs.list("", fs.MaxLevel, o.remote, 1, false, func(remote string, object *api.File, isDirectory bool) error { + err = o.fs.list("", fs.MaxLevel, baseRemote, maxSearched, *b2Versions, func(remote string, object *api.File, isDirectory bool) error { if isDirectory { return nil } - if remote == o.remote { + if remote == baseRemote { + if !timestamp.IsZero() && !timestamp.Equal(object.UploadTimestamp) { + return nil + 
} info = object } return errEndList // read only 1 item @@ -1046,7 +1069,13 @@ func (o *Object) Open() (in io.ReadCloser, err error) { opts := rest.Opts{ Method: "GET", Absolute: true, - Path: o.fs.info.DownloadURL + "/file/" + urlEncode(o.fs.bucket) + "/" + urlEncode(o.fs.root+o.remote), + Path: o.fs.info.DownloadURL, + } + // Download by id if set otherwise by name + if o.id != "" { + opts.Path += "/b2api/v1/b2_download_file_by_id?fileId=" + urlEncode(o.id) + } else { + opts.Path += "/file/" + urlEncode(o.fs.bucket) + "/" + urlEncode(o.fs.root+o.remote) } var resp *http.Response err = o.fs.pacer.Call(func() (bool, error) { @@ -1108,6 +1137,9 @@ func urlEncode(in string) string { // // The new object may have been created if an error is returned func (o *Object) Update(in io.Reader, src fs.ObjectInfo) (err error) { + if *b2Versions { + return errNotWithVersions + } size := src.Size() // If a large file upload in chunks - see upload.go @@ -1256,6 +1288,9 @@ func (o *Object) Update(in io.Reader, src fs.ObjectInfo) (err error) { // Remove an object func (o *Object) Remove() error { + if *b2Versions { + return errNotWithVersions + } bucketID, err := o.fs.getBucketID() if err != nil { return err diff --git a/docs/content/b2.md b/docs/content/b2.md index f09ceea77..afbc6324f 100644 --- a/docs/content/b2.md +++ b/docs/content/b2.md @@ -109,22 +109,6 @@ will be used in the syncing process. You can use the `--checksum` flag. Large files which are uploaded in chunks will store their SHA1 on the object as `X-Bz-Info-large_file_sha1` as recommended by Backblaze. -### Versions ### - -When rclone uploads a new version of a file it creates a [new version -of it](https://www.backblaze.com/b2/docs/file_versions.html). -Likewise when you delete a file, the old version will still be -available. - -The old versions of files are visible in the B2 web interface, but not -via rclone yet. 
- -If you wish to remove all the old versions then you can use the -`rclone cleanup remote:bucket` command which will delete all the old -versions of files, leaving the current ones intact. - -When you `purge` a bucket, all the old versions will be deleted also. - ### Transfers ### Backblaze recommends that you do lots of transfers simultaneously for @@ -135,6 +119,64 @@ depending on your hardware, how big the files are, how much you want to load your computer, etc. The default of `--transfers 4` is definitely too low for Backblaze B2 though. +### Versions ### + +When rclone uploads a new version of a file it creates a [new version +of it](https://www.backblaze.com/b2/docs/file_versions.html). +Likewise when you delete a file, the old version will still be +available. + +Old versions of files are visible using the `--b2-versions` flag. + +If you wish to remove all the old versions then you can use the +`rclone cleanup remote:bucket` command which will delete all the old +versions of files, leaving the current ones intact. You can also +supply a path and only old versions under that path will be deleted, +eg `rclone cleanup remote:bucket/path/to/stuff`. + +When you `purge` a bucket, the current and the old versions will be +deleted then the bucket will be deleted. + +However `delete` will cause the current versions of the files to +become hidden old versions. + +Here is a session showing the listing and retrieval of an old +version followed by a `cleanup` of the old versions. + +Show current version and all the versions with `--b2-versions` flag. 
+ +``` +$ rclone -q ls b2:cleanup-test + 9 one.txt + +$ rclone -q --b2-versions ls b2:cleanup-test + 9 one.txt + 8 one-v2016-07-04-141032-000.txt + 16 one-v2016-07-04-141003-000.txt + 15 one-v2016-07-02-155621-000.txt +``` + +Retrieve an old version + +``` +$ rclone -q --b2-versions copy b2:cleanup-test/one-v2016-07-04-141003-000.txt /tmp + +$ ls -l /tmp/one-v2016-07-04-141003-000.txt +-rw-rw-r-- 1 ncw ncw 16 Jul 2 17:46 /tmp/one-v2016-07-04-141003-000.txt +``` + +Clean up all the old versions and show that they've gone. + +``` +$ rclone -q cleanup b2:cleanup-test + +$ rclone -q ls b2:cleanup-test + 9 one.txt + +$ rclone -q --b2-versions ls b2:cleanup-test + 9 one.txt +``` + ### Specific options ### Here are the command line options specific to this cloud storage @@ -167,3 +209,31 @@ specific errors for debugging purposes. These will be set in the `X-Bz-Test-Mode` header which is documented in the [b2 integrations checklist](https://www.backblaze.com/b2/docs/integration_checklist.html). + +#### --b2-versions #### + +When set rclone will show and act on older versions of files. For example + +Listing without `--b2-versions` + +``` +$ rclone -q ls b2:cleanup-test + 9 one.txt +``` + +And with + +``` +$ rclone -q --b2-versions ls b2:cleanup-test + 9 one.txt + 8 one-v2016-07-04-141032-000.txt + 16 one-v2016-07-04-141003-000.txt + 15 one-v2016-07-02-155621-000.txt +``` + +Showing that the current version is unchanged but older versions can +be seen. These have the UTC date that they were uploaded to the +server to the nearest millisecond appended to them. + +Note that when using `--b2-versions` no file write operations are +permitted, so you can't upload files or delete them.