forked from TrueCloudLab/rclone
bisync: support files with unknown length, including Google Docs - fixes #5696
Before this change, bisync intentionally ignored Google Docs (albeit in a buggy way that caused problems during --resync.) After this change, Google Docs (including Google Sheets, Slides, etc.) are now supported in bisync, subject to the same options, defaults, and limitations as in `rclone sync`. When bisyncing drive with non-drive backends, the drive -> non-drive direction is controlled by `--drive-export-formats` (default `"docx,xlsx,pptx,svg"`) and the non-drive -> drive direction is controlled by `--drive-import-formats` (default none.) For example, with the default export/import formats, a Google Sheet on the drive side will be synced to an `.xlsx` file on the non-drive side. In the reverse direction, `.xlsx` files with filenames that match an existing Google Sheet will be synced to that Google Sheet, while `.xlsx` files that do NOT match an existing Google Sheet will be copied to drive as normal `.xlsx` files (without conversion to Sheets, although the Google Drive web browser UI may still give you the option to open it as one.) If `--drive-import-formats` is set (it's not, by default), then all of the specified formats will be converted to Google Docs, if there is no existing Google Doc with a matching name. Caution: such conversion can be quite lossy, and in most cases it's probably not what you want! To bisync Google Docs as URL shortcut links (in a manner similar to "Drive for Desktop"), use: `--drive-export-formats url` (or alternatives.) Note that these link files cannot be edited on the non-drive side -- you will get errors if you try to sync an edited link file back to drive. They CAN be deleted (it will result in deleting the corresponding Google Doc.) If you create a `.url` file on the non-drive side that does not match an existing Google Doc, bisyncing it will just result in copying the literal `.url` file over to drive (no Google Doc will be created.) 
So, as a general rule of thumb, think of them as read-only placeholders on the non-drive side, and make all your changes on the drive side. Likewise, even with other export-formats, it is best to only move/rename Google Docs on the drive side. This is because otherwise, bisync will interpret this as a file deleted and another created, and accordingly, it will delete the Google Doc and create a new file at the new path. (Whether or not that new file is a Google Doc depends on `--drive-import-formats`.) Lastly, take note that all Google Docs on the drive side have a size of `-1` and no checksum. Therefore, they cannot be reliably synced with the `--checksum` or `--size-only` flags. (To be exact: they will still get created/deleted, and bisync's delta engine will notice changes and queue them for syncing, but the underlying sync function will consider them identical and skip them.) To work around this, use the default (modtime and size) instead of `--checksum` or `--size-only`. To ignore Google Docs entirely, use `--drive-skip-gdocs`. Nearly all of the Google Docs logic is outsourced to the Drive backend, so future changes should also be supported by bisync.
This commit is contained in:
parent
4d5d6ee61b
commit
9cf783677e
5 changed files with 68 additions and 21 deletions
|
@ -36,7 +36,7 @@ const ListingHeader = "# bisync listing v1 from"
|
|||
// id: "-" (reserved)
|
||||
const lineFormat = "%s %8d %s %s %s %q\n"
|
||||
|
||||
var lineRegex = regexp.MustCompile(`^(\S) +(\d+) (\S+) (\S+) (\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d{9}[+-]\d{4}) (".+")$`)
|
||||
var lineRegex = regexp.MustCompile(`^(\S) +(-?\d+) (\S+) (\S+) (\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d{9}[+-]\d{4}) (".+")$`)
|
||||
|
||||
// timeFormat defines time format used in listings
|
||||
const timeFormat = "2006-01-02T15:04:05.000000000-0700"
|
||||
|
@ -237,7 +237,6 @@ func (ls *fileList) save(ctx context.Context, listing string) error {
|
|||
|
||||
// loadListing will load listing from a file.
|
||||
// The key is the path to the file relative to the Path1/Path2 base.
|
||||
// File size of -1, as for Google Docs, prints a warning and won't be loaded.
|
||||
func (b *bisyncRun) loadListing(listing string) (*fileList, error) {
|
||||
file, err := os.Open(listing)
|
||||
if err != nil {
|
||||
|
|
|
@ -168,8 +168,7 @@ func (b *bisyncRun) ForDir(o fs.Directory, isPath1 bool) {
|
|||
id := "" // TODO
|
||||
flags := "d" // "-" for a file and "d" for a directory
|
||||
marchLsLock.Lock()
|
||||
//record size as 0 instead of -1, so bisync doesn't think it's a google doc
|
||||
ls.put(o.Remote(), 0, time, "", id, flags)
|
||||
ls.put(o.Remote(), -1, time, "", id, flags)
|
||||
marchLsLock.Unlock()
|
||||
}
|
||||
|
||||
|
|
|
@ -441,8 +441,7 @@ func (b *bisyncRun) resync(octx, fctx context.Context) error {
|
|||
// fctx has our extra filters added!
|
||||
ctxSync, filterSync := filter.AddConfig(ctxRun)
|
||||
if filterSync.Opt.MinSize == -1 {
|
||||
// prevent overwriting Google Doc files (their size is -1)
|
||||
filterSync.Opt.MinSize = 0
|
||||
fs.Debugf(nil, "filterSync.Opt.MinSize: %v", filterSync.Opt.MinSize)
|
||||
}
|
||||
ci := fs.GetConfig(ctxSync)
|
||||
ci.IgnoreExisting = true
|
||||
|
|
|
@ -38,7 +38,9 @@ type Results struct {
|
|||
|
||||
var logger = operations.NewLoggerOpt()
|
||||
var lock mutex.Mutex
|
||||
var once mutex.Once
|
||||
var ignoreListingChecksum bool
|
||||
var ci *fs.ConfigInfo
|
||||
|
||||
// FsPathIfAny handles type assertions and returns a formatted bilib.FsPath if valid, otherwise ""
|
||||
func FsPathIfAny(x fs.DirEntry) string {
|
||||
|
@ -121,7 +123,13 @@ func WriteResults(ctx context.Context, sigil operations.Sigil, src, dst fs.DirEn
|
|||
result.Name = dst.Remote()
|
||||
}
|
||||
result.Flags = "d"
|
||||
result.Size = 0
|
||||
result.Size = -1
|
||||
}
|
||||
|
||||
if result.Size < 0 && result.Flags != "d" && (ci.CheckSum || ci.SizeOnly) {
|
||||
once.Do(func() {
|
||||
fs.Logf(result.Name, Color(terminal.YellowFg, "Files of unknown size (such as Google Docs) do not sync reliably with --checksum or --size-only. Consider using modtime instead (the default) or --drive-skip-gdocs"))
|
||||
})
|
||||
}
|
||||
|
||||
fs.Debugf(nil, "writing result: %v", result)
|
||||
|
@ -166,6 +174,7 @@ func (b *bisyncRun) fastCopy(ctx context.Context, fsrc, fdst fs.Fs, files bilib.
|
|||
}
|
||||
|
||||
ignoreListingChecksum = b.opt.IgnoreListingChecksum
|
||||
ci = fs.GetConfig(ctx)
|
||||
logger.LoggerFn = WriteResults
|
||||
ctxCopyLogger := operations.WithSyncLogger(ctxCopy, logger)
|
||||
b.testFn()
|
||||
|
@ -194,6 +203,7 @@ func (b *bisyncRun) retryFastCopy(ctx context.Context, fsrc, fdst fs.Fs, files b
|
|||
}
|
||||
|
||||
func (b *bisyncRun) resyncDir(ctx context.Context, fsrc, fdst fs.Fs) ([]Results, error) {
|
||||
ci = fs.GetConfig(ctx)
|
||||
ignoreListingChecksum = b.opt.IgnoreListingChecksum
|
||||
logger.LoggerFn = WriteResults
|
||||
ctxCopyLogger := operations.WithSyncLogger(ctx, logger)
|
||||
|
|
|
@ -883,23 +883,62 @@ files are generating complaints. If the error is
|
|||
consider using the flag
|
||||
[--drive-acknowledge-abuse](/drive/#drive-acknowledge-abuse).
|
||||
|
||||
### Google Doc files
|
||||
### Google Docs (and other files of unknown size) {#gdocs}
|
||||
|
||||
Google docs exist as virtual files on Google Drive and cannot be transferred
|
||||
to other filesystems natively. While it is possible to export a Google doc to
|
||||
a normal file (with `.xlsx` extension, for example), it is not possible
|
||||
to import a normal file back into a Google document.
|
||||
As of `v1.65`, [Google Docs](/drive/#import-export-of-google-documents)
|
||||
(including Google Sheets, Slides, etc.) are now supported in bisync, subject to
|
||||
the same options, defaults, and limitations as in `rclone sync`. When bisyncing
|
||||
drive with non-drive backends, the drive -> non-drive direction is controlled
|
||||
by [`--drive-export-formats`](/drive/#drive-export-formats) (default
|
||||
`"docx,xlsx,pptx,svg"`) and the non-drive -> drive direction is controlled by
|
||||
[`--drive-import-formats`](/drive/#drive-import-formats) (default none.)
|
||||
|
||||
Bisync's handling of Google Doc files is to flag them in the run log output
|
||||
for user's attention and ignore them for any file transfers, deletes, or syncs.
|
||||
They will show up with a length of `-1` in the listings.
|
||||
This bisync run is otherwise successful:
|
||||
For example, with the default export/import formats, a Google Sheet on the
|
||||
drive side will be synced to an `.xlsx` file on the non-drive side. In the
|
||||
reverse direction, `.xlsx` files with filenames that match an existing Google
|
||||
Sheet will be synced to that Google Sheet, while `.xlsx` files that do NOT
|
||||
match an existing Google Sheet will be copied to drive as normal `.xlsx` files
|
||||
(without conversion to Sheets, although the Google Drive web browser UI may
|
||||
still give you the option to open it as one.)
|
||||
|
||||
```
|
||||
2021/05/11 08:23:15 INFO : Synching Path1 "/path/to/local/tree/base/" with Path2 "GDrive:"
|
||||
2021/05/11 08:23:15 INFO : ...path2.lst-new: Ignoring incorrect line: "- -1 - - 2018-07-29T08:49:30.136000000+0000 GoogleDoc.docx"
|
||||
2021/05/11 08:23:15 INFO : Bisync successful
|
||||
```
|
||||
If `--drive-import-formats` is set (it's not, by default), then all of the
|
||||
specified formats will be converted to Google Docs, if there is no existing
|
||||
Google Doc with a matching name. Caution: such conversion can be quite lossy,
|
||||
and in most cases it's probably not what you want!
|
||||
|
||||
To bisync Google Docs as URL shortcut links (in a manner similar to "Drive for
|
||||
Desktop"), use: `--drive-export-formats url` (or
|
||||
[alternatives](https://rclone.org/drive/#exportformats:~:text=available%20Google%20Documents.-,Extension,macOS,-Standard%20options).)
|
||||
|
||||
Note that these link files cannot be edited on the non-drive side -- you will
|
||||
get errors if you try to sync an edited link file back to drive. They CAN be
|
||||
deleted (it will result in deleting the corresponding Google Doc.) If you
|
||||
create a `.url` file on the non-drive side that does not match an existing
|
||||
Google Doc, bisyncing it will just result in copying the literal `.url` file
|
||||
over to drive (no Google Doc will be created.) So, as a general rule of thumb,
|
||||
think of them as read-only placeholders on the non-drive side, and make all
|
||||
your changes on the drive side.
|
||||
|
||||
Likewise, even with other export-formats, it is best to only move/rename Google
|
||||
Docs on the drive side. This is because otherwise, bisync will interpret the
|
||||
move/rename as a file deleted and another created, and accordingly, it will delete the
|
||||
Google Doc and create a new file at the new path. (Whether or not that new file
|
||||
is a Google Doc depends on `--drive-import-formats`.)
|
||||
|
||||
Lastly, take note that all Google Docs on the drive side have a size of `-1`
|
||||
and no checksum. Therefore, they cannot be reliably synced with the
|
||||
`--checksum` or `--size-only` flags. (To be exact: they will still get
|
||||
created/deleted, and bisync's delta engine will notice changes and queue them
|
||||
for syncing, but the underlying sync function will consider them identical and
|
||||
skip them.) To work around this, use the default (modtime and size) instead of
|
||||
`--checksum` or `--size-only`.
|
||||
|
||||
To ignore Google Docs entirely, use
|
||||
[`--drive-skip-gdocs`](/drive/#drive-skip-gdocs).
|
||||
|
||||
(Note that all flags starting with `--drive` are backend-specific, and
|
||||
therefore will cause the behavior explained in [Overridden
|
||||
Configs](/#overridden-configs).)
|
||||
|
||||
## Usage examples
|
||||
|
||||
|
@ -1289,6 +1328,7 @@ about _Unison_ and synchronization in general.
|
|||
for performance improvements and less [risk of error](https://forum.rclone.org/t/bisync-bugs-and-feature-requests/37636#:~:text=4.%20Listings%20should%20alternate%20between%20paths%20to%20minimize%20errors).
|
||||
* Fixed handling of Unicode normalization and case insensitivity, support for [`--fix-case`](/docs/#fix-case), [`--ignore-case-sync`](/docs/#ignore-case-sync), [`--no-unicode-normalization`](/docs/#no-unicode-normalization)
|
||||
* `--resync` is now much more efficient (especially for users of `--create-empty-src-dirs`)
|
||||
* Google Docs (and other files of unknown size) are now supported (with the same options as in `sync`)
|
||||
|
||||
### `v1.64`
|
||||
* Fixed an [issue](https://forum.rclone.org/t/bisync-bugs-and-feature-requests/37636#:~:text=1.%20Dry%20runs%20are%20not%20completely%20dry)
|
||||
|
|
Loading…
Reference in a new issue