bisync: Add experimental --resilient mode to allow recovery from self-correctable errors
https://forum.rclone.org/t/bisync-bugs-and-feature-requests/37636#:~:text=2.%20Bisync%20should%20be%20more%20resilient%20to%20self%2Dcorrectable%20errors
This commit is contained in:
parent
f01a50eb47
commit
e5bde42303
5 changed files with 54 additions and 7 deletions
|
@ -40,6 +40,7 @@ type Options struct {
|
||||||
NoCleanup bool
|
NoCleanup bool
|
||||||
SaveQueues bool // save extra debugging files (test only flag)
|
SaveQueues bool // save extra debugging files (test only flag)
|
||||||
IgnoreListingChecksum bool
|
IgnoreListingChecksum bool
|
||||||
|
Resilient bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// Default values
|
// Default values
|
||||||
|
@ -110,6 +111,7 @@ func init() {
|
||||||
flags.BoolVarP(cmdFlags, &tzLocal, "localtime", "", tzLocal, "Use local time in listings (default: UTC)", "")
|
flags.BoolVarP(cmdFlags, &tzLocal, "localtime", "", tzLocal, "Use local time in listings (default: UTC)", "")
|
||||||
flags.BoolVarP(cmdFlags, &Opt.NoCleanup, "no-cleanup", "", Opt.NoCleanup, "Retain working files (useful for troubleshooting and testing).", "")
|
flags.BoolVarP(cmdFlags, &Opt.NoCleanup, "no-cleanup", "", Opt.NoCleanup, "Retain working files (useful for troubleshooting and testing).", "")
|
||||||
flags.BoolVarP(cmdFlags, &Opt.IgnoreListingChecksum, "ignore-listing-checksum", "", Opt.IgnoreListingChecksum, "Do not use checksums for listings (add --ignore-checksum to additionally skip post-copy checksum checks)", "")
|
flags.BoolVarP(cmdFlags, &Opt.IgnoreListingChecksum, "ignore-listing-checksum", "", Opt.IgnoreListingChecksum, "Do not use checksums for listings (add --ignore-checksum to additionally skip post-copy checksum checks)", "")
|
||||||
|
flags.BoolVarP(cmdFlags, &Opt.Resilient, "resilient", "", Opt.Resilient, "Allow future runs to retry after certain less-serious errors, instead of requiring --resync. Use at your own risk!", "")
|
||||||
}
|
}
|
||||||
|
|
||||||
// bisync command definition
|
// bisync command definition
|
||||||
|
|
|
@ -104,6 +104,7 @@ func (b *bisyncRun) checkconflicts(ctxCheck context.Context, filterCheck *filter
|
||||||
opt, close, checkopterr := check.GetCheckOpt(b.fs1, b.fs2)
|
opt, close, checkopterr := check.GetCheckOpt(b.fs1, b.fs2)
|
||||||
if checkopterr != nil {
|
if checkopterr != nil {
|
||||||
b.critical = true
|
b.critical = true
|
||||||
|
b.retryable = true
|
||||||
fs.Debugf(nil, "GetCheckOpt error: %v", checkopterr)
|
fs.Debugf(nil, "GetCheckOpt error: %v", checkopterr)
|
||||||
return matches, checkopterr
|
return matches, checkopterr
|
||||||
}
|
}
|
||||||
|
|
|
@ -301,5 +301,6 @@ func (b *bisyncRun) checkListing(ls *fileList, listing, msg string) error {
|
||||||
}
|
}
|
||||||
fs.Errorf(nil, "Empty %s listing. Cannot sync to an empty directory: %s", msg, listing)
|
fs.Errorf(nil, "Empty %s listing. Cannot sync to an empty directory: %s", msg, listing)
|
||||||
b.critical = true
|
b.critical = true
|
||||||
|
b.retryable = true
|
||||||
return fmt.Errorf("empty %s listing: %s", msg, listing)
|
return fmt.Errorf("empty %s listing: %s", msg, listing)
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,6 +29,7 @@ type bisyncRun struct {
|
||||||
fs2 fs.Fs
|
fs2 fs.Fs
|
||||||
abort bool
|
abort bool
|
||||||
critical bool
|
critical bool
|
||||||
|
retryable bool
|
||||||
basePath string
|
basePath string
|
||||||
workDir string
|
workDir string
|
||||||
opt *Options
|
opt *Options
|
||||||
|
@ -123,6 +124,10 @@ func Bisync(ctx context.Context, fs1, fs2 fs.Fs, optArg *Options) (err error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if b.critical {
|
if b.critical {
|
||||||
|
if b.retryable && b.opt.Resilient {
|
||||||
|
fs.Errorf(nil, "Bisync critical error: %v", err)
|
||||||
|
fs.Errorf(nil, "Bisync aborted. Error is retryable without --resync due to --resilient mode.")
|
||||||
|
} else {
|
||||||
if bilib.FileExists(listing1) {
|
if bilib.FileExists(listing1) {
|
||||||
_ = os.Rename(listing1, listing1+"-err")
|
_ = os.Rename(listing1, listing1+"-err")
|
||||||
}
|
}
|
||||||
|
@ -131,6 +136,7 @@ func Bisync(ctx context.Context, fs1, fs2 fs.Fs, optArg *Options) (err error) {
|
||||||
}
|
}
|
||||||
fs.Errorf(nil, "Bisync critical error: %v", err)
|
fs.Errorf(nil, "Bisync critical error: %v", err)
|
||||||
fs.Errorf(nil, "Bisync aborted. Must run --resync to recover.")
|
fs.Errorf(nil, "Bisync aborted. Must run --resync to recover.")
|
||||||
|
}
|
||||||
return ErrBisyncAborted
|
return ErrBisyncAborted
|
||||||
}
|
}
|
||||||
if b.abort {
|
if b.abort {
|
||||||
|
@ -152,6 +158,7 @@ func (b *bisyncRun) runLocked(octx context.Context, listing1, listing2 string) (
|
||||||
fs.Infof(nil, "Validating listings for Path1 %s vs Path2 %s", quotePath(path1), quotePath(path2))
|
fs.Infof(nil, "Validating listings for Path1 %s vs Path2 %s", quotePath(path1), quotePath(path2))
|
||||||
if err = b.checkSync(listing1, listing2); err != nil {
|
if err = b.checkSync(listing1, listing2); err != nil {
|
||||||
b.critical = true
|
b.critical = true
|
||||||
|
b.retryable = true
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -176,6 +183,7 @@ func (b *bisyncRun) runLocked(octx context.Context, listing1, listing2 string) (
|
||||||
var fctx context.Context
|
var fctx context.Context
|
||||||
if fctx, err = b.opt.applyFilters(octx); err != nil {
|
if fctx, err = b.opt.applyFilters(octx); err != nil {
|
||||||
b.critical = true
|
b.critical = true
|
||||||
|
b.retryable = true
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -188,6 +196,7 @@ func (b *bisyncRun) runLocked(octx context.Context, listing1, listing2 string) (
|
||||||
if !bilib.FileExists(listing1) || !bilib.FileExists(listing2) {
|
if !bilib.FileExists(listing1) || !bilib.FileExists(listing2) {
|
||||||
// On prior critical error abort, the prior listings are renamed to .lst-err to lock out further runs
|
// On prior critical error abort, the prior listings are renamed to .lst-err to lock out further runs
|
||||||
b.critical = true
|
b.critical = true
|
||||||
|
b.retryable = true
|
||||||
return errors.New("cannot find prior Path1 or Path2 listings, likely due to critical error on prior run")
|
return errors.New("cannot find prior Path1 or Path2 listings, likely due to critical error on prior run")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -215,6 +224,7 @@ func (b *bisyncRun) runLocked(octx context.Context, listing1, listing2 string) (
|
||||||
err = b.checkAccess(ds1.checkFiles, ds2.checkFiles)
|
err = b.checkAccess(ds1.checkFiles, ds2.checkFiles)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
b.critical = true
|
b.critical = true
|
||||||
|
b.retryable = true
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -255,6 +265,7 @@ func (b *bisyncRun) runLocked(octx context.Context, listing1, listing2 string) (
|
||||||
changes1, changes2, err = b.applyDeltas(octx, ds1, ds2)
|
changes1, changes2, err = b.applyDeltas(octx, ds1, ds2)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
b.critical = true
|
b.critical = true
|
||||||
|
// b.retryable = true // not sure about this one
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -283,6 +294,7 @@ func (b *bisyncRun) runLocked(octx context.Context, listing1, listing2 string) (
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
b.critical = true
|
b.critical = true
|
||||||
|
b.retryable = true
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -310,6 +322,7 @@ func (b *bisyncRun) runLocked(octx context.Context, listing1, listing2 string) (
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
b.critical = true
|
b.critical = true
|
||||||
|
b.retryable = true
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -369,6 +382,7 @@ func (b *bisyncRun) resync(octx, fctx context.Context, listing1, listing2 string
|
||||||
err = b.checkAccess(ds1.checkFiles, ds2.checkFiles)
|
err = b.checkAccess(ds1.checkFiles, ds2.checkFiles)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
b.critical = true
|
b.critical = true
|
||||||
|
b.retryable = true
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -97,6 +97,8 @@ Optional Flags:
|
||||||
Consider using `--verbose` or `--dry-run` first.
|
Consider using `--verbose` or `--dry-run` first.
|
||||||
--ignore-listing-checksum Do not use checksums for listings
|
--ignore-listing-checksum Do not use checksums for listings
|
||||||
(add --ignore-checksum to additionally skip post-copy checksum checks)
|
(add --ignore-checksum to additionally skip post-copy checksum checks)
|
||||||
|
--resilient Allow future runs to retry after certain less-serious errors,
|
||||||
|
instead of requiring --resync. Use at your own risk!
|
||||||
--localtime Use local time in listings (default: UTC)
|
--localtime Use local time in listings (default: UTC)
|
||||||
--no-cleanup Retain working files (useful for troubleshooting and testing).
|
--no-cleanup Retain working files (useful for troubleshooting and testing).
|
||||||
--workdir PATH Use custom working directory (useful for testing).
|
--workdir PATH Use custom working directory (useful for testing).
|
||||||
|
@ -285,6 +287,30 @@ were generated on a run using `--ignore-listing-checksum`. For a more robust int
|
||||||
consider using [`check`](commands/rclone_check/)
|
consider using [`check`](commands/rclone_check/)
|
||||||
(or [`cryptcheck`](/commands/rclone_cryptcheck/), if at least one path is a `crypt` remote.)
|
(or [`cryptcheck`](/commands/rclone_cryptcheck/), if at least one path is a `crypt` remote.)
|
||||||
|
|
||||||
|
#### --resilient
|
||||||
|
|
||||||
|
***Caution: this is an experimental feature. Use at your own risk!***
|
||||||
|
|
||||||
|
By default, most errors or interruptions will cause bisync to abort and
|
||||||
|
require [`--resync`](#resync) to recover. This is a safety feature,
|
||||||
|
to prevent bisync from running again until a user checks things out.
|
||||||
|
However, in some cases, bisync can go too far and enforce a lockout when one isn't actually necessary,
|
||||||
|
like for certain less-serious errors that might resolve themselves on the next run.
|
||||||
|
When `--resilient` is specified, bisync tries its best to recover and self-correct,
|
||||||
|
and only requires `--resync` as a last resort when a human's involvement is absolutely necessary.
|
||||||
|
The intended use case is for running bisync as a background process (such as via scheduled [cron](#cron)).
|
||||||
|
|
||||||
|
When using `--resilient` mode, bisync will still report the error and abort,
|
||||||
|
however it will not lock out future runs -- allowing the possibility of retrying at the next normally scheduled time,
|
||||||
|
without requiring a `--resync` first. Examples of such retryable errors include
|
||||||
|
access test failures, missing listing files, and filter change detections.
|
||||||
|
These safety features will still prevent the *current* run from proceeding --
|
||||||
|
the difference is that if conditions have improved by the time of the *next* run,
|
||||||
|
that next run will be allowed to proceed.
|
||||||
|
Certain more serious errors will still enforce a `--resync` lockout, even in `--resilient` mode, to prevent data loss.
|
||||||
|
|
||||||
|
Behavior of `--resilient` may change in a future version.
|
||||||
|
|
||||||
## Operation
|
## Operation
|
||||||
|
|
||||||
### Runtime flow details
|
### Runtime flow details
|
||||||
|
@ -394,6 +420,8 @@ typically at `${HOME}/.cache/rclone/bisync/` on Linux.
|
||||||
Some errors are considered temporary and re-running the bisync is not blocked.
|
Some errors are considered temporary and re-running the bisync is not blocked.
|
||||||
The _critical return_ blocks further bisync runs.
|
The _critical return_ blocks further bisync runs.
|
||||||
|
|
||||||
|
See also: [`--resilient`](#resilient)
|
||||||
|
|
||||||
### Lock file
|
### Lock file
|
||||||
|
|
||||||
When bisync is running, a lock file is created in the bisync working directory,
|
When bisync is running, a lock file is created in the bisync working directory,
|
||||||
|
@ -1164,5 +1192,6 @@ causing dry runs to inadvertently commit filter changes
|
||||||
causing bisync to consider more files than necessary due to overbroad filters during delete operations
|
causing bisync to consider more files than necessary due to overbroad filters during delete operations
|
||||||
* [Improved detection of false positive change conflicts](https://forum.rclone.org/t/bisync-bugs-and-feature-requests/37636#:~:text=1.%20Identical%20files%20should%20be%20left%20alone%2C%20even%20if%20new/newer/changed%20on%20both%20sides)
|
* [Improved detection of false positive change conflicts](https://forum.rclone.org/t/bisync-bugs-and-feature-requests/37636#:~:text=1.%20Identical%20files%20should%20be%20left%20alone%2C%20even%20if%20new/newer/changed%20on%20both%20sides)
|
||||||
(identical files are now left alone instead of renamed)
|
(identical files are now left alone instead of renamed)
|
||||||
|
* Added experimental `--resilient` mode to allow [recovery from self-correctable errors](https://forum.rclone.org/t/bisync-bugs-and-feature-requests/37636#:~:text=2.%20Bisync%20should%20be%20more%20resilient%20to%20self%2Dcorrectable%20errors)
|
||||||
* Added [new `--ignore-listing-checksum` flag](https://forum.rclone.org/t/bisync-bugs-and-feature-requests/37636#:~:text=6.%20%2D%2Dignore%2Dchecksum%20should%20be%20split%20into%20two%20flags%20for%20separate%20purposes)
|
* Added [new `--ignore-listing-checksum` flag](https://forum.rclone.org/t/bisync-bugs-and-feature-requests/37636#:~:text=6.%20%2D%2Dignore%2Dchecksum%20should%20be%20split%20into%20two%20flags%20for%20separate%20purposes)
|
||||||
to distinguish from `--ignore-checksum`
|
to distinguish from `--ignore-checksum`
|
||||||
|
|
Loading…
Reference in a new issue