diff --git a/cmd/bisync/checkfn.go b/cmd/bisync/checkfn.go
new file mode 100644
index 000000000..cc462772b
--- /dev/null
+++ b/cmd/bisync/checkfn.go
@@ -0,0 +1,220 @@
+package bisync
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"fmt"
+	"strings"
+
+	"github.com/rclone/rclone/backend/crypt"
+	"github.com/rclone/rclone/cmd/bisync/bilib"
+	"github.com/rclone/rclone/cmd/check"
+	"github.com/rclone/rclone/fs"
+	"github.com/rclone/rclone/fs/accounting"
+	"github.com/rclone/rclone/fs/filter"
+	"github.com/rclone/rclone/fs/hash"
+	"github.com/rclone/rclone/fs/operations"
+)
+
+// hashType, fsrc, fdst and fcrypt carry state from WhichCheck to CryptCheckFn.
+// They are package-level variables, so a WhichCheck call that reaches one of
+// the crypt branches overwrites whatever an earlier call stored.
+var hashType hash.Type
+var fsrc, fdst fs.Fs
+var fcrypt *crypt.Fs
+
+// WhichCheck determines which CheckFn we should use based on the Fs types.
+// It is more robust and accurate than Check because
+// it will fallback to CryptCheck or DownloadCheck instead of --size-only!
+// It returns the *operations.CheckOpt with the CheckFn set.
+func WhichCheck(ctx context.Context, opt *operations.CheckOpt) *operations.CheckOpt {
+	ci := fs.GetConfig(ctx)
+	common := opt.Fsrc.Hashes().Overlap(opt.Fdst.Hashes())
+
+	// note that ci.IgnoreChecksum doesn't change the behavior of Check -- it's just a way to opt-out of cryptcheck/download
+	if common.Count() > 0 || ci.SizeOnly || ci.IgnoreChecksum {
+		// use normal check
+		opt.Check = CheckFn
+		return opt
+	}
+
+	srcCrypt, srcIsCrypt := opt.Fsrc.(*crypt.Fs)
+	dstCrypt, dstIsCrypt := opt.Fdst.(*crypt.Fs)
+
+	switch {
+	case dstIsCrypt:
+		// both are crypt, or only dst is crypt
+		hashType = dstCrypt.UnWrap().Hashes().GetOne()
+		if hashType != hash.None {
+			// use cryptcheck
+			fsrc = opt.Fsrc
+			fdst = opt.Fdst
+			fcrypt = dstCrypt
+			fs.Infof(fdst, "Crypt detected! Using cryptcheck instead of check. (Use --size-only or --ignore-checksum to disable)")
+			opt.Check = CryptCheckFn
+			return opt
+		}
+	case srcIsCrypt:
+		// only src is crypt
+		hashType = srcCrypt.UnWrap().Hashes().GetOne()
+		if hashType != hash.None {
+			// use reverse cryptcheck
+			fsrc = opt.Fdst
+			fdst = opt.Fsrc
+			fcrypt = srcCrypt
+			fs.Infof(fdst, "Crypt detected! Using cryptcheck instead of check. (Use --size-only or --ignore-checksum to disable)")
+			opt.Check = ReverseCryptCheckFn
+			return opt
+		}
+	}
+
+	// if we've gotten this far, neither check nor cryptcheck will work, so use --download.
+	// Log against opt.Fdst: the package-level fdst is only assigned in the crypt branches above.
+	fs.Infof(opt.Fdst, "Can't compare hashes, so using check --download for safety. (Use --size-only or --ignore-checksum to disable)")
+	opt.Check = DownloadCheckFn
+	return opt
+}
+
+// CheckFn is a slightly modified version of Check.
+// It compares src and dst with operations.CheckHashes and returns differ=true
+// if the check failed or the hashes differ, and noHash=true if the returned
+// hash type is hash.None.
+func CheckFn(ctx context.Context, dst, src fs.Object) (differ bool, noHash bool, err error) {
+	same, ht, err := operations.CheckHashes(ctx, src, dst)
+	if err != nil {
+		return true, false, err
+	}
+	if ht == hash.None {
+		return false, true, nil
+	}
+	if !same {
+		err = fmt.Errorf("%v differ", ht)
+		fs.Errorf(src, "%v", err)
+		return true, false, nil
+	}
+	return false, false, nil
+}
+
+// CryptCheckFn is a slightly modified version of CryptCheck.
+// dst must be a crypt object; the hash of its underlying object is compared
+// with the hash that fcrypt computes for src. It relies on the package-level
+// state set by WhichCheck and returns noHash=true if either hash is empty.
+func CryptCheckFn(ctx context.Context, dst, src fs.Object) (differ bool, noHash bool, err error) {
+	cryptDst, ok := dst.(*crypt.Object)
+	if !ok {
+		return true, false, fmt.Errorf("cryptcheck: %v is not a crypt object (%T)", dst, dst)
+	}
+	if fcrypt == nil {
+		// fcrypt, fsrc and fdst are only ever assigned together, by WhichCheck
+		return true, false, errors.New("cryptcheck: no crypt remote selected, call WhichCheck first")
+	}
+	underlyingDst := cryptDst.UnWrap()
+	underlyingHash, err := underlyingDst.Hash(ctx, hashType)
+	if err != nil {
+		return true, false, fmt.Errorf("error reading hash from underlying %v: %w", underlyingDst, err)
+	}
+	if underlyingHash == "" {
+		return false, true, nil
+	}
+	cryptHash, err := fcrypt.ComputeHash(ctx, cryptDst, src, hashType)
+	if err != nil {
+		return true, false, fmt.Errorf("error computing hash: %w", err)
+	}
+	if cryptHash == "" {
+		return false, true, nil
+	}
+	if cryptHash != underlyingHash {
+		err = fmt.Errorf("hashes differ (%s:%s) %q vs (%s:%s) %q", fdst.Name(), fdst.Root(), cryptHash, fsrc.Name(), fsrc.Root(), underlyingHash)
+		// pass err as an argument, not as the format string: it can contain '%'
+		fs.Errorf(src, "%v", err)
+		return true, false, nil
+	}
+	return false, false, nil
+}
+
+// ReverseCryptCheckFn is like CryptCheckFn except src and dst are switched
+// result: src is crypt, dst is non-crypt
+func ReverseCryptCheckFn(ctx context.Context, dst, src fs.Object) (differ bool, noHash bool, err error) {
+	return CryptCheckFn(ctx, src, dst)
+}
+
+// DownloadCheckFn is a slightly modified version of Check with --download.
+// It compares a and b with operations.CheckIdenticalDownload; on error it
+// returns differ=true and noHash=true along with the wrapped error.
+func DownloadCheckFn(ctx context.Context, a, b fs.Object) (differ bool, noHash bool, err error) {
+	differ, err = operations.CheckIdenticalDownload(ctx, a, b)
+	if err != nil {
+		return true, true, fmt.Errorf("failed to download: %w", err)
+	}
+	return differ, false, nil
+}
+
+// checkconflicts checks potential conflicts (to avoid renaming if already identical).
+// It returns the names that the check reported as matching.
+// The fs1 and fs2 parameters are not used; the check runs on b.fs1 and b.fs2.
+func (b *bisyncRun) checkconflicts(ctxCheck context.Context, filterCheck *filter.Filter, fs1, fs2 fs.Fs) (bilib.Names, error) {
+	matches := bilib.Names{}
+	if !filterCheck.HaveFilesFrom() {
+		return matches, nil
+	}
+	fs.Debugf(nil, "There are potential conflicts to check.")
+
+	opt, closeFiles, err := check.GetCheckOpt(b.fs1, b.fs2)
+	if err != nil {
+		b.critical = true
+		b.retryable = true
+		fs.Debugf(nil, "GetCheckOpt error: %v", err)
+		return matches, err
+	}
+	defer closeFiles()
+
+	// keep a typed handle on the buffer so it can be read back without fmt.Sprint
+	matchBuf := new(bytes.Buffer)
+	opt.Match = matchBuf
+
+	opt = WhichCheck(ctxCheck, opt)
+
+	fs.Infof(nil, "Checking potential conflicts...")
+	checkErr := operations.CheckFn(ctxCheck, opt)
+	fs.Infof(nil, "Finished checking the potential conflicts. %s", checkErr)
+
+	// reset error count, because we don't want to count check errors as bisync errors
+	accounting.Stats(ctxCheck).ResetErrors()
+
+	// return the list of identical files to check against later
+	if matchBuf.Len() > 0 {
+		matches = bilib.ToNames(strings.Split(matchBuf.String(), "\n"))
+	}
+	if matches.NotEmpty() {
+		fs.Debugf(nil, "The following potential conflicts were determined to be identical. %v", matches)
+	} else {
+		fs.Debugf(nil, "None of the conflicts were determined to be identical.")
+	}
+	return matches, nil
+}
+
+// WhichEqual is similar to WhichCheck, but checks a single object.
+// Returns true if the objects are equal, false if they differ or if we don't know
+// (including when the check options cannot be created).
+func WhichEqual(ctx context.Context, src, dst fs.Object, Fsrc, Fdst fs.Fs) bool {
+	opt, closeFiles, err := check.GetCheckOpt(Fsrc, Fdst)
+	if err != nil {
+		// don't use opt or closeFiles after an error
+		fs.Debugf(nil, "GetCheckOpt error: %v", err)
+		return false
+	}
+	defer closeFiles()
+
+	opt = WhichCheck(ctx, opt)
+	differ, noHash, err := opt.Check(ctx, dst, src)
+	if err != nil {
+		fs.Errorf(src, "failed to check: %v", err)
+		return false
+	}
+	if noHash {
+		fs.Errorf(src, "failed to check as hash is missing")
+		return false
+	}
+	return !differ
+}
diff --git a/cmd/bisync/deltas.go b/cmd/bisync/deltas.go index 23a10540f..d441ca404 100644 --- a/cmd/bisync/deltas.go +++ b/cmd/bisync/deltas.go @@ -3,7 +3,6 @@ package bisync import ( - "bytes" "context" "fmt" "path/filepath" @@ -11,9 +10,7 @@ import ( "strings" "github.com/rclone/rclone/cmd/bisync/bilib" - "github.com/rclone/rclone/cmd/check" "github.com/rclone/rclone/fs" - "github.com/rclone/rclone/fs/accounting" "github.com/rclone/rclone/fs/filter" "github.com/rclone/rclone/fs/operations" "golang.org/x/text/unicode/norm" @@ -96,47 +93,6 @@ func (ds *deltaSet) printStats() { ds.msg, nAll, nNew, nNewer, nOlder, nDeleted) } -// check potential conflicts (to avoid renaming if already identical) -func (b *bisyncRun) checkconflicts(ctxCheck context.Context, 
filterCheck *filter.Filter, fs1, fs2 fs.Fs) (bilib.Names, error) { - matches := bilib.Names{} - if filterCheck.HaveFilesFrom() { - fs.Debugf(nil, "There are potential conflicts to check.") - - opt, close, checkopterr := check.GetCheckOpt(b.fs1, b.fs2) - if checkopterr != nil { - b.critical = true - b.retryable = true - fs.Debugf(nil, "GetCheckOpt error: %v", checkopterr) - return matches, checkopterr - } - defer close() - - opt.Match = new(bytes.Buffer) - - // TODO: consider using custom CheckFn to act like cryptcheck, if either fs is a crypt remote and -c has been passed - // note that cryptCheck() is not currently exported - - fs.Infof(nil, "Checking potential conflicts...") - check := operations.Check(ctxCheck, opt) - fs.Infof(nil, "Finished checking the potential conflicts. %s", check) - - //reset error count, because we don't want to count check errors as bisync errors - accounting.Stats(ctxCheck).ResetErrors() - - //return the list of identical files to check against later - if len(fmt.Sprint(opt.Match)) > 0 { - matches = bilib.ToNames(strings.Split(fmt.Sprint(opt.Match), "\n")) - } - if matches.NotEmpty() { - fs.Debugf(nil, "The following potential conflicts were determined to be identical. 
%v", matches) - } else { - fs.Debugf(nil, "None of the conflicts were determined to be identical.") - } - - } - return matches, nil -} - // findDeltas func (b *bisyncRun) findDeltas(fctx context.Context, f fs.Fs, oldListing string, now *fileList, msg string) (ds *deltaSet, err error) { var old *fileList diff --git a/cmd/bisync/listing.go b/cmd/bisync/listing.go index 313569ed8..20f2fa00b 100644 --- a/cmd/bisync/listing.go +++ b/cmd/bisync/listing.go @@ -641,7 +641,8 @@ func (b *bisyncRun) recheck(ctxRecheck context.Context, src, dst fs.Fs, srcList, fs.Debugf(srcObj, "rechecking") for _, dstObj := range dstObjs { if srcObj.Remote() == dstObj.Remote() || srcObj.Remote() == b.aliases.Alias(dstObj.Remote()) { - if operations.Equal(ctxRecheck, srcObj, dstObj) || b.opt.DryRun { + // note: unlike Equal(), WhichEqual() does not update the modtime in dest if sums match but modtimes don't. + if b.opt.DryRun || WhichEqual(ctxRecheck, srcObj, dstObj, src, dst) { putObj(srcObj, src, srcList) putObj(dstObj, dst, dstList) resolved = append(resolved, srcObj.Remote()) @@ -655,7 +656,8 @@ func (b *bisyncRun) recheck(ctxRecheck context.Context, src, dst fs.Fs, srcList, // skip and error during --resync, as rollback is not possible if !slices.Contains(resolved, srcObj.Remote()) && !b.opt.DryRun { if b.opt.Resync { - b.handleErr(srcObj, "Unable to rollback during --resync", errors.New("no dstObj match or files not equal"), true, false) + err = errors.New("no dstObj match or files not equal") + b.handleErr(srcObj, "Unable to rollback during --resync", err, true, false) } else { toRollback = append(toRollback, srcObj.Remote()) } diff --git a/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.path1.lst-old b/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.path1.lst-old index 0f2ac67fe..6815fa4ee 100644 --- a/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.path1.lst-old +++ 
b/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.path1.lst-old @@ -1,5 +1,5 @@ # bisync listing v1 from test - 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "file1.txt" -- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "folder/HeLlO,wOrLd!.txt" -- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "folder/éééö.txt" +- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "folder/HeLlO,wOrLd!.txt" +- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "folder/éééö.txt" - 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö/測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö.txt" diff --git a/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.path2.lst-new b/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.path2.lst-new index 9afa4b347..6f843cfa9 100644 --- a/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.path2.lst-new +++ b/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.path2.lst-new @@ -1,5 +1,5 @@ # bisync listing v1 from test - 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-05T00:00:00.000000000+0000 "file1.txt" -- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "folder/hello,WORLD!.txt" -- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "folder/éééö.txt" +- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "folder/hello,WORLD!.txt" +- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "folder/éééö.txt" - 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 
"測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö/測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö.txt" diff --git a/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.path2.lst-old b/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.path2.lst-old index a8c82fc73..ed2547754 100644 --- a/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.path2.lst-old +++ b/cmd/bisync/testdata/test_normalization/golden/_testdir_path1.._testdir_path2.path2.lst-old @@ -1,5 +1,5 @@ # bisync listing v1 from test - 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "file1.txt" -- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "folder/hello,WORLD!.txt" -- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "folder/éééö.txt" +- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "folder/hello,WORLD!.txt" +- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "folder/éééö.txt" - 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö/測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö.txt" diff --git a/docs/content/bisync.md b/docs/content/bisync.md index f3354dc1e..947128fee 100644 --- a/docs/content/bisync.md +++ b/docs/content/bisync.md @@ -1367,6 +1367,8 @@ for performance improvements and less [risk of error](https://forum.rclone.org/t * Fixed handling of unicode normalization and case insensitivity, support for [`--fix-case`](/docs/#fix-case), [`--ignore-case-sync`](/docs/#ignore-case-sync), [`--no-unicode-normalization`](/docs/#no-unicode-normalization) * `--resync` is now much more efficient (especially for users of `--create-empty-src-dirs`) * Google Docs (and other files of unknown size) are now supported (with the same options as in `sync`) +* 
Equality checks before a sync conflict rename now fall back to `cryptcheck` (when possible) or `--download`, +instead of `--size-only`, when `check` is not available.