diff --git a/cmd/bisync/bilib/canonical.go b/cmd/bisync/bilib/canonical.go index 8dff41bff..be99d7f2d 100644 --- a/cmd/bisync/bilib/canonical.go +++ b/cmd/bisync/bilib/canonical.go @@ -11,7 +11,7 @@ import ( ) // FsPath converts Fs to a suitable rclone argument -func FsPath(f fs.Fs) string { +func FsPath(f fs.Info) string { name, path, slash := f.Name(), f.Root(), "/" if name == "local" { slash = string(os.PathSeparator) diff --git a/cmd/bisync/bisync_test.go b/cmd/bisync/bisync_test.go index 407cdab04..97c84fc81 100644 --- a/cmd/bisync/bisync_test.go +++ b/cmd/bisync/bisync_test.go @@ -641,6 +641,8 @@ func (b *bisyncTest) runBisync(ctx context.Context, args []string) (err error) { case "subdir": fs1 = addSubdir(b.path1, val) fs2 = addSubdir(b.path2, val) + case "ignore-listing-checksum": + opt.IgnoreListingChecksum = true default: return fmt.Errorf("invalid bisync option %q", arg) } diff --git a/cmd/bisync/deltas.go b/cmd/bisync/deltas.go index 6e0542dfb..9d565eba6 100644 --- a/cmd/bisync/deltas.go +++ b/cmd/bisync/deltas.go @@ -176,9 +176,11 @@ func (b *bisyncRun) findDeltas(fctx context.Context, f fs.Fs, oldListing, newLis } else { if old.getTime(file) != now.getTime(file) { if old.beforeOther(now, file) { + fs.Debugf(file, "(old: %v current: %v", old.getTime(file), now.getTime(file)) b.indent(msg, file, "File is newer") d |= deltaNewer } else { // Current version is older than prior sync. + fs.Debugf(file, "(old: %v current: %v", old.getTime(file), now.getTime(file)) b.indent(msg, file, "File is OLDER") d |= deltaOlder } @@ -217,7 +219,7 @@ func (b *bisyncRun) findDeltas(fctx context.Context, f fs.Fs, oldListing, newLis } // applyDeltas -func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (changes1, changes2 bool, err error) { +func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (changes1, changes2 bool, results2to1, results1to2 []Results, queues queues, err error) { path1 := bilib.FsPath(b.fs1) path2 := bilib.FsPath(b.fs2) @@ -226,6 +228,10 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change delete1 := bilib.Names{} delete2 := bilib.Names{} handled := bilib.Names{} + renamed1 := bilib.Names{} + renamed2 := bilib.Names{} + renameSkipped := bilib.Names{} + deletedonboth := bilib.Names{} ctxMove := b.opt.setDryRun(ctx) @@ -299,6 +305,7 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change equal := matches.Has(file) if equal { fs.Infof(nil, "Files are equal! 
Skipping: %s", file) + renameSkipped.Add(file) } else { fs.Debugf(nil, "Files are NOT equal: %s", file) b.indent("!Path1", p1+"..path1", "Renaming Path1 copy") @@ -307,6 +314,11 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change b.critical = true return } + if b.opt.DryRun { + renameSkipped.Add(file) + } else { + renamed1.Add(file) + } b.indent("!Path1", p2+"..path1", "Queue copy to Path2") copy1to2.Add(file + "..path1") @@ -315,6 +327,11 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change err = fmt.Errorf("path2 rename failed for %s: %w", file, err) return } + if b.opt.DryRun { + renameSkipped.Add(file) + } else { + renamed2.Add(file) + } b.indent("!Path2", p1+"..path2", "Queue copy to Path1") copy2to1.Add(file + "..path2") } @@ -334,6 +351,7 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change handled.Add(file) } else if d2.is(deltaDeleted) { handled.Add(file) + deletedonboth.Add(file) } } } @@ -360,25 +378,25 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change if copy2to1.NotEmpty() { changes1 = true b.indent("Path2", "Path1", "Do queued copies to") - err = b.fastCopy(ctx, b.fs2, b.fs1, copy2to1, "copy2to1") + results2to1, err = b.fastCopy(ctx, b.fs2, b.fs1, copy2to1, "copy2to1") if err != nil { return } //copy empty dirs from path2 to path1 (if --create-empty-src-dirs) - b.syncEmptyDirs(ctx, b.fs1, copy2to1, dirs2, "make") + b.syncEmptyDirs(ctx, b.fs1, copy2to1, dirs2, &results2to1, "make") } if copy1to2.NotEmpty() { changes2 = true b.indent("Path1", "Path2", "Do queued copies to") - err = b.fastCopy(ctx, b.fs1, b.fs2, copy1to2, "copy1to2") + results1to2, err = b.fastCopy(ctx, b.fs1, b.fs2, copy1to2, "copy1to2") if err != nil { return } //copy empty dirs from path1 to path2 (if --create-empty-src-dirs) - b.syncEmptyDirs(ctx, b.fs2, copy1to2, dirs1, "make") + b.syncEmptyDirs(ctx, b.fs2, copy1to2, dirs1, &results1to2, "make") } if delete1.NotEmpty() { @@ -386,7 +404,7 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change return } //propagate deletions of empty dirs from path2 to path1 (if --create-empty-src-dirs) - b.syncEmptyDirs(ctx, b.fs1, delete1, dirs1, "remove") + b.syncEmptyDirs(ctx, b.fs1, delete1, dirs1, &results2to1, "remove") } if delete2.NotEmpty() { @@ -394,9 +412,16 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change return } //propagate deletions of empty dirs from path1 to path2 (if --create-empty-src-dirs) - b.syncEmptyDirs(ctx, b.fs2, delete2, dirs2, "remove") + b.syncEmptyDirs(ctx, b.fs2, delete2, dirs2, &results1to2, "remove") } + queues.copy1to2 = copy1to2 + queues.copy2to1 = copy2to1 + queues.renamed1 = renamed1 + queues.renamed2 = renamed2 + queues.renameSkipped = renameSkipped + queues.deletedonboth = deletedonboth + return } diff --git a/cmd/bisync/listing.go b/cmd/bisync/listing.go index b2edb7ec9..4a71d1500 100644 --- a/cmd/bisync/listing.go +++ b/cmd/bisync/listing.go @@ -9,14 +9,18 @@ import ( "io" "os" "regexp" + "sort" "strconv" "strings" "sync" "time" "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/filter" "github.com/rclone/rclone/fs/hash" + "github.com/rclone/rclone/fs/operations" "github.com/rclone/rclone/fs/walk" + "golang.org/x/exp/slices" ) // ListingHeader defines first line of a listing @@ -70,11 +74,29 @@ func (ls *fileList) empty() bool { func (ls *fileList) has(file string) bool { _, found := ls.info[file] + if !found { + //try unquoting + file, 
_ = strconv.Unquote(`"` + file + `"`)
+		_, found = ls.info[file]
+	}
 	return found
 }
 
 func (ls *fileList) get(file string) *fileInfo {
-	return ls.info[file]
+	info, found := ls.info[file]
+	if !found {
+		// try unquoting
+		file, _ = strconv.Unquote(`"` + file + `"`)
+		info = ls.info[file]
+	}
+	return info
+}
+
+func (ls *fileList) remove(file string) {
+	if ls.has(file) {
+		ls.list = slices.Delete(ls.list, slices.Index(ls.list, file), slices.Index(ls.list, file)+1)
+		delete(ls.info, file)
+	}
 }
 
 func (ls *fileList) put(file string, size int64, time time.Time, hash, id string, flags string) {
@@ -82,6 +104,9 @@ func (ls *fileList) put(file string, size int64, time time.Time, hash, id string
 	if fi != nil {
 		fi.size = size
 		fi.time = time
+		fi.hash = hash
+		fi.id = id
+		fi.flags = flags
 	} else {
 		fi = &fileInfo{
 			size: size,
@@ -120,12 +145,20 @@ func (ls *fileList) afterTime(file string, time time.Time) bool {
 	return fi.time.After(time)
 }
 
+// sort by path name
+func (ls *fileList) sort() {
+	sort.SliceStable(ls.list, func(i, j int) bool {
+		return ls.list[i] < ls.list[j]
+	})
+}
+
 // save will save listing to a file.
 func (ls *fileList) save(ctx context.Context, listing string) error {
 	file, err := os.Create(listing)
 	if err != nil {
 		return err
 	}
+	ls.sort()
 
 	hashName := ""
 	if ls.hash != hash.None {
@@ -376,3 +409,221 @@ func (b *bisyncRun) listDirsOnly(listingNum int) (*fileList, error) {
 
 	return dirsonly, err
 }
+
+// ConvertPrecision returns the Modtime truncated to Dest's precision if lower, otherwise unchanged.
+// We need to use the other fs's precision (if lower) when copying.
+// Note: we need to use Truncate rather than Round so that After() is reliable.
+// (2023-11-02 20:22:45.552679442 +0000 UTC < 2023-11-02 20:22:45.553 +0000 UTC)
+func ConvertPrecision(Modtime time.Time, dst fs.Fs) time.Time {
+	DestPrecision := dst.Precision()
+
+	// In case it's wrapping an Fs with lower precision, try unwrapping and use the lowest.
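+	// (Illustrative aside, not part of the original patch: using the timestamps from the
+	// doc comment above, rounding to millisecond precision would store 20:22:45.553, which
+	// is *later* than the true time 20:22:45.552679442, so the file could later look modified.
+	// Truncate stores 20:22:45.552, which never exceeds the true time, so After() stays reliable.)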
+	if Modtime.Truncate(DestPrecision).After(Modtime.Truncate(fs.UnWrapFs(dst).Precision())) {
+		DestPrecision = fs.UnWrapFs(dst).Precision()
+	}
+
+	if Modtime.After(Modtime.Truncate(DestPrecision)) {
+		return Modtime.Truncate(DestPrecision)
+	}
+	return Modtime
+}
+
+// modifyListing will modify the listing based on the results of the sync
+func (b *bisyncRun) modifyListing(ctx context.Context, src fs.Fs, dst fs.Fs, srcListing string, dstListing string, results []Results, queues queues, is1to2 bool) (err error) {
+	queue := queues.copy2to1
+	renames := queues.renamed2
+	direction := "2to1"
+	if is1to2 {
+		queue = queues.copy1to2
+		renames = queues.renamed1
+		direction = "1to2"
+	}
+
+	fs.Debugf(nil, "updating %s", direction)
+	fs.Debugf(nil, "RESULTS: %v", results)
+	fs.Debugf(nil, "QUEUE: %v", queue)
+
+	srcList, err := b.loadListing(srcListing)
+	if err != nil {
+		return fmt.Errorf("cannot read prior listing: %w", err)
+	}
+	dstList, err := b.loadListing(dstListing)
+	if err != nil {
+		return fmt.Errorf("cannot read prior listing: %w", err)
+	}
+	// set list hash type
+	if b.opt.Resync && !b.opt.IgnoreListingChecksum {
+		srcList.hash = src.Hashes().GetOne()
+		dstList.hash = dst.Hashes().GetOne()
+	}
+
+	srcWinners := newFileList()
+	dstWinners := newFileList()
+	errors := newFileList()
+	ctxRecheck, filterRecheck := filter.AddConfig(ctx)
+
+	for _, result := range results {
+		if result.Name == "" {
+			continue
+		}
+
+		if result.Flags == "d" && !b.opt.CreateEmptySrcDirs {
+			continue
+		}
+
+		// build src winners list
+		if result.IsSrc && result.Src != "" && (result.Winner.Err == nil || result.Flags == "d") {
+			srcWinners.put(result.Name, result.Size, ConvertPrecision(result.Modtime, src), result.Hash, "-", result.Flags)
+			fs.Debugf(nil, "winner: copy to src: %v", result)
+		}
+
+		// build dst winners list
+		if result.IsWinner && result.Winner.Side != "none" && (result.Winner.Err == nil || result.Flags == "d") {
+			dstWinners.put(result.Name, result.Size, ConvertPrecision(result.Modtime, dst), result.Hash, "-", result.Flags)
+			fs.Debugf(nil, "winner: copy to dst: %v", result)
+		}
+
+		// build errors list
+		if result.Err != nil || result.Winner.Err != nil {
+			errors.put(result.Name, result.Size, result.Modtime, result.Hash, "-", result.Flags)
+			if err := filterRecheck.AddFile(result.Name); err != nil {
+				fs.Debugf(result.Name, "error adding file to recheck filter: %v", err)
+			}
+		}
+	}
+
+	updateLists := func(side string, winners, list *fileList) {
+		// removals from side
+		for _, oldFile := range queue.ToList() {
+			if !winners.has(oldFile) && list.has(oldFile) && !errors.has(oldFile) {
+				list.remove(oldFile)
+				fs.Debugf(nil, "decision: removed from %s: %v", side, oldFile)
+			} else if winners.has(oldFile) {
+				// copies to side
+				new := winners.get(oldFile)
+				list.put(oldFile, new.size, new.time, new.hash, new.id, new.flags)
+				fs.Debugf(nil, "decision: copied to %s: %v", side, oldFile)
+			} else {
+				fs.Debugf(oldFile, "file in queue but missing from %s transfers", side)
+				if err := filterRecheck.AddFile(oldFile); err != nil {
+					fs.Debugf(oldFile, "error adding file to recheck filter: %v", err)
+				}
+			}
+		}
+	}
+	updateLists("src", srcWinners, srcList)
+	updateLists("dst", dstWinners, dstList)
+
+	// TODO: rollback on error
+
+	// account for "deltaOthers" we handled separately
+	if queues.deletedonboth.NotEmpty() {
+		for file := range queues.deletedonboth {
+			srcList.remove(file)
+			dstList.remove(file)
+		}
+	}
+	if renames.NotEmpty() && !b.opt.DryRun {
+		// renamed on src and copied to dst
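+		// (Illustrative note, assuming the conflict-rename suffixes used in applyDeltas:
+		// a conflict on "file.txt" leaves "file.txt..path1" and "file.txt..path2", and the
+		// loop below prefers those freshly-copied entries as its info source, falling back
+		// to the original name only if neither renamed copy made it into the listing.)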
+		renamesList := renames.ToList()
+		for _, file := range renamesList {
+			// we'll handle the other side when we go the other direction
+			newName := file + "..path2"
+			oppositeName := file + "..path1"
+			if is1to2 {
+				newName = file + "..path1"
+				oppositeName = file + "..path2"
+			}
+			var new *fileInfo
+			// we prefer to get the info from the ..path1 / ..path2 versions
+			// since they were actually copied, as opposed to operations.MoveFile()'d.
+			// the size/time/hash info is therefore fresher on the renames,
+			// but we'll settle for the original if we have to.
+			if srcList.has(newName) {
+				new = srcList.get(newName)
+			} else if srcList.has(oppositeName) {
+				new = srcList.get(oppositeName)
+			} else if srcList.has(file) {
+				new = srcList.get(file)
+			} else {
+				if err := filterRecheck.AddFile(file); err != nil {
+					fs.Debugf(file, "error adding file to recheck filter: %v", err)
+				}
+				continue // no info to copy from; the recheck will repopulate this entry
+			}
+			srcList.put(newName, new.size, new.time, new.hash, new.id, new.flags)
+			dstList.put(newName, new.size, ConvertPrecision(new.time, src), new.hash, new.id, new.flags)
+			srcList.remove(file)
+			dstList.remove(file)
+		}
+	}
+
+	// recheck the ones we skipped because they were equal
+	// we never got their info because they were never synced.
+	// TODO: add flag to skip this for people who don't care and would rather avoid?
+	if queues.renameSkipped.NotEmpty() {
+		skippedList := queues.renameSkipped.ToList()
+		for _, file := range skippedList {
+			if err := filterRecheck.AddFile(file); err != nil {
+				fs.Debugf(file, "error adding file to recheck filter: %v", err)
+			}
+		}
+	}
+
+	if filterRecheck.HaveFilesFrom() {
+		b.recheck(ctxRecheck, src, dst, srcList, dstList)
+	}
+
+	// update files
+	err = srcList.save(ctx, srcListing)
+	if err != nil {
+		b.abort = true
+	}
+	err = dstList.save(ctx, dstListing)
+	if err != nil {
+		b.abort = true
+	}
+
+	return err
+}
+
+// recheck the ones we're not sure about
+func (b *bisyncRun) recheck(ctxRecheck context.Context, src, dst fs.Fs, srcList, dstList *fileList) {
+	var srcObjs []fs.Object
+	var dstObjs []fs.Object
+
+	if err := operations.ListFn(ctxRecheck, src, func(obj fs.Object) {
+		srcObjs = append(srcObjs, obj)
+	}); err != nil {
+		fs.Debugf(src, "error rechecking src obj: %v", err)
+	}
+	if err := operations.ListFn(ctxRecheck, dst, func(obj fs.Object) {
+		dstObjs = append(dstObjs, obj)
+	}); err != nil {
+		fs.Debugf(dst, "error rechecking dst obj: %v", err)
+	}
+
+	putObj := func(obj fs.Object, f fs.Fs, list *fileList) {
+		hashVal := ""
+		if !b.opt.IgnoreListingChecksum {
+			hashType := f.Hashes().GetOne()
+			if hashType != hash.None {
+				hashVal, _ = obj.Hash(ctxRecheck, hashType)
+			}
+		}
+		list.put(obj.Remote(), obj.Size(), obj.ModTime(ctxRecheck), hashVal, "-", "-")
+	}
+
+	for _, srcObj := range srcObjs {
+		fs.Debugf(srcObj, "rechecking")
+		for _, dstObj := range dstObjs {
+			if srcObj.Remote() == dstObj.Remote() {
+				if operations.Equal(ctxRecheck, srcObj, dstObj) || b.opt.DryRun {
+					putObj(srcObj, src, srcList)
+					putObj(dstObj, dst, dstList)
+				} else {
+					fs.Infof(srcObj, "files not equal on recheck: %v %v", srcObj, dstObj)
+				}
+			}
+		}
+	}
+}
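For readers following the new Results plumbing: modifyListing above consumes a stream of Results that queue.go (below) produces by JSON-encoding one object per synced file and decoding until EOF. A minimal, self-contained sketch of that round-trip, using a trimmed, hypothetical stand-in struct rather than the real Results type:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
)

// result is a trimmed, hypothetical stand-in for bisync's Results struct.
type result struct {
	Name string
	Size int64
}

func main() {
	// Producer side (like WriteResults): one JSON object per synced file,
	// appended to a single stream.
	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)
	_ = enc.Encode(result{Name: "file1.txt", Size: 19})
	_ = enc.Encode(result{Name: "subdir/file20.txt", Size: 19})

	// Consumer side (like ReadResults): Decode pops one object at a time
	// until io.EOF marks the end of the stream.
	dec := json.NewDecoder(&buf)
	var all []result
	for {
		var r result
		err := dec.Decode(&r)
		if err == io.EOF {
			break
		}
		if err != nil {
			break // a production version should surface this error
		}
		all = append(all, r)
	}
	fmt.Printf("decoded %d results\n", len(all)) // decoded 2 results
}
```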
diff --git a/cmd/bisync/operations.go b/cmd/bisync/operations.go index 250437e62..d53a9c40a 100644
--- a/cmd/bisync/operations.go
+++ b/cmd/bisync/operations.go
@@ -16,7 +16,6 @@ import (
 	"github.com/rclone/rclone/fs"
 	"github.com/rclone/rclone/fs/filter"
 	"github.com/rclone/rclone/fs/operations"
-	"github.com/rclone/rclone/fs/sync"
 	"github.com/rclone/rclone/lib/atexit"
 )
 
@@ -35,6 +34,15 @@ type bisyncRun struct {
 	opt *Options
 }
 
+type queues struct {
+	copy1to2      bilib.Names
+	copy2to1      bilib.Names
+	renamed1      bilib.Names // renamed on 1 and copied to 2
+	renamed2      bilib.Names // renamed on 2 and copied to 1
+	renameSkipped bilib.Names // not renamed because it was equal
+	deletedonboth bilib.Names
+}
+
 // Bisync handles lock file, performs bisync run and checks exit status
 func Bisync(ctx context.Context, fs1, fs2 fs.Fs, optArg *Options) (err error) {
 	opt := *optArg // ensure that input is never changed
@@ -256,13 +264,18 @@ func (b *bisyncRun) runLocked(octx context.Context, listing1, listing2 string) (
 
 	// Determine and apply changes to Path1 and Path2
 	noChanges := ds1.empty() && ds2.empty()
-	changes1 := false
-	changes2 := false
+	changes1 := false // 2to1
+	changes2 := false // 1to2
+	results2to1 := []Results{}
+	results1to2 := []Results{}
+
+	queues := queues{}
+
 	if noChanges {
 		fs.Infof(nil, "No changes found")
 	} else {
 		fs.Infof(nil, "Applying changes")
-		changes1, changes2, err = b.applyDeltas(octx, ds1, ds2)
+		changes1, changes2, results2to1, results1to2, queues, err = b.applyDeltas(octx, ds1, ds2)
 		if err != nil {
 			b.critical = true
 			// b.retryable = true // not sure about this one
@@ -277,13 +290,13 @@ func (b *bisyncRun) runLocked(octx context.Context, listing1, listing2 string) (
 		err1 = bilib.CopyFileIfExists(newListing1, listing1)
 		err2 = bilib.CopyFileIfExists(newListing2, listing2)
 	} else {
-		if changes1 {
-			_, err1 = b.makeListing(fctx, b.fs1, listing1)
+		if changes1 { // 2to1
+			err1 = b.modifyListing(fctx, b.fs2, b.fs1, listing2, listing1, results2to1, queues, false)
 		} else {
 			err1 = bilib.CopyFileIfExists(newListing1, listing1)
 		}
-		if changes2 {
-			_, err2 = b.makeListing(fctx, b.fs2, listing2)
+		if changes2 { // 1to2
+			err2 = b.modifyListing(fctx, b.fs1, b.fs2, listing1, listing2, results1to2, queues, true)
 		} else {
 			err2 = bilib.CopyFileIfExists(newListing2, listing2)
 		}
@@ -394,11 +407,15 @@ func (b *bisyncRun) resync(octx, fctx context.Context, listing1, listing2 string
 			copy2to1 = append(copy2to1, file)
 		}
 	}
+	var results2to1 []Results
+	var results1to2 []Results
+	var results2to1Dirs []Results
+	queues := queues{}
 
 	if len(copy2to1) > 0 {
 		b.indent("Path2", "Path1", "Resync is doing queued copies to")
 		// octx does not have extra filters!
- err = b.fastCopy(octx, b.fs2, b.fs1, bilib.ToNames(copy2to1), "resync-copy2to1") + results2to1, err = b.fastCopy(octx, b.fs2, b.fs1, bilib.ToNames(copy2to1), "resync-copy2to1") if err != nil { b.critical = true return err @@ -413,7 +430,7 @@ func (b *bisyncRun) resync(octx, fctx context.Context, listing1, listing2 string // prevent overwriting Google Doc files (their size is -1) filterSync.Opt.MinSize = 0 } - if err = sync.CopyDir(ctxSync, b.fs2, b.fs1, b.opt.CreateEmptySrcDirs); err != nil { + if results1to2, err = b.resyncDir(ctxSync, b.fs1, b.fs2); err != nil { b.critical = true return err } @@ -434,20 +451,39 @@ func (b *bisyncRun) resync(octx, fctx context.Context, listing1, listing2 string fs.Infof(nil, "Resynching Path2 to Path1 (for empty dirs)") - //note copy (not sync) and dst comes before src - if err = sync.CopyDir(ctxSync, b.fs1, b.fs2, b.opt.CreateEmptySrcDirs); err != nil { + // note copy (not sync) and dst comes before src + if results2to1Dirs, err = b.resyncDir(ctxSync, b.fs2, b.fs1); err != nil { b.critical = true return err } } fs.Infof(nil, "Resync updating listings") - if _, err = b.makeListing(fctx, b.fs1, listing1); err != nil { + if err := bilib.CopyFileIfExists(newListing1, listing1); err != nil { + return err + } + if err := bilib.CopyFileIfExists(newListing2, listing2); err != nil { + return err + } + + // resync 2to1 + queues.copy2to1 = bilib.ToNames(copy2to1) + if err = b.modifyListing(fctx, b.fs2, b.fs1, listing2, listing1, results2to1, queues, false); err != nil { b.critical = true return err } - if _, err = b.makeListing(fctx, b.fs2, listing2); err != nil { + // resync 1to2 + queues.copy1to2 = bilib.ToNames(filesNow1.list) + if err = b.modifyListing(fctx, b.fs1, b.fs2, listing1, listing2, results1to2, queues, true); err != nil { + b.critical = true + return err + } + + // resync 2to1 (dirs) + dirs2, _ := b.listDirsOnly(2) + queues.copy2to1 = bilib.ToNames(dirs2.list) + if err = b.modifyListing(fctx, b.fs2, b.fs1, listing2, listing1, results2to1Dirs, queues, false); err != nil { b.critical = true return err } diff --git a/cmd/bisync/queue.go b/cmd/bisync/queue.go index d213ff74a..111b8991b 100644 --- a/cmd/bisync/queue.go +++ b/cmd/bisync/queue.go @@ -2,39 +2,187 @@ package bisync import ( "context" + "encoding/json" "fmt" + "io" "sort" + mutex "sync" // renamed as "sync" already in use + "time" "github.com/rclone/rclone/cmd/bisync/bilib" "github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs/filter" "github.com/rclone/rclone/fs/operations" "github.com/rclone/rclone/fs/sync" + "golang.org/x/text/unicode/norm" ) -func (b *bisyncRun) fastCopy(ctx context.Context, fsrc, fdst fs.Fs, files bilib.Names, queueName string) error { +// Results represents a pair of synced files, as reported by the LoggerFn +// Bisync uses this to determine what happened during the sync, and modify the listings accordingly +type Results struct { + Src string + Dst string + Name string + Size int64 + Modtime time.Time + Hash string + Flags string + Sigil operations.Sigil + Err error + Winner operations.Winner + IsWinner bool + IsSrc bool + IsDst bool +} + +var logger = operations.NewLoggerOpt() +var lock mutex.Mutex +var ignoreListingChecksum bool + +// FsPathIfAny handles type assertions and returns a formatted bilib.FsPath if valid, otherwise "" +func FsPathIfAny(x fs.DirEntry) string { + obj, ok := x.(fs.Object) + if x != nil && ok { + return bilib.FsPath(obj.Fs()) + } + return "" +} + +func resultName(result Results, side, src, dst fs.DirEntry) string { + if side != nil { + 
return side.Remote() + } else if result.IsSrc && dst != nil { + return dst.Remote() + } else if src != nil { + return src.Remote() + } + return "" +} + +// WriteResults is Bisync's LoggerFn +func WriteResults(ctx context.Context, sigil operations.Sigil, src, dst fs.DirEntry, err error) { + lock.Lock() + defer lock.Unlock() + + opt := operations.GetLoggerOpt(ctx) + result := Results{ + Sigil: sigil, + Src: FsPathIfAny(src), + Dst: FsPathIfAny(dst), + Err: err, + } + + result.Winner = operations.WinningSide(ctx, sigil, src, dst, err) + + fss := []fs.DirEntry{src, dst} + for i, side := range fss { + + result.Name = resultName(result, side, src, dst) + result.IsSrc = i == 0 + result.IsDst = i == 1 + result.Flags = "-" + if side != nil { + result.Size = side.Size() + result.Modtime = side.ModTime(ctx).In(time.UTC) + + if !ignoreListingChecksum { + sideObj, ok := side.(fs.ObjectInfo) + if ok { + result.Hash, _ = sideObj.Hash(ctx, sideObj.Fs().Hashes().GetOne()) + } + } + } + result.IsWinner = result.Winner.Obj == side + + // used during resync only + if err == fs.ErrorIsDir { + if src != nil { + result.Src = src.Remote() + result.Name = src.Remote() + } else { + result.Dst = dst.Remote() + result.Name = dst.Remote() + } + result.Flags = "d" + result.Size = 0 + } + + fs.Debugf(nil, "writing result: %v", result) + err := json.NewEncoder(opt.JSON).Encode(result) + if err != nil { + fs.Errorf(result, "Error encoding JSON: %v", err) + } + } +} + +// ReadResults decodes the JSON data from WriteResults +func ReadResults(results io.Reader) []Results { + dec := json.NewDecoder(results) + var slice []Results + for { + var r Results + if err := dec.Decode(&r); err == io.EOF { + break + } + fs.Debugf(nil, "result: %v", r) + slice = append(slice, r) + } + return slice +} + +func (b *bisyncRun) fastCopy(ctx context.Context, fsrc, fdst fs.Fs, files bilib.Names, queueName string) ([]Results, error) { if err := b.saveQueue(files, queueName); err != nil { - return err + return nil, err } ctxCopy, filterCopy := filter.AddConfig(b.opt.setDryRun(ctx)) for _, file := range files.ToList() { if err := filterCopy.AddFile(file); err != nil { - return err + return nil, err + } + // macOS + if err := filterCopy.AddFile(norm.NFD.String(file)); err != nil { + return nil, err } } + ignoreListingChecksum = b.opt.IgnoreListingChecksum + logger.LoggerFn = WriteResults + ctxCopyLogger := operations.WithSyncLogger(ctxCopy, logger) var err error if b.opt.Resync { - err = sync.CopyDir(ctxCopy, fdst, fsrc, b.opt.CreateEmptySrcDirs) + err = sync.CopyDir(ctxCopyLogger, fdst, fsrc, b.opt.CreateEmptySrcDirs) } else { - err = sync.Sync(ctxCopy, fdst, fsrc, b.opt.CreateEmptySrcDirs) + err = sync.Sync(ctxCopyLogger, fdst, fsrc, b.opt.CreateEmptySrcDirs) } - return err + fs.Debugf(nil, "logger is: %v", logger) + + getResults := ReadResults(logger.JSON) + fs.Debugf(nil, "Got %v results for %v", len(getResults), queueName) + + lineFormat := "%s %8d %s %s %s %q\n" + for _, result := range getResults { + fs.Debugf(nil, lineFormat, result.Flags, result.Size, result.Hash, "", result.Modtime, result.Name) + } + + return getResults, err +} + +func (b *bisyncRun) resyncDir(ctx context.Context, fsrc, fdst fs.Fs) ([]Results, error) { + ignoreListingChecksum = b.opt.IgnoreListingChecksum + logger.LoggerFn = WriteResults + ctxCopyLogger := operations.WithSyncLogger(ctx, logger) + err := sync.CopyDir(ctxCopyLogger, fdst, fsrc, b.opt.CreateEmptySrcDirs) + fs.Debugf(nil, "logger is: %v", logger) + + getResults := ReadResults(logger.JSON) + fs.Debugf(nil, 
"Got %v results for %v", len(getResults), "resync") + + return getResults, err } // operation should be "make" or "remove" -func (b *bisyncRun) syncEmptyDirs(ctx context.Context, dst fs.Fs, candidates bilib.Names, dirsList *fileList, operation string) { +func (b *bisyncRun) syncEmptyDirs(ctx context.Context, dst fs.Fs, candidates bilib.Names, dirsList *fileList, results *[]Results, operation string) { if b.opt.CreateEmptySrcDirs && (!b.opt.Resync || operation == "make") { candidatesList := candidates.ToList() @@ -46,18 +194,50 @@ func (b *bisyncRun) syncEmptyDirs(ctx context.Context, dst fs.Fs, candidates bil for _, s := range candidatesList { var direrr error if dirsList.has(s) { //make sure it's a dir, not a file + r := Results{} + r.Name = s + r.Size = 0 + r.Modtime = dirsList.getTime(s).In(time.UTC) + r.Flags = "d" + r.Err = nil + r.Winner = operations.Winner{ // note: Obj not set + Side: "src", + Err: nil, + } + + rSrc := r + rDst := r + rSrc.IsSrc = true + rSrc.IsDst = false + rDst.IsSrc = false + rDst.IsDst = true + rSrc.IsWinner = true + rDst.IsWinner = false + if operation == "remove" { // directories made empty by the sync will have already been deleted during the sync // this just catches the already-empty ones (excluded from sync by --files-from filter) direrr = operations.TryRmdir(ctx, dst, s) + rSrc.Sigil = operations.MissingOnSrc + rDst.Sigil = operations.MissingOnSrc + rSrc.Dst = s + rDst.Dst = s + rSrc.Winner.Side = "none" + rDst.Winner.Side = "none" } else if operation == "make" { direrr = operations.Mkdir(ctx, dst, s) + rSrc.Sigil = operations.MissingOnDst + rDst.Sigil = operations.MissingOnDst + rSrc.Src = s + rDst.Src = s } else { direrr = fmt.Errorf("invalid operation. Expected 'make' or 'remove', received '%q'", operation) } if direrr != nil { fs.Debugf(nil, "Error syncing directory: %v", direrr) + } else { + *results = append(*results, rSrc, rDst) } } } diff --git a/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.copy1to2.que b/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.copy1to2.que new file mode 100644 index 000000000..a49deb16c --- /dev/null +++ b/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.copy1to2.que @@ -0,0 +1 @@ +"subdir/file20.txt" diff --git a/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.copy2to1.que b/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.copy2to1.que new file mode 100644 index 000000000..029f3e190 --- /dev/null +++ b/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.copy2to1.que @@ -0,0 +1 @@ +"file1.txt" diff --git a/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.path1.lst b/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.path1.lst new file mode 100644 index 000000000..f0208cb9a --- /dev/null +++ b/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.path1.lst @@ -0,0 +1,9 @@ +# bisync listing v1 from test +- 109 - - 2000-01-01T00:00:00.000000000+0000 "RCLONE_TEST" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy1.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy2.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy3.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy4.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy5.txt" +- 19 - - 
2001-01-02T00:00:00.000000000+0000 "file1.txt" +- 19 - - 2001-01-02T00:00:00.000000000+0000 "subdir/file20.txt" diff --git a/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.path1.lst-new b/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.path1.lst-new new file mode 100644 index 000000000..ff7009316 --- /dev/null +++ b/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.path1.lst-new @@ -0,0 +1,9 @@ +# bisync listing v1 from test +- 109 - - 2000-01-01T00:00:00.000000000+0000 "RCLONE_TEST" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy1.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy2.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy3.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy4.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy5.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.txt" +- 19 - - 2001-01-02T00:00:00.000000000+0000 "subdir/file20.txt" diff --git a/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.path2.lst b/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.path2.lst new file mode 100644 index 000000000..f0208cb9a --- /dev/null +++ b/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.path2.lst @@ -0,0 +1,9 @@ +# bisync listing v1 from test +- 109 - - 2000-01-01T00:00:00.000000000+0000 "RCLONE_TEST" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy1.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy2.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy3.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy4.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy5.txt" +- 19 - - 2001-01-02T00:00:00.000000000+0000 "file1.txt" +- 19 - - 2001-01-02T00:00:00.000000000+0000 "subdir/file20.txt" diff --git a/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.path2.lst-new b/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.path2.lst-new new file mode 100644 index 000000000..21a22d1e3 --- /dev/null +++ b/cmd/bisync/testdata/test_ignorelistingchecksum/golden/_testdir_path1.._testdir_path2.path2.lst-new @@ -0,0 +1,9 @@ +# bisync listing v1 from test +- 109 - - 2000-01-01T00:00:00.000000000+0000 "RCLONE_TEST" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy1.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy2.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy3.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy4.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "file1.copy5.txt" +- 19 - - 2001-01-02T00:00:00.000000000+0000 "file1.txt" +- 0 - - 2000-01-01T00:00:00.000000000+0000 "subdir/file20.txt" diff --git a/cmd/bisync/testdata/test_ignorelistingchecksum/golden/test.log b/cmd/bisync/testdata/test_ignorelistingchecksum/golden/test.log new file mode 100644 index 000000000..993e36559 --- /dev/null +++ b/cmd/bisync/testdata/test_ignorelistingchecksum/golden/test.log @@ -0,0 +1,39 @@ +(01) : test basic + + +(02) : test initial bisync +(03) : bisync resync +INFO : Synching Path1 "{path1/}" with Path2 "{path2/}" +INFO : Copying unique Path2 files to Path1 +INFO : Resynching Path1 to Path2 +INFO : Resync updating listings +INFO : Bisync successful +(04) : bisync resync ignore-listing-checksum +INFO : Synching Path1 "{path1/}" with Path2 "{path2/}" +INFO : Copying unique Path2 files to 
Path1 +INFO : Resynching Path1 to Path2 +INFO : Resync updating listings +INFO : Bisync successful + +(05) : test place newer files on both paths + +(06) : touch-copy 2001-01-02 {datadir/}file1.txt {path2/} +(07) : copy-as {datadir/}file1.txt {path1/}subdir file20.txt + +(08) : test bisync run +(09) : bisync ignore-listing-checksum +INFO : Synching Path1 "{path1/}" with Path2 "{path2/}" +INFO : Path1 checking for diffs +INFO : - Path1 File is newer - subdir/file20.txt +INFO : Path1: 1 changes: 0 new, 1 newer, 0 older, 0 deleted +INFO : Path2 checking for diffs +INFO : - Path2 File is newer - file1.txt +INFO : Path2: 1 changes: 0 new, 1 newer, 0 older, 0 deleted +INFO : Applying changes +INFO : - Path1 Queue copy to Path2 - {path2/}subdir/file20.txt +INFO : - Path2 Queue copy to Path1 - {path1/}file1.txt +INFO : - Path2 Do queued copies to - Path1 +INFO : - Path1 Do queued copies to - Path2 +INFO : Updating listings +INFO : Validating listings for Path1 "{path1/}" vs Path2 "{path2/}" +INFO : Bisync successful diff --git a/cmd/bisync/testdata/test_ignorelistingchecksum/initial/RCLONE_TEST b/cmd/bisync/testdata/test_ignorelistingchecksum/initial/RCLONE_TEST new file mode 100644 index 000000000..d8ca97c2a --- /dev/null +++ b/cmd/bisync/testdata/test_ignorelistingchecksum/initial/RCLONE_TEST @@ -0,0 +1 @@ +This file is used for testing the health of rclone accesses to the local/remote file system. Do not delete. diff --git a/cmd/bisync/testdata/test_ignorelistingchecksum/initial/file1.copy1.txt b/cmd/bisync/testdata/test_ignorelistingchecksum/initial/file1.copy1.txt new file mode 100644 index 000000000..e69de29bb diff --git a/cmd/bisync/testdata/test_ignorelistingchecksum/initial/file1.copy2.txt b/cmd/bisync/testdata/test_ignorelistingchecksum/initial/file1.copy2.txt new file mode 100644 index 000000000..e69de29bb diff --git a/cmd/bisync/testdata/test_ignorelistingchecksum/initial/file1.copy3.txt b/cmd/bisync/testdata/test_ignorelistingchecksum/initial/file1.copy3.txt new file mode 100644 index 000000000..e69de29bb diff --git a/cmd/bisync/testdata/test_ignorelistingchecksum/initial/file1.copy4.txt b/cmd/bisync/testdata/test_ignorelistingchecksum/initial/file1.copy4.txt new file mode 100644 index 000000000..e69de29bb diff --git a/cmd/bisync/testdata/test_ignorelistingchecksum/initial/file1.copy5.txt b/cmd/bisync/testdata/test_ignorelistingchecksum/initial/file1.copy5.txt new file mode 100644 index 000000000..e69de29bb diff --git a/cmd/bisync/testdata/test_ignorelistingchecksum/initial/file1.txt b/cmd/bisync/testdata/test_ignorelistingchecksum/initial/file1.txt new file mode 100644 index 000000000..e69de29bb diff --git a/cmd/bisync/testdata/test_ignorelistingchecksum/initial/subdir/file20.txt b/cmd/bisync/testdata/test_ignorelistingchecksum/initial/subdir/file20.txt new file mode 100644 index 000000000..e69de29bb diff --git a/cmd/bisync/testdata/test_ignorelistingchecksum/modfiles/file1.txt b/cmd/bisync/testdata/test_ignorelistingchecksum/modfiles/file1.txt new file mode 100644 index 000000000..464147f09 --- /dev/null +++ b/cmd/bisync/testdata/test_ignorelistingchecksum/modfiles/file1.txt @@ -0,0 +1 @@ +This file is newer diff --git a/cmd/bisync/testdata/test_ignorelistingchecksum/scenario.txt b/cmd/bisync/testdata/test_ignorelistingchecksum/scenario.txt new file mode 100644 index 000000000..b4bd11e34 --- /dev/null +++ b/cmd/bisync/testdata/test_ignorelistingchecksum/scenario.txt @@ -0,0 +1,14 @@ +test basic +# Simple test case for development + +test initial bisync +bisync resync +bisync resync 
ignore-listing-checksum + +test place newer files on both paths +# force specific modification time since file time is lost through git +touch-copy 2001-01-02 {datadir/}file1.txt {path2/} +copy-as {datadir/}file1.txt {path1/}subdir file20.txt + +test bisync run +bisync ignore-listing-checksum diff --git a/docs/content/bisync.md b/docs/content/bisync.md index 44bfc58c8..e010eba02 100644 --- a/docs/content/bisync.md +++ b/docs/content/bisync.md @@ -277,7 +277,39 @@ sync run times for very large numbers of files. The check may be run manually with `--check-sync=only`. It runs only the integrity check and terminates without actually synching. -See also: [Concurrent modifications](#concurrent-modifications) +Note that currently, `--check-sync` **only checks filenames and NOT modtime, size, or hash.** +For a more robust integrity check of the current state, consider using [`check`](commands/rclone_check/) +(or [`cryptcheck`](/commands/rclone_cryptcheck/), if at least one path is a `crypt` remote.) +For example, a possible sequence could look like this: + +1. Normally scheduled bisync run: + +``` +rclone bisync Path1 Path2 -MPc --check-access --max-delete 10 --filters-file /path/to/filters.txt -v --no-cleanup --ignore-listing-checksum --disable ListR --checkers=16 --drive-pacer-min-sleep=10ms --create-empty-src-dirs --resilient +``` + +2. Periodic independent integrity check (perhaps scheduled nightly or weekly): + +``` +rclone check -MvPc Path1 Path2 --filter-from /path/to/filters.txt +``` + +3. If diffs are found, you have some choices to correct them. +If one side is more up-to-date and you want to make the other side match it, you could run: + +``` +rclone sync Path1 Path2 --filter-from /path/to/filters.txt --create-empty-src-dirs -MPc -v +``` +(or switch Path1 and Path2 to make Path2 the source-of-truth) + +Or, if neither side is totally up-to-date, you could run a `--resync` to bring them back into agreement +(but remember that this could cause deleted files to re-appear.) + +*Note also that `rclone check` does not currently include empty directories, +so if you want to know if any empty directories are out of sync, +consider alternatively running the above `rclone sync` command with `--dry-run` added. + +See also: [Concurrent modifications](#concurrent-modifications), [`--resilient`](#resilient) #### --ignore-listing-checksum @@ -299,13 +331,6 @@ if there ARE diffs. * Unless `--ignore-listing-checksum` is passed, bisync currently computes hashes for one path *even when there's no common hash with the other path* (for example, a [crypt](/crypt/#modification-times-and-hashes) remote.) -* If both paths support checksums and have a common hash, -AND `--ignore-listing-checksum` was not specified when creating the listings, -`--check-sync=only` can be used to compare Path1 vs. Path2 checksums (as of the time the previous listings were created.) -However, `--check-sync=only` will NOT include checksums if the previous listings -were generated on a run using `--ignore-listing-checksum`. For a more robust integrity check of the current state, -consider using [`check`](commands/rclone_check/) -(or [`cryptcheck`](/commands/rclone_cryptcheck/), if at least one path is a `crypt` remote.) #### --resilient @@ -495,44 +520,18 @@ original path on the next sync, resulting in data loss. It is therefore recommended to _omit_ `--inplace`. Files that **change during** a bisync run may result in data loss. 
-This has been seen in a highly dynamic environment, where the filesystem
-is getting hammered by running processes during the sync.
-The currently recommended solution is to sync at quiet times or [filter out](#filtering)
-unnecessary directories and files.
-
-As an [alternative approach](https://forum.rclone.org/t/bisync-bugs-and-feature-requests/37636#:~:text=scans%2C%20to%20avoid-,errors%20if%20files%20changed%20during%20sync,-Given%20the%20number),
-consider using `--check-sync=false` (and possibly `--resilient`) to make bisync more forgiving
-of filesystems that change during the sync.
-Be advised that this may cause bisync to miss events that occur during a bisync run,
-so it is a good idea to supplement this with a periodic independent integrity check,
-and corrective sync if diffs are found. For example, a possible sequence could look like this:
-
-1. Normally scheduled bisync run:
-
-```
-rclone bisync Path1 Path2 -MPc --check-access --max-delete 10 --filters-file /path/to/filters.txt -v --check-sync=false --no-cleanup --ignore-listing-checksum --disable ListR --checkers=16 --drive-pacer-min-sleep=10ms --create-empty-src-dirs --resilient
-```
-
-2. Periodic independent integrity check (perhaps scheduled nightly or weekly):
-
-```
-rclone check -MvPc Path1 Path2 --filter-from /path/to/filters.txt
-```
-
-3. If diffs are found, you have some choices to correct them.
-If one side is more up-to-date and you want to make the other side match it, you could run:
-
-```
-rclone sync Path1 Path2 --filter-from /path/to/filters.txt --create-empty-src-dirs -MPc -v
-```
-(or switch Path1 and Path2 to make Path2 the source-of-truth)
-
-Or, if neither side is totally up-to-date, you could run a `--resync` to bring them back into agreement
-(but remember that this could cause deleted files to re-appear.)
-
-*Note also that `rclone check` does not currently include empty directories,
-so if you want to know if any empty directories are out of sync,
-consider alternatively running the above `rclone sync` command with `--dry-run` added.
+Prior to `rclone v1.65`, this was commonly seen in highly dynamic environments, where the filesystem
+was getting hammered by running processes during the sync.
+As of `rclone v1.65`, bisync was redesigned to use a "snapshot" model,
+greatly reducing the risks from changes during a sync.
+Changes that are not detected during the current sync will now be detected during the following sync,
+and will no longer cause the entire run to throw a critical error.
+There is additionally a mechanism to mark files as needing to be internally rechecked next time, for added safety.
+It should therefore no longer be necessary to sync only at quiet times;
+however, note that an error can still occur if a file happens to change at the exact moment it is
+being read or written by bisync (just as it would in `rclone sync`).
+(See also: [`--ignore-checksum`](https://rclone.org/docs/#ignore-checksum),
+[`--local-no-check-updated`](https://rclone.org/local/#local-no-check-updated))
 
 ### Empty directories
 
@@ -1273,6 +1272,8 @@ about _Unison_ and synchronization in general.
 * Copies and deletes are now handled in one operation instead of two
 * `--track-renames` and `--backup-dir` are now supported
 * Partial uploads known issue on `local`/`ftp`/`sftp` has been resolved (unless using `--inplace`)
+* Final listings are now generated from sync results, to avoid needing to re-list
+* Bisync is now much more resilient to changes that happen during a bisync run, and far less prone to critical errors / undetected changes
 
 ### `v1.64`
 * Fixed an [issue](https://forum.rclone.org/t/bisync-bugs-and-feature-requests/37636#:~:text=1.%20Dry%20runs%20are%20not%20completely%20dry)
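A condensed illustration of the headline change above (final listings generated from sync results rather than re-listing): this is a sketch of the decision logic in modifyListing's updateLists, with plain maps standing in for bisync's fileList and the error/recheck handling omitted.

```go
package main

import "fmt"

func main() {
	// Stale listing from the previous run, keyed by path (sizes only, for brevity).
	listing := map[string]int64{"file1.txt": 0, "gone.txt": 5}
	// "Winners" distilled from the sync Results, as in modifyListing above.
	winners := map[string]int64{"file1.txt": 19}
	// Files the sync actually touched (the copy/delete queue).
	queue := []string{"file1.txt", "gone.txt"}

	for _, name := range queue {
		if size, ok := winners[name]; ok {
			listing[name] = size // copied: take the winner's fresh info
		} else {
			delete(listing, name) // queued but no winner: it was deleted
		}
	}
	fmt.Println(listing) // map[file1.txt:19]
}
```

The payoff of this design is that the post-sync listings reflect exactly what the sync did, without a second listing pass over the remotes and without a window in which concurrent changes could be silently baked into the listings.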