rclone/cmd/bisync/march.go
nielash 9c96c13a35 bisync: optimize --resync performance -- partially addresses #5681
Before this change, --resync was handled in three steps, and needed to do a lot
of unnecessary work to implement its own --ignore-existing logic, which also
caused problems with unicode normalization, in addition to being pretty slow.
After this change, it is refactored to produce the same result much more
efficiently, by reducing the three steps to two and letting ci.IgnoreExisting
do the work instead of reinventing the wheel.

The behavior and sync order remain unchanged for now -- just faster (but see
the ongoing lively discussions about potential future changes in #5681!)
2024-01-20 14:50:08 -05:00

221 lines
5.2 KiB
Go

package bisync
import (
"context"
"sync"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/accounting"
"github.com/rclone/rclone/fs/filter"
"github.com/rclone/rclone/fs/hash"
"github.com/rclone/rclone/fs/march"
)
var ls1 = newFileList()
var ls2 = newFileList()
var err error
var firstErr error
var marchLsLock sync.Mutex
var marchErrLock sync.Mutex
var marchCtx context.Context
func (b *bisyncRun) makeMarchListing(ctx context.Context) (*fileList, *fileList, error) {
ci := fs.GetConfig(ctx)
marchCtx = ctx
b.setupListing()
fs.Debugf(b, "starting to march!")
// set up a march over fdst (Path2) and fsrc (Path1)
m := &march.March{
Ctx: ctx,
Fdst: b.fs2,
Fsrc: b.fs1,
Dir: "",
NoTraverse: false,
Callback: b,
DstIncludeAll: false,
NoCheckDest: false,
NoUnicodeNormalization: ci.NoUnicodeNormalization,
}
err = m.Run(ctx)
fs.Debugf(b, "march completed. err: %v", err)
if err == nil {
err = firstErr
}
if err != nil {
b.abort = true
}
// save files
err = ls1.save(ctx, b.newListing1)
if err != nil {
b.abort = true
}
err = ls2.save(ctx, b.newListing2)
if err != nil {
b.abort = true
}
return ls1, ls2, err
}
// SrcOnly have an object which is on path1 only
func (b *bisyncRun) SrcOnly(o fs.DirEntry) (recurse bool) {
fs.Debugf(o, "path1 only")
b.parse(o, true)
return isDir(o)
}
// DstOnly have an object which is on path2 only
func (b *bisyncRun) DstOnly(o fs.DirEntry) (recurse bool) {
fs.Debugf(o, "path2 only")
b.parse(o, false)
return isDir(o)
}
// Match is called when object exists on both path1 and path2 (whether equal or not)
func (b *bisyncRun) Match(ctx context.Context, o2, o1 fs.DirEntry) (recurse bool) {
fs.Debugf(o1, "both path1 and path2")
b.parse(o1, true)
b.parse(o2, false)
return isDir(o1)
}
func isDir(e fs.DirEntry) bool {
switch x := e.(type) {
case fs.Object:
fs.Debugf(x, "is Object")
return false
case fs.Directory:
fs.Debugf(x, "is Dir")
return true
default:
fs.Debugf(e, "is unknown")
}
return false
}
func (b *bisyncRun) parse(e fs.DirEntry, isPath1 bool) {
switch x := e.(type) {
case fs.Object:
b.ForObject(x, isPath1)
case fs.Directory:
if b.opt.CreateEmptySrcDirs {
b.ForDir(x, isPath1)
}
default:
fs.Debugf(e, "is unknown")
}
}
func (b *bisyncRun) setupListing() {
ls1 = newFileList()
ls2 = newFileList()
hashType1 := hash.None
hashType2 := hash.None
if !b.opt.IgnoreListingChecksum {
// Currently bisync just honors --ignore-listing-checksum
// (note that this is different from --ignore-checksum)
// TODO add full support for checksums and related flags
hashType1 = b.fs1.Hashes().GetOne()
hashType2 = b.fs2.Hashes().GetOne()
}
ls1.hash = hashType1
ls2.hash = hashType2
}
func (b *bisyncRun) ForObject(o fs.Object, isPath1 bool) {
tr := accounting.Stats(marchCtx).NewCheckingTransfer(o, "listing file - "+whichPath(isPath1))
defer func() {
tr.Done(marchCtx, nil)
}()
var (
hashVal string
hashErr error
)
ls := whichLs(isPath1)
hashType := ls.hash
if hashType != hash.None {
hashVal, hashErr = o.Hash(marchCtx, hashType)
marchErrLock.Lock()
if firstErr == nil {
firstErr = hashErr
}
marchErrLock.Unlock()
}
time := o.ModTime(marchCtx).In(TZ)
id := "" // TODO
flags := "-" // "-" for a file and "d" for a directory
marchLsLock.Lock()
ls.put(o.Remote(), o.Size(), time, hashVal, id, flags)
marchLsLock.Unlock()
}
func (b *bisyncRun) ForDir(o fs.Directory, isPath1 bool) {
tr := accounting.Stats(marchCtx).NewCheckingTransfer(o, "listing dir - "+whichPath(isPath1))
defer func() {
tr.Done(marchCtx, nil)
}()
ls := whichLs(isPath1)
time := o.ModTime(marchCtx).In(TZ)
id := "" // TODO
flags := "d" // "-" for a file and "d" for a directory
marchLsLock.Lock()
//record size as 0 instead of -1, so bisync doesn't think it's a google doc
ls.put(o.Remote(), 0, time, "", id, flags)
marchLsLock.Unlock()
}
func whichLs(isPath1 bool) *fileList {
ls := ls1
if !isPath1 {
ls = ls2
}
return ls
}
func whichPath(isPath1 bool) string {
s := "Path1"
if !isPath1 {
s = "Path2"
}
return s
}
func (b *bisyncRun) findCheckFiles(ctx context.Context) (*fileList, *fileList, error) {
ctxCheckFile, filterCheckFile := filter.AddConfig(ctx)
b.handleErr(b.opt.CheckFilename, "error adding CheckFilename to filter", filterCheckFile.Add(true, b.opt.CheckFilename), true, true)
b.handleErr(b.opt.CheckFilename, "error adding ** exclusion to filter", filterCheckFile.Add(false, "**"), true, true)
ci := fs.GetConfig(ctxCheckFile)
marchCtx = ctxCheckFile
b.setupListing()
fs.Debugf(b, "starting to march!")
// set up a march over fdst (Path2) and fsrc (Path1)
m := &march.March{
Ctx: ctxCheckFile,
Fdst: b.fs2,
Fsrc: b.fs1,
Dir: "",
NoTraverse: false,
Callback: b,
DstIncludeAll: false,
NoCheckDest: false,
NoUnicodeNormalization: ci.NoUnicodeNormalization,
}
err = m.Run(ctxCheckFile)
fs.Debugf(b, "march completed. err: %v", err)
if err == nil {
err = firstErr
}
if err != nil {
b.abort = true
}
return ls1, ls2, err
}