fs: Add --track-renames-strategy for configurable matching criteria for --track-renames

This commit adds the `--track-renames-strategy` flag which allows the
user to choose the strategy for tracking renames when using the
`--track-renames` flag.

This can be "hash" or "modtime" or both currently.

This, when used with `--track-renames-strategy modtime` enables
support for tracking renames in encrypted remotes.

Fixes #3696
Fixes #2721
This commit is contained in:
Bernd Schoolmann 2020-03-20 14:04:56 +01:00 committed by GitHub
parent 36717c7d98
commit 158870bcdb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 217 additions and 67 deletions

View file

@ -6,6 +6,7 @@ import (
"fmt"
"path"
"sort"
"strings"
"sync"
"time"
@ -29,41 +30,42 @@ type syncCopyMove struct {
deleteEmptySrcDirs bool
dir string
// internal state
ctx context.Context // internal context for controlling go-routines
cancel func() // cancel the context
noTraverse bool // if set don't traverse the dst
noCheckDest bool // if set transfer all objects regardless without checking dst
deletersWg sync.WaitGroup // for delete before go routine
deleteFilesCh chan fs.Object // channel to receive deletes if delete before
trackRenames bool // set if we should do server side renames
dstFilesMu sync.Mutex // protect dstFiles
dstFiles map[string]fs.Object // dst files, always filled
srcFiles map[string]fs.Object // src files, only used if deleteBefore
srcFilesChan chan fs.Object // passes src objects
srcFilesResult chan error // error result of src listing
dstFilesResult chan error // error result of dst listing
dstEmptyDirsMu sync.Mutex // protect dstEmptyDirs
dstEmptyDirs map[string]fs.DirEntry // potentially empty directories
srcEmptyDirsMu sync.Mutex // protect srcEmptyDirs
srcEmptyDirs map[string]fs.DirEntry // potentially empty directories
checkerWg sync.WaitGroup // wait for checkers
toBeChecked *pipe // checkers channel
transfersWg sync.WaitGroup // wait for transfers
toBeUploaded *pipe // copiers channel
errorMu sync.Mutex // Mutex covering the errors variables
err error // normal error from copy process
noRetryErr error // error with NoRetry set
fatalErr error // fatal error
commonHash hash.Type // common hash type between src and dst
renameMapMu sync.Mutex // mutex to protect the below
renameMap map[string][]fs.Object // dst files by hash - only used by trackRenames
renamerWg sync.WaitGroup // wait for renamers
toBeRenamed *pipe // renamers channel
trackRenamesWg sync.WaitGroup // wg for background track renames
trackRenamesCh chan fs.Object // objects are pumped in here
renameCheck []fs.Object // accumulate files to check for rename here
compareCopyDest fs.Fs // place to check for files to server side copy
backupDir fs.Fs // place to store overwrites/deletes
ctx context.Context // internal context for controlling go-routines
cancel func() // cancel the context
noTraverse bool // if set don't traverse the dst
noCheckDest bool // if set transfer all objects regardless without checking dst
deletersWg sync.WaitGroup // for delete before go routine
deleteFilesCh chan fs.Object // channel to receive deletes if delete before
trackRenames bool // set if we should do server side renames
trackRenamesStrategy []string // stratgies used for tracking renames
dstFilesMu sync.Mutex // protect dstFiles
dstFiles map[string]fs.Object // dst files, always filled
srcFiles map[string]fs.Object // src files, only used if deleteBefore
srcFilesChan chan fs.Object // passes src objects
srcFilesResult chan error // error result of src listing
dstFilesResult chan error // error result of dst listing
dstEmptyDirsMu sync.Mutex // protect dstEmptyDirs
dstEmptyDirs map[string]fs.DirEntry // potentially empty directories
srcEmptyDirsMu sync.Mutex // protect srcEmptyDirs
srcEmptyDirs map[string]fs.DirEntry // potentially empty directories
checkerWg sync.WaitGroup // wait for checkers
toBeChecked *pipe // checkers channel
transfersWg sync.WaitGroup // wait for transfers
toBeUploaded *pipe // copiers channel
errorMu sync.Mutex // Mutex covering the errors variables
err error // normal error from copy process
noRetryErr error // error with NoRetry set
fatalErr error // fatal error
commonHash hash.Type // common hash type between src and dst
renameMapMu sync.Mutex // mutex to protect the below
renameMap map[string][]fs.Object // dst files by hash - only used by trackRenames
renamerWg sync.WaitGroup // wait for renamers
toBeRenamed *pipe // renamers channel
trackRenamesWg sync.WaitGroup // wg for background track renames
trackRenamesCh chan fs.Object // objects are pumped in here
renameCheck []fs.Object // accumulate files to check for rename here
compareCopyDest fs.Fs // place to check for files to server side copy
backupDir fs.Fs // place to store overwrites/deletes
}
func newSyncCopyMove(ctx context.Context, fdst, fsrc fs.Fs, deleteMode fs.DeleteMode, DoMove bool, deleteEmptySrcDirs bool, copyEmptySrcDirs bool) (*syncCopyMove, error) {
@ -71,24 +73,25 @@ func newSyncCopyMove(ctx context.Context, fdst, fsrc fs.Fs, deleteMode fs.Delete
return nil, fserrors.FatalError(fs.ErrorOverlapping)
}
s := &syncCopyMove{
fdst: fdst,
fsrc: fsrc,
deleteMode: deleteMode,
DoMove: DoMove,
copyEmptySrcDirs: copyEmptySrcDirs,
deleteEmptySrcDirs: deleteEmptySrcDirs,
dir: "",
srcFilesChan: make(chan fs.Object, fs.Config.Checkers+fs.Config.Transfers),
srcFilesResult: make(chan error, 1),
dstFilesResult: make(chan error, 1),
dstEmptyDirs: make(map[string]fs.DirEntry),
srcEmptyDirs: make(map[string]fs.DirEntry),
noTraverse: fs.Config.NoTraverse,
noCheckDest: fs.Config.NoCheckDest,
deleteFilesCh: make(chan fs.Object, fs.Config.Checkers),
trackRenames: fs.Config.TrackRenames,
commonHash: fsrc.Hashes().Overlap(fdst.Hashes()).GetOne(),
trackRenamesCh: make(chan fs.Object, fs.Config.Checkers),
fdst: fdst,
fsrc: fsrc,
deleteMode: deleteMode,
DoMove: DoMove,
copyEmptySrcDirs: copyEmptySrcDirs,
deleteEmptySrcDirs: deleteEmptySrcDirs,
dir: "",
srcFilesChan: make(chan fs.Object, fs.Config.Checkers+fs.Config.Transfers),
srcFilesResult: make(chan error, 1),
dstFilesResult: make(chan error, 1),
dstEmptyDirs: make(map[string]fs.DirEntry),
srcEmptyDirs: make(map[string]fs.DirEntry),
noTraverse: fs.Config.NoTraverse,
noCheckDest: fs.Config.NoCheckDest,
deleteFilesCh: make(chan fs.Object, fs.Config.Checkers),
trackRenames: fs.Config.TrackRenames,
trackRenamesStrategy: strings.Split(fs.Config.TrackRenamesStrategy, ","),
commonHash: fsrc.Hashes().Overlap(fdst.Hashes()).GetOne(),
trackRenamesCh: make(chan fs.Object, fs.Config.Checkers),
}
var err error
s.toBeChecked, err = newPipe(fs.Config.OrderBy, accounting.Stats(ctx).SetCheckQueue, fs.Config.MaxBacklog)
@ -132,10 +135,16 @@ func newSyncCopyMove(ctx context.Context, fdst, fsrc fs.Fs, deleteMode fs.Delete
fs.Errorf(fdst, "Ignoring --track-renames as the destination does not support server-side move or copy")
s.trackRenames = false
}
if s.commonHash == hash.None {
if s.commonHash == hash.None && isUsingRenameStrategy("hash", s.trackRenamesStrategy) {
fs.Errorf(fdst, "Ignoring --track-renames as the source and destination do not have a common hash")
s.trackRenames = false
}
if fs.GetModifyWindow(fsrc, fdst) == fs.ModTimeNotSupported && isUsingRenameStrategy("modtime", s.trackRenamesStrategy) {
fs.Errorf(fdst, "Ignoring --track-renames as either the source or destination do not support modtime")
s.trackRenames = false
}
if s.deleteMode == fs.DeleteModeOff {
fs.Errorf(fdst, "Ignoring --track-renames as it doesn't work with copy or move, only sync")
s.trackRenames = false
@ -560,20 +569,53 @@ func (s *syncCopyMove) srcParentDirCheck(entry fs.DirEntry) {
}
}
// renameHash makes a string with the size and the hash for rename detection
// isUsingRenameStrategy checks if a strategy is included in a list of strategies
func isUsingRenameStrategy(strategy string, strategies []string) bool {
for _, s := range strategies {
if s == strategy {
return true
}
}
return false
}
// renameID makes a string with the size and the other identifiers of the requested rename strategies
//
// it may return an empty string in which case no hash could be made
func (s *syncCopyMove) renameHash(obj fs.Object) (hash string) {
var err error
hash, err = obj.Hash(s.ctx, s.commonHash)
if err != nil {
fs.Debugf(obj, "Hash failed: %v", err)
return ""
func (s *syncCopyMove) renameID(obj fs.Object, renameStrategy []string, precision time.Duration) string {
var builder strings.Builder
fmt.Fprintf(&builder, "%d", obj.Size())
if isUsingRenameStrategy("hash", renameStrategy) {
var err error
hash, err := obj.Hash(s.ctx, s.commonHash)
if err != nil {
fs.Debugf(obj, "Hash failed: %v", err)
return ""
}
if hash == "" {
return ""
}
fmt.Fprintf(&builder, ",%s", hash)
}
if hash == "" {
return ""
if isUsingRenameStrategy("modtime", renameStrategy) {
modTime := obj.ModTime(s.ctx)
divisor := precision.Nanoseconds() / 2
rounding := divisor / 2
if rounding > 0 {
rounding--
}
timeHash := (modTime.Unix()*time.Nanosecond.Nanoseconds() + int64(modTime.Nanosecond()) + rounding)
fmt.Fprintf(&builder, ",%d", timeHash)
}
return fmt.Sprintf("%d,%s", obj.Size(), hash)
return builder.String()
}
// pushRenameMap adds the object with hash to the rename map
@ -626,10 +668,12 @@ func (s *syncCopyMove) makeRenameMap() {
// only create hash for dst fs.Object if its size could match
if _, found := possibleSizes[obj.Size()]; found {
tr := accounting.Stats(s.ctx).NewCheckingTransfer(obj)
hash := s.renameHash(obj)
hash := s.renameID(obj, s.trackRenamesStrategy, fs.GetModifyWindow(s.fsrc, s.fdst))
if hash != "" {
s.pushRenameMap(hash, obj)
}
tr.Done(nil)
}
}
@ -643,7 +687,8 @@ func (s *syncCopyMove) makeRenameMap() {
// possible, it returns true if the object was renamed.
func (s *syncCopyMove) tryRename(src fs.Object) bool {
// Calculate the hash of the src object
hash := s.renameHash(src)
hash := s.renameID(src, s.trackRenamesStrategy, fs.GetModifyWindow(s.fsrc, s.fdst))
if hash == "" {
return false
}