diff --git a/docs/content/docs.md b/docs/content/docs.md index 6d7cd4dd0..2d64733e3 100644 --- a/docs/content/docs.md +++ b/docs/content/docs.md @@ -908,6 +908,20 @@ changed and won't need copying then you shouldn't use `--no-traverse`. See [rclone copy](/commands/rclone_copy/) for an example of how to use it. +### --no-unicode-normalization ### + +Don't normalize unicode characters in filenames during the sync routine. + +Sometimes, an operating system will store filenames containing unicode +parts in their decomposed form (particularly macOS). Some cloud storage +systems will then recompose the unicode, resulting in duplicate files if +the data is ever copied back to a local filesystem. + +Using this flag will disable that functionality, treating each unicode +character as unique. For example, by default é and é will be normalized +into the same character. With `--no-unicode-normalization` they will be +treated as unique characters. + ### --no-update-modtime ### When using this flag, rclone won't update modification times of remote diff --git a/fs/config.go b/fs/config.go index 43f63ca03..2340f3acb 100644 --- a/fs/config.go +++ b/fs/config.go @@ -70,6 +70,7 @@ type ConfigInfo struct { IgnoreCaseSync bool NoTraverse bool NoCheckDest bool + NoUnicodeNormalization bool NoUpdateModTime bool DataRateUnit string CompareDest string diff --git a/fs/config/configflags/configflags.go b/fs/config/configflags/configflags.go index 9233b5813..b66cd421b 100644 --- a/fs/config/configflags/configflags.go +++ b/fs/config/configflags/configflags.go @@ -75,6 +75,7 @@ func AddFlags(flagSet *pflag.FlagSet) { flags.BoolVarP(flagSet, &fs.Config.IgnoreCaseSync, "ignore-case-sync", "", fs.Config.IgnoreCaseSync, "Ignore case when synchronizing") flags.BoolVarP(flagSet, &fs.Config.NoTraverse, "no-traverse", "", fs.Config.NoTraverse, "Don't traverse destination file system on copy.") flags.BoolVarP(flagSet, &fs.Config.NoCheckDest, "no-check-dest", "", fs.Config.NoCheckDest, "Don't check the 
destination, copy regardless.") + flags.BoolVarP(flagSet, &fs.Config.NoUnicodeNormalization, "no-unicode-normalization", "", fs.Config.NoUnicodeNormalization, "Don't normalize unicode characters in filenames.") flags.BoolVarP(flagSet, &fs.Config.NoUpdateModTime, "no-update-modtime", "", fs.Config.NoUpdateModTime, "Don't update destination mod-time if files identical.") flags.StringVarP(flagSet, &fs.Config.CompareDest, "compare-dest", "", fs.Config.CompareDest, "Include additional server-side path during comparison.") flags.StringVarP(flagSet, &fs.Config.CopyDest, "copy-dest", "", fs.Config.CopyDest, "Implies --compare-dest but also copies files from path into destination.") diff --git a/fs/march/march.go b/fs/march/march.go index cf50812b0..0b8de9a75 100644 --- a/fs/march/march.go +++ b/fs/march/march.go @@ -22,15 +22,16 @@ import ( // calling Callback for each match type March struct { // parameters - Ctx context.Context // context for background goroutines - Fdst fs.Fs // source Fs - Fsrc fs.Fs // dest Fs - Dir string // directory - NoTraverse bool // don't traverse the destination - SrcIncludeAll bool // don't include all files in the src - DstIncludeAll bool // don't include all files in the destination - Callback Marcher // object to call with results - NoCheckDest bool // transfer all objects regardless without checking dst + Ctx context.Context // context for background goroutines + Fdst fs.Fs // dest Fs + Fsrc fs.Fs // source Fs + Dir string // directory + NoTraverse bool // don't traverse the destination + SrcIncludeAll bool // don't include all files in the src + DstIncludeAll bool // don't include all files in the destination + Callback Marcher // object to call with results + NoCheckDest bool // transfer all objects regardless without checking dst + NoUnicodeNormalization bool // don't normalize unicode characters in filenames // internal state srcListDir listDirFn // function to call to list a directory in the src dstListDir listDirFn // function to 
call to list a directory in the dst @@ -55,7 +56,9 @@ func (m *March) init() { } // Now create the matching transform // ..normalise the UTF8 first - m.transforms = append(m.transforms, norm.NFC.String) + if !m.NoUnicodeNormalization { + m.transforms = append(m.transforms, norm.NFC.String) + } // ..if destination is caseInsensitive then make it lower case // case Insensitive | src | dst | lower case compare | // | No | No | No | diff --git a/fs/march/march_test.go b/fs/march/march_test.go index 0a0ec940b..6d257fbdd 100644 --- a/fs/march/march_test.go +++ b/fs/march/march_test.go @@ -19,6 +19,7 @@ import ( "github.com/rclone/rclone/fstest/mockobject" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "golang.org/x/text/unicode/norm" ) // Some times used in the tests @@ -313,6 +314,8 @@ func TestMatchListings(t *testing.T) { b = mockobject.Object("b") c = mockobject.Object("c") d = mockobject.Object("d") + uE1 = mockobject.Object("é") // one of the unicode E characters + uE2 = mockobject.Object("é") // a different unicode E character dirA = mockdir.New("A") dirb = mockdir.New("b") ) @@ -419,6 +422,28 @@ func TestMatchListings(t *testing.T) { }, transforms: []matchTransformFn{strings.ToLower}, }, + { + what: "Unicode near-duplicate that becomes duplicate with normalization", + input: fs.DirEntries{ + uE1, uE1, + uE2, uE2, + }, + matches: []matchPair{ + {uE1, uE1}, + }, + transforms: []matchTransformFn{norm.NFC.String}, + }, + { + what: "Unicode near-duplicate with no normalization", + input: fs.DirEntries{ + uE1, uE1, + uE2, uE2, + }, + matches: []matchPair{ + {uE1, uE1}, + {uE2, uE2}, + }, + }, { what: "File and directory are not duplicates - srcOnly", input: fs.DirEntries{ diff --git a/fs/sync/sync.go b/fs/sync/sync.go index 51fadd850..d392aabed 100644 --- a/fs/sync/sync.go +++ b/fs/sync/sync.go @@ -30,42 +30,43 @@ type syncCopyMove struct { deleteEmptySrcDirs bool dir string // internal state - ctx context.Context // internal context for 
controlling go-routines - cancel func() // cancel the context - noTraverse bool // if set don't traverse the dst - noCheckDest bool // if set transfer all objects regardless without checking dst - deletersWg sync.WaitGroup // for delete before go routine - deleteFilesCh chan fs.Object // channel to receive deletes if delete before - trackRenames bool // set if we should do server side renames - trackRenamesStrategy trackRenamesStrategy // stratgies used for tracking renames - dstFilesMu sync.Mutex // protect dstFiles - dstFiles map[string]fs.Object // dst files, always filled - srcFiles map[string]fs.Object // src files, only used if deleteBefore - srcFilesChan chan fs.Object // passes src objects - srcFilesResult chan error // error result of src listing - dstFilesResult chan error // error result of dst listing - dstEmptyDirsMu sync.Mutex // protect dstEmptyDirs - dstEmptyDirs map[string]fs.DirEntry // potentially empty directories - srcEmptyDirsMu sync.Mutex // protect srcEmptyDirs - srcEmptyDirs map[string]fs.DirEntry // potentially empty directories - checkerWg sync.WaitGroup // wait for checkers - toBeChecked *pipe // checkers channel - transfersWg sync.WaitGroup // wait for transfers - toBeUploaded *pipe // copiers channel - errorMu sync.Mutex // Mutex covering the errors variables - err error // normal error from copy process - noRetryErr error // error with NoRetry set - fatalErr error // fatal error - commonHash hash.Type // common hash type between src and dst - renameMapMu sync.Mutex // mutex to protect the below - renameMap map[string][]fs.Object // dst files by hash - only used by trackRenames - renamerWg sync.WaitGroup // wait for renamers - toBeRenamed *pipe // renamers channel - trackRenamesWg sync.WaitGroup // wg for background track renames - trackRenamesCh chan fs.Object // objects are pumped in here - renameCheck []fs.Object // accumulate files to check for rename here - compareCopyDest fs.Fs // place to check for files to server side copy - 
backupDir fs.Fs // place to store overwrites/deletes + ctx context.Context // internal context for controlling go-routines + cancel func() // cancel the context + noTraverse bool // if set don't traverse the dst + noCheckDest bool // if set transfer all objects regardless without checking dst + noUnicodeNormalization bool // don't normalize unicode characters in filenames + deletersWg sync.WaitGroup // for delete before go routine + deleteFilesCh chan fs.Object // channel to receive deletes if delete before + trackRenames bool // set if we should do server side renames + trackRenamesStrategy trackRenamesStrategy // strategies used for tracking renames + dstFilesMu sync.Mutex // protect dstFiles + dstFiles map[string]fs.Object // dst files, always filled + srcFiles map[string]fs.Object // src files, only used if deleteBefore + srcFilesChan chan fs.Object // passes src objects + srcFilesResult chan error // error result of src listing + dstFilesResult chan error // error result of dst listing + dstEmptyDirsMu sync.Mutex // protect dstEmptyDirs + dstEmptyDirs map[string]fs.DirEntry // potentially empty directories + srcEmptyDirsMu sync.Mutex // protect srcEmptyDirs + srcEmptyDirs map[string]fs.DirEntry // potentially empty directories + checkerWg sync.WaitGroup // wait for checkers + toBeChecked *pipe // checkers channel + transfersWg sync.WaitGroup // wait for transfers + toBeUploaded *pipe // copiers channel + errorMu sync.Mutex // Mutex covering the errors variables + err error // normal error from copy process + noRetryErr error // error with NoRetry set + fatalErr error // fatal error + commonHash hash.Type // common hash type between src and dst + renameMapMu sync.Mutex // mutex to protect the below + renameMap map[string][]fs.Object // dst files by hash - only used by trackRenames + renamerWg sync.WaitGroup // wait for renamers + toBeRenamed *pipe // renamers channel + trackRenamesWg sync.WaitGroup // wg for background track renames + trackRenamesCh chan 
fs.Object // objects are pumped in here + renameCheck []fs.Object // accumulate files to check for rename here + compareCopyDest fs.Fs // place to check for files to server side copy + backupDir fs.Fs // place to store overwrites/deletes } type trackRenamesStrategy byte @@ -88,24 +89,25 @@ func newSyncCopyMove(ctx context.Context, fdst, fsrc fs.Fs, deleteMode fs.Delete return nil, fserrors.FatalError(fs.ErrorOverlapping) } s := &syncCopyMove{ - fdst: fdst, - fsrc: fsrc, - deleteMode: deleteMode, - DoMove: DoMove, - copyEmptySrcDirs: copyEmptySrcDirs, - deleteEmptySrcDirs: deleteEmptySrcDirs, - dir: "", - srcFilesChan: make(chan fs.Object, fs.Config.Checkers+fs.Config.Transfers), - srcFilesResult: make(chan error, 1), - dstFilesResult: make(chan error, 1), - dstEmptyDirs: make(map[string]fs.DirEntry), - srcEmptyDirs: make(map[string]fs.DirEntry), - noTraverse: fs.Config.NoTraverse, - noCheckDest: fs.Config.NoCheckDest, - deleteFilesCh: make(chan fs.Object, fs.Config.Checkers), - trackRenames: fs.Config.TrackRenames, - commonHash: fsrc.Hashes().Overlap(fdst.Hashes()).GetOne(), - trackRenamesCh: make(chan fs.Object, fs.Config.Checkers), + fdst: fdst, + fsrc: fsrc, + deleteMode: deleteMode, + DoMove: DoMove, + copyEmptySrcDirs: copyEmptySrcDirs, + deleteEmptySrcDirs: deleteEmptySrcDirs, + dir: "", + srcFilesChan: make(chan fs.Object, fs.Config.Checkers+fs.Config.Transfers), + srcFilesResult: make(chan error, 1), + dstFilesResult: make(chan error, 1), + dstEmptyDirs: make(map[string]fs.DirEntry), + srcEmptyDirs: make(map[string]fs.DirEntry), + noTraverse: fs.Config.NoTraverse, + noCheckDest: fs.Config.NoCheckDest, + noUnicodeNormalization: fs.Config.NoUnicodeNormalization, + deleteFilesCh: make(chan fs.Object, fs.Config.Checkers), + trackRenames: fs.Config.TrackRenames, + commonHash: fsrc.Hashes().Overlap(fdst.Hashes()).GetOne(), + trackRenamesCh: make(chan fs.Object, fs.Config.Checkers), } var err error s.toBeChecked, err = newPipe(fs.Config.OrderBy, 
accounting.Stats(ctx).SetCheckQueue, fs.Config.MaxBacklog) @@ -782,14 +784,15 @@ func (s *syncCopyMove) run() error { // set up a march over fdst and fsrc m := &march.March{ - Ctx: s.ctx, - Fdst: s.fdst, - Fsrc: s.fsrc, - Dir: s.dir, - NoTraverse: s.noTraverse, - Callback: s, - DstIncludeAll: filter.Active.Opt.DeleteExcluded, - NoCheckDest: s.noCheckDest, + Ctx: s.ctx, + Fdst: s.fdst, + Fsrc: s.fsrc, + Dir: s.dir, + NoTraverse: s.noTraverse, + Callback: s, + DstIncludeAll: filter.Active.Opt.DeleteExcluded, + NoCheckDest: s.noCheckDest, + NoUnicodeNormalization: s.noUnicodeNormalization, } s.processError(m.Run())