march: added flag to allow Unicode filenames to remain unique
If your filenames contain two near-identical Unicode characters, rclone will normalize these, making them identical. This flag gives you the ability to keep them unique. This might create unintended side effects, such as duplicating files that contain certain Unicode characters, when downloading them from certain cloud providers to a macOS filesystem. Fixes #4228
This commit is contained in:
parent
4006345cfb
commit
899c8e0697
6 changed files with 119 additions and 72 deletions
|
@ -908,6 +908,20 @@ changed and won't need copying then you shouldn't use `--no-traverse`.
|
||||||
|
|
||||||
See [rclone copy](/commands/rclone_copy/) for an example of how to use it.
|
See [rclone copy](/commands/rclone_copy/) for an example of how to use it.
|
||||||
|
|
||||||
|
### --no-unicode-normalization ###
|
||||||
|
|
||||||
|
Don't normalize unicode characters in filenames during the sync routine.
|
||||||
|
|
||||||
|
Sometimes, an operating system will store filenames containing unicode
|
||||||
|
parts in their decomposed form (particularly macOS). Some cloud storage
|
||||||
|
systems will then recompose the unicode, resulting in duplicate files if
|
||||||
|
the data is ever copied back to a local filesystem.
|
||||||
|
|
||||||
|
Using this flag will disable rclone's normalization, treating each unicode
|
||||||
|
character as unique. For example, by default é and é will be normalized
|
||||||
|
into the same character. With `--no-unicode-normalization` they will be
|
||||||
|
treated as unique characters.
|
||||||
|
|
||||||
### --no-update-modtime ###
|
### --no-update-modtime ###
|
||||||
|
|
||||||
When using this flag, rclone won't update modification times of remote
|
When using this flag, rclone won't update modification times of remote
|
||||||
|
|
|
@ -70,6 +70,7 @@ type ConfigInfo struct {
|
||||||
IgnoreCaseSync bool
|
IgnoreCaseSync bool
|
||||||
NoTraverse bool
|
NoTraverse bool
|
||||||
NoCheckDest bool
|
NoCheckDest bool
|
||||||
|
NoUnicodeNormalization bool
|
||||||
NoUpdateModTime bool
|
NoUpdateModTime bool
|
||||||
DataRateUnit string
|
DataRateUnit string
|
||||||
CompareDest string
|
CompareDest string
|
||||||
|
|
|
@ -75,6 +75,7 @@ func AddFlags(flagSet *pflag.FlagSet) {
|
||||||
flags.BoolVarP(flagSet, &fs.Config.IgnoreCaseSync, "ignore-case-sync", "", fs.Config.IgnoreCaseSync, "Ignore case when synchronizing")
|
flags.BoolVarP(flagSet, &fs.Config.IgnoreCaseSync, "ignore-case-sync", "", fs.Config.IgnoreCaseSync, "Ignore case when synchronizing")
|
||||||
flags.BoolVarP(flagSet, &fs.Config.NoTraverse, "no-traverse", "", fs.Config.NoTraverse, "Don't traverse destination file system on copy.")
|
flags.BoolVarP(flagSet, &fs.Config.NoTraverse, "no-traverse", "", fs.Config.NoTraverse, "Don't traverse destination file system on copy.")
|
||||||
flags.BoolVarP(flagSet, &fs.Config.NoCheckDest, "no-check-dest", "", fs.Config.NoCheckDest, "Don't check the destination, copy regardless.")
|
flags.BoolVarP(flagSet, &fs.Config.NoCheckDest, "no-check-dest", "", fs.Config.NoCheckDest, "Don't check the destination, copy regardless.")
|
||||||
|
flags.BoolVarP(flagSet, &fs.Config.NoUnicodeNormalization, "no-unicode-normalization", "", fs.Config.NoUnicodeNormalization, "Don't normalize unicode characters in filenames.")
|
||||||
flags.BoolVarP(flagSet, &fs.Config.NoUpdateModTime, "no-update-modtime", "", fs.Config.NoUpdateModTime, "Don't update destination mod-time if files identical.")
|
flags.BoolVarP(flagSet, &fs.Config.NoUpdateModTime, "no-update-modtime", "", fs.Config.NoUpdateModTime, "Don't update destination mod-time if files identical.")
|
||||||
flags.StringVarP(flagSet, &fs.Config.CompareDest, "compare-dest", "", fs.Config.CompareDest, "Include additional server-side path during comparison.")
|
flags.StringVarP(flagSet, &fs.Config.CompareDest, "compare-dest", "", fs.Config.CompareDest, "Include additional server-side path during comparison.")
|
||||||
flags.StringVarP(flagSet, &fs.Config.CopyDest, "copy-dest", "", fs.Config.CopyDest, "Implies --compare-dest but also copies files from path into destination.")
|
flags.StringVarP(flagSet, &fs.Config.CopyDest, "copy-dest", "", fs.Config.CopyDest, "Implies --compare-dest but also copies files from path into destination.")
|
||||||
|
|
|
@ -22,15 +22,16 @@ import (
|
||||||
// calling Callback for each match
|
// calling Callback for each match
|
||||||
type March struct {
|
type March struct {
|
||||||
// parameters
|
// parameters
|
||||||
Ctx context.Context // context for background goroutines
|
Ctx context.Context // context for background goroutines
|
||||||
Fdst fs.Fs // dest Fs
|
Fdst fs.Fs // dest Fs
|
||||||
Fsrc fs.Fs // source Fs
|
Fsrc fs.Fs // source Fs
|
||||||
Dir string // directory
|
Dir string // directory
|
||||||
NoTraverse bool // don't traverse the destination
|
NoTraverse bool // don't traverse the destination
|
||||||
SrcIncludeAll bool // don't include all files in the src
|
SrcIncludeAll bool // don't include all files in the src
|
||||||
DstIncludeAll bool // don't include all files in the destination
|
DstIncludeAll bool // don't include all files in the destination
|
||||||
Callback Marcher // object to call with results
|
Callback Marcher // object to call with results
|
||||||
NoCheckDest bool // transfer all objects regardless without checking dst
|
NoCheckDest bool // transfer all objects regardless without checking dst
|
||||||
|
NoUnicodeNormalization bool // don't normalize unicode characters in filenames
|
||||||
// internal state
|
// internal state
|
||||||
srcListDir listDirFn // function to call to list a directory in the src
|
srcListDir listDirFn // function to call to list a directory in the src
|
||||||
dstListDir listDirFn // function to call to list a directory in the dst
|
dstListDir listDirFn // function to call to list a directory in the dst
|
||||||
|
@ -55,7 +56,9 @@ func (m *March) init() {
|
||||||
}
|
}
|
||||||
// Now create the matching transform
|
// Now create the matching transform
|
||||||
// ..normalise the UTF8 first
|
// ..normalise the UTF8 first
|
||||||
m.transforms = append(m.transforms, norm.NFC.String)
|
if !m.NoUnicodeNormalization {
|
||||||
|
m.transforms = append(m.transforms, norm.NFC.String)
|
||||||
|
}
|
||||||
// ..if destination is caseInsensitive then make it lower case
|
// ..if destination is caseInsensitive then make it lower case
|
||||||
// case Insensitive | src | dst | lower case compare |
|
// case Insensitive | src | dst | lower case compare |
|
||||||
// | No | No | No |
|
// | No | No | No |
|
||||||
|
|
|
@ -19,6 +19,7 @@ import (
|
||||||
"github.com/rclone/rclone/fstest/mockobject"
|
"github.com/rclone/rclone/fstest/mockobject"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
"golang.org/x/text/unicode/norm"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Some times used in the tests
|
// Some times used in the tests
|
||||||
|
@ -313,6 +314,8 @@ func TestMatchListings(t *testing.T) {
|
||||||
b = mockobject.Object("b")
|
b = mockobject.Object("b")
|
||||||
c = mockobject.Object("c")
|
c = mockobject.Object("c")
|
||||||
d = mockobject.Object("d")
|
d = mockobject.Object("d")
|
||||||
|
uE1 = mockobject.Object("é") // one of the unicode E characters
|
||||||
|
uE2 = mockobject.Object("é") // a different unicode E character
|
||||||
dirA = mockdir.New("A")
|
dirA = mockdir.New("A")
|
||||||
dirb = mockdir.New("b")
|
dirb = mockdir.New("b")
|
||||||
)
|
)
|
||||||
|
@ -419,6 +422,28 @@ func TestMatchListings(t *testing.T) {
|
||||||
},
|
},
|
||||||
transforms: []matchTransformFn{strings.ToLower},
|
transforms: []matchTransformFn{strings.ToLower},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
what: "Unicode near-duplicate that becomes duplicate with normalization",
|
||||||
|
input: fs.DirEntries{
|
||||||
|
uE1, uE1,
|
||||||
|
uE2, uE2,
|
||||||
|
},
|
||||||
|
matches: []matchPair{
|
||||||
|
{uE1, uE1},
|
||||||
|
},
|
||||||
|
transforms: []matchTransformFn{norm.NFC.String},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
what: "Unicode near-duplicate with no normalization",
|
||||||
|
input: fs.DirEntries{
|
||||||
|
uE1, uE1,
|
||||||
|
uE2, uE2,
|
||||||
|
},
|
||||||
|
matches: []matchPair{
|
||||||
|
{uE1, uE1},
|
||||||
|
{uE2, uE2},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
what: "File and directory are not duplicates - srcOnly",
|
what: "File and directory are not duplicates - srcOnly",
|
||||||
input: fs.DirEntries{
|
input: fs.DirEntries{
|
||||||
|
|
127
fs/sync/sync.go
127
fs/sync/sync.go
|
@ -30,42 +30,43 @@ type syncCopyMove struct {
|
||||||
deleteEmptySrcDirs bool
|
deleteEmptySrcDirs bool
|
||||||
dir string
|
dir string
|
||||||
// internal state
|
// internal state
|
||||||
ctx context.Context // internal context for controlling go-routines
|
ctx context.Context // internal context for controlling go-routines
|
||||||
cancel func() // cancel the context
|
cancel func() // cancel the context
|
||||||
noTraverse bool // if set don't traverse the dst
|
noTraverse bool // if set don't traverse the dst
|
||||||
noCheckDest bool // if set transfer all objects regardless without checking dst
|
noCheckDest bool // if set transfer all objects regardless without checking dst
|
||||||
deletersWg sync.WaitGroup // for delete before go routine
|
noUnicodeNormalization bool // don't normalize unicode characters in filenames
|
||||||
deleteFilesCh chan fs.Object // channel to receive deletes if delete before
|
deletersWg sync.WaitGroup // for delete before go routine
|
||||||
trackRenames bool // set if we should do server side renames
|
deleteFilesCh chan fs.Object // channel to receive deletes if delete before
|
||||||
trackRenamesStrategy trackRenamesStrategy // strategies used for tracking renames
|
trackRenames bool // set if we should do server side renames
|
||||||
dstFilesMu sync.Mutex // protect dstFiles
|
trackRenamesStrategy trackRenamesStrategy // strategies used for tracking renames
|
||||||
dstFiles map[string]fs.Object // dst files, always filled
|
dstFilesMu sync.Mutex // protect dstFiles
|
||||||
srcFiles map[string]fs.Object // src files, only used if deleteBefore
|
dstFiles map[string]fs.Object // dst files, always filled
|
||||||
srcFilesChan chan fs.Object // passes src objects
|
srcFiles map[string]fs.Object // src files, only used if deleteBefore
|
||||||
srcFilesResult chan error // error result of src listing
|
srcFilesChan chan fs.Object // passes src objects
|
||||||
dstFilesResult chan error // error result of dst listing
|
srcFilesResult chan error // error result of src listing
|
||||||
dstEmptyDirsMu sync.Mutex // protect dstEmptyDirs
|
dstFilesResult chan error // error result of dst listing
|
||||||
dstEmptyDirs map[string]fs.DirEntry // potentially empty directories
|
dstEmptyDirsMu sync.Mutex // protect dstEmptyDirs
|
||||||
srcEmptyDirsMu sync.Mutex // protect srcEmptyDirs
|
dstEmptyDirs map[string]fs.DirEntry // potentially empty directories
|
||||||
srcEmptyDirs map[string]fs.DirEntry // potentially empty directories
|
srcEmptyDirsMu sync.Mutex // protect srcEmptyDirs
|
||||||
checkerWg sync.WaitGroup // wait for checkers
|
srcEmptyDirs map[string]fs.DirEntry // potentially empty directories
|
||||||
toBeChecked *pipe // checkers channel
|
checkerWg sync.WaitGroup // wait for checkers
|
||||||
transfersWg sync.WaitGroup // wait for transfers
|
toBeChecked *pipe // checkers channel
|
||||||
toBeUploaded *pipe // copiers channel
|
transfersWg sync.WaitGroup // wait for transfers
|
||||||
errorMu sync.Mutex // Mutex covering the errors variables
|
toBeUploaded *pipe // copiers channel
|
||||||
err error // normal error from copy process
|
errorMu sync.Mutex // Mutex covering the errors variables
|
||||||
noRetryErr error // error with NoRetry set
|
err error // normal error from copy process
|
||||||
fatalErr error // fatal error
|
noRetryErr error // error with NoRetry set
|
||||||
commonHash hash.Type // common hash type between src and dst
|
fatalErr error // fatal error
|
||||||
renameMapMu sync.Mutex // mutex to protect the below
|
commonHash hash.Type // common hash type between src and dst
|
||||||
renameMap map[string][]fs.Object // dst files by hash - only used by trackRenames
|
renameMapMu sync.Mutex // mutex to protect the below
|
||||||
renamerWg sync.WaitGroup // wait for renamers
|
renameMap map[string][]fs.Object // dst files by hash - only used by trackRenames
|
||||||
toBeRenamed *pipe // renamers channel
|
renamerWg sync.WaitGroup // wait for renamers
|
||||||
trackRenamesWg sync.WaitGroup // wg for background track renames
|
toBeRenamed *pipe // renamers channel
|
||||||
trackRenamesCh chan fs.Object // objects are pumped in here
|
trackRenamesWg sync.WaitGroup // wg for background track renames
|
||||||
renameCheck []fs.Object // accumulate files to check for rename here
|
trackRenamesCh chan fs.Object // objects are pumped in here
|
||||||
compareCopyDest fs.Fs // place to check for files to server side copy
|
renameCheck []fs.Object // accumulate files to check for rename here
|
||||||
backupDir fs.Fs // place to store overwrites/deletes
|
compareCopyDest fs.Fs // place to check for files to server side copy
|
||||||
|
backupDir fs.Fs // place to store overwrites/deletes
|
||||||
}
|
}
|
||||||
|
|
||||||
type trackRenamesStrategy byte
|
type trackRenamesStrategy byte
|
||||||
|
@ -88,24 +89,25 @@ func newSyncCopyMove(ctx context.Context, fdst, fsrc fs.Fs, deleteMode fs.Delete
|
||||||
return nil, fserrors.FatalError(fs.ErrorOverlapping)
|
return nil, fserrors.FatalError(fs.ErrorOverlapping)
|
||||||
}
|
}
|
||||||
s := &syncCopyMove{
|
s := &syncCopyMove{
|
||||||
fdst: fdst,
|
fdst: fdst,
|
||||||
fsrc: fsrc,
|
fsrc: fsrc,
|
||||||
deleteMode: deleteMode,
|
deleteMode: deleteMode,
|
||||||
DoMove: DoMove,
|
DoMove: DoMove,
|
||||||
copyEmptySrcDirs: copyEmptySrcDirs,
|
copyEmptySrcDirs: copyEmptySrcDirs,
|
||||||
deleteEmptySrcDirs: deleteEmptySrcDirs,
|
deleteEmptySrcDirs: deleteEmptySrcDirs,
|
||||||
dir: "",
|
dir: "",
|
||||||
srcFilesChan: make(chan fs.Object, fs.Config.Checkers+fs.Config.Transfers),
|
srcFilesChan: make(chan fs.Object, fs.Config.Checkers+fs.Config.Transfers),
|
||||||
srcFilesResult: make(chan error, 1),
|
srcFilesResult: make(chan error, 1),
|
||||||
dstFilesResult: make(chan error, 1),
|
dstFilesResult: make(chan error, 1),
|
||||||
dstEmptyDirs: make(map[string]fs.DirEntry),
|
dstEmptyDirs: make(map[string]fs.DirEntry),
|
||||||
srcEmptyDirs: make(map[string]fs.DirEntry),
|
srcEmptyDirs: make(map[string]fs.DirEntry),
|
||||||
noTraverse: fs.Config.NoTraverse,
|
noTraverse: fs.Config.NoTraverse,
|
||||||
noCheckDest: fs.Config.NoCheckDest,
|
noCheckDest: fs.Config.NoCheckDest,
|
||||||
deleteFilesCh: make(chan fs.Object, fs.Config.Checkers),
|
noUnicodeNormalization: fs.Config.NoUnicodeNormalization,
|
||||||
trackRenames: fs.Config.TrackRenames,
|
deleteFilesCh: make(chan fs.Object, fs.Config.Checkers),
|
||||||
commonHash: fsrc.Hashes().Overlap(fdst.Hashes()).GetOne(),
|
trackRenames: fs.Config.TrackRenames,
|
||||||
trackRenamesCh: make(chan fs.Object, fs.Config.Checkers),
|
commonHash: fsrc.Hashes().Overlap(fdst.Hashes()).GetOne(),
|
||||||
|
trackRenamesCh: make(chan fs.Object, fs.Config.Checkers),
|
||||||
}
|
}
|
||||||
var err error
|
var err error
|
||||||
s.toBeChecked, err = newPipe(fs.Config.OrderBy, accounting.Stats(ctx).SetCheckQueue, fs.Config.MaxBacklog)
|
s.toBeChecked, err = newPipe(fs.Config.OrderBy, accounting.Stats(ctx).SetCheckQueue, fs.Config.MaxBacklog)
|
||||||
|
@ -782,14 +784,15 @@ func (s *syncCopyMove) run() error {
|
||||||
|
|
||||||
// set up a march over fdst and fsrc
|
// set up a march over fdst and fsrc
|
||||||
m := &march.March{
|
m := &march.March{
|
||||||
Ctx: s.ctx,
|
Ctx: s.ctx,
|
||||||
Fdst: s.fdst,
|
Fdst: s.fdst,
|
||||||
Fsrc: s.fsrc,
|
Fsrc: s.fsrc,
|
||||||
Dir: s.dir,
|
Dir: s.dir,
|
||||||
NoTraverse: s.noTraverse,
|
NoTraverse: s.noTraverse,
|
||||||
Callback: s,
|
Callback: s,
|
||||||
DstIncludeAll: filter.Active.Opt.DeleteExcluded,
|
DstIncludeAll: filter.Active.Opt.DeleteExcluded,
|
||||||
NoCheckDest: s.noCheckDest,
|
NoCheckDest: s.noCheckDest,
|
||||||
|
NoUnicodeNormalization: s.noUnicodeNormalization,
|
||||||
}
|
}
|
||||||
s.processError(m.Run())
|
s.processError(m.Run())
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue