forked from TrueCloudLab/rclone
vfs: fix unicode normalization on macOS - fixes #7072
Before this change, the VFS layer did not properly handle unicode normalization, which caused problems particularly for users of macOS. While attempts were made to handle it with various `-o modules=iconv` combinations, this was an imperfect solution, as no one combination allowed both NFC and NFD content to simultaneously be both visible and editable via Finder. After this change, the VFS supports `--no-unicode-normalization` (default `false`) via the existing `--vfs-case-insensitive` logic, which is extended to apply to both case insensitivity and unicode normalization form. This change also adds an additional flag, `--vfs-block-norm-dupes`, to address a probably rare but potentially possible scenario where a directory contains multiple duplicate filenames after applying case and unicode normalization settings. In such a scenario, this flag (disabled by default) hides the duplicates. This comes with a performance tradeoff, as rclone will have to scan the entire directory for duplicates when listing a directory. For this reason, it is recommended to leave this disabled if not needed. However, macOS users may wish to consider using it, as otherwise, if a remote directory contains both NFC and NFD versions of the same filename, an odd situation will occur: both versions of the file will be visible in the mount, and both will appear to be editable, however, editing either version will actually result in only the NFD version getting edited under the hood. `--vfs-block-norm-dupes` prevents this confusion by detecting this scenario, hiding the duplicates, and logging an error, similar to how this is handled in `rclone sync`.
This commit is contained in:
parent
dcdbad3554
commit
fd8faeb0e6
8 changed files with 115 additions and 26 deletions
|
@ -116,13 +116,6 @@ func mountOptions(VFS *vfs.VFS, device string, mountpoint string, opt *mountlib.
|
|||
for _, option := range opt.ExtraFlags {
|
||||
options = append(options, option)
|
||||
}
|
||||
if runtime.GOOS == "darwin" {
|
||||
if !findOption("modules=iconv", options) {
|
||||
iconv := "modules=iconv,from_code=UTF-8,to_code=UTF-8-MAC"
|
||||
options = append(options, "-o", iconv)
|
||||
fs.Debugf(nil, "Adding \"-o %s\" for macOS", iconv)
|
||||
}
|
||||
}
|
||||
return options
|
||||
}
|
||||
|
||||
|
|
|
@ -257,7 +257,12 @@ Mounting on macOS can be done either via [built-in NFS server](/commands/rclone_
|
|||
FUSE driver utilizing a macOS kernel extension (kext). FUSE-T is an alternative FUSE system
|
||||
which "mounts" via an NFSv4 local server.
|
||||
|
||||
## NFS mount
|
||||
##### Unicode Normalization
|
||||
|
||||
It is highly recommended to keep the default of `--no-unicode-normalization=false`
|
||||
for all `mount` and `serve` commands on macOS. For details, see [vfs-case-sensitivity](https://rclone.org/commands/rclone_mount/#vfs-case-sensitivity).
|
||||
|
||||
#### NFS mount
|
||||
|
||||
This method spins up an NFS server using [serve nfs](/commands/rclone_serve_nfs/) command and mounts
|
||||
it to the specified mountpoint. If you run this in background mode using |--daemon|, you will need to
|
||||
|
@ -294,15 +299,6 @@ As per the [FUSE-T wiki](https://github.com/macos-fuse-t/fuse-t/wiki#caveats):
|
|||
This means that viewing files with various tools, notably macOS Finder, will cause rlcone
|
||||
to update the modification time of the file. This may make rclone upload a full new copy
|
||||
of the file.
|
||||
|
||||
##### Unicode Normalization
|
||||
|
||||
Rclone includes flags for unicode normalization with macFUSE that should be updated
|
||||
for FUSE-T. See [this forum post](https://forum.rclone.org/t/some-unicode-forms-break-mount-on-macos-with-fuse-t/36403)
|
||||
and [FUSE-T issue #16](https://github.com/macos-fuse-t/fuse-t/issues/16). The following
|
||||
flag should be added to the `rclone mount` command.
|
||||
|
||||
-o modules=iconv,from_code=UTF-8,to_code=UTF-8
|
||||
|
||||
##### Read Only mounts
|
||||
|
||||
|
|
|
@ -380,10 +380,19 @@ func CheckDownload(ctx context.Context, opt *CheckOpt) error {
|
|||
// so that it matches behavior of Check (where it's handled by March)
|
||||
func ApplyTransforms(ctx context.Context, s string) string {
|
||||
ci := fs.GetConfig(ctx)
|
||||
if !ci.NoUnicodeNormalization {
|
||||
return ToNormal(s, !ci.NoUnicodeNormalization, ci.IgnoreCaseSync)
|
||||
}
|
||||
|
||||
// ToNormal normalizes case and unicode form and returns the transformed string.
|
||||
// It is similar to ApplyTransforms but does not use a context.
|
||||
// If normUnicode == true, s will be transformed to NFC.
|
||||
// If normCase == true, s will be transformed to lowercase.
|
||||
// If both are true, both transformations will be performed.
|
||||
func ToNormal(s string, normUnicode, normCase bool) string {
|
||||
if normUnicode {
|
||||
s = norm.NFC.String(s)
|
||||
}
|
||||
if ci.IgnoreCaseSync {
|
||||
if normCase {
|
||||
s = strings.ToLower(s)
|
||||
}
|
||||
return s
|
||||
|
|
46
vfs/dir.go
46
vfs/dir.go
|
@ -18,6 +18,7 @@ import (
|
|||
"github.com/rclone/rclone/fs/operations"
|
||||
"github.com/rclone/rclone/fs/walk"
|
||||
"github.com/rclone/rclone/vfs/vfscommon"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// Dir represents a directory entry
|
||||
|
@ -466,7 +467,6 @@ func (d *Dir) AddVirtual(leaf string, size int64, isDir bool) {
|
|||
node = f
|
||||
}
|
||||
d.addObject(node)
|
||||
|
||||
}
|
||||
|
||||
// delObject removes an object from the directory
|
||||
|
@ -514,6 +514,35 @@ func (d *Dir) _readDir() error {
|
|||
return err
|
||||
}
|
||||
|
||||
if d.vfs.Opt.BlockNormDupes { // do this only if requested, as it will have a performance hit
|
||||
ci := fs.GetConfig(context.TODO())
|
||||
|
||||
// sort entries such that NFD comes before NFC of same name
|
||||
sort.Slice(entries, func(i, j int) bool {
|
||||
if entries[i] != entries[j] && fs.DirEntryType(entries[i]) == fs.DirEntryType(entries[j]) && norm.NFC.String(entries[i].Remote()) == norm.NFC.String(entries[j].Remote()) {
|
||||
if norm.NFD.IsNormalString(entries[i].Remote()) && !norm.NFD.IsNormalString(entries[j].Remote()) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return entries.Less(i, j)
|
||||
})
|
||||
|
||||
// detect dupes, remove them from the list and log an error
|
||||
normalizedNames := make(map[string]struct{}, entries.Len())
|
||||
filteredEntries := make(fs.DirEntries, 0)
|
||||
for _, e := range entries {
|
||||
normName := fmt.Sprintf("%s-%T", operations.ToNormal(e.Remote(), !ci.NoUnicodeNormalization, (ci.IgnoreCaseSync || d.vfs.Opt.CaseInsensitive)), e) // include type to track objects and dirs separately
|
||||
_, found := normalizedNames[normName]
|
||||
if found {
|
||||
fs.Errorf(e.Remote(), "duplicate normalized names detected - skipping")
|
||||
continue
|
||||
}
|
||||
normalizedNames[normName] = struct{}{}
|
||||
filteredEntries = append(filteredEntries, e)
|
||||
}
|
||||
entries = filteredEntries
|
||||
}
|
||||
|
||||
err = d._readDirFromEntries(entries, nil, time.Time{})
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -767,15 +796,18 @@ func (d *Dir) stat(leaf string) (Node, error) {
|
|||
}
|
||||
item, ok := d.items[leaf]
|
||||
|
||||
if !ok && d.vfs.Opt.CaseInsensitive {
|
||||
leafLower := strings.ToLower(leaf)
|
||||
ci := fs.GetConfig(context.TODO())
|
||||
normUnicode := !ci.NoUnicodeNormalization
|
||||
normCase := ci.IgnoreCaseSync || d.vfs.Opt.CaseInsensitive
|
||||
if !ok && (normUnicode || normCase) {
|
||||
leafNormalized := operations.ToNormal(leaf, normUnicode, normCase) // this handles both case and unicode normalization
|
||||
for name, node := range d.items {
|
||||
if strings.ToLower(name) == leafLower {
|
||||
if operations.ToNormal(name, normUnicode, normCase) == leafNormalized {
|
||||
if ok {
|
||||
// duplicate case insensitive match is an error
|
||||
return nil, fmt.Errorf("duplicate filename %q detected with --vfs-case-insensitive set", leaf)
|
||||
// duplicate normalized match is an error
|
||||
return nil, fmt.Errorf("duplicate filename %q detected with case/unicode normalization settings", leaf)
|
||||
}
|
||||
// found a case insensitive match
|
||||
// found a normalized match
|
||||
ok = true
|
||||
item = node
|
||||
}
|
||||
|
|
22
vfs/vfs.md
22
vfs/vfs.md
|
@ -309,6 +309,28 @@ If the flag is not provided on the command line, then its default value depends
|
|||
on the operating system where rclone runs: "true" on Windows and macOS, "false"
|
||||
otherwise. If the flag is provided without a value, then it is "true".
|
||||
|
||||
The `--no-unicode-normalization` flag controls whether a similar "fixup" is
|
||||
performed for filenames that differ but are [canonically
|
||||
equivalent](https://en.wikipedia.org/wiki/Unicode_equivalence) with respect to
|
||||
unicode. Unicode normalization can be particularly helpful for users of macOS,
|
||||
which prefers form NFD instead of the NFC used by most other platforms. It is
|
||||
therefore highly recommended to keep the default of `false` on macOS, to avoid
|
||||
encoding compatibility issues.
|
||||
|
||||
In the (probably unlikely) event that a directory has multiple duplicate
|
||||
filenames after applying case and unicode normalization, the `--vfs-block-norm-dupes`
|
||||
flag allows hiding these duplicates. This comes with a performance tradeoff, as
|
||||
rclone will have to scan the entire directory for duplicates when listing a
|
||||
directory. For this reason, it is recommended to leave this disabled if not
|
||||
needed. However, macOS users may wish to consider using it, as otherwise, if a
|
||||
remote directory contains both NFC and NFD versions of the same filename, an odd
|
||||
situation will occur: both versions of the file will be visible in the mount,
|
||||
and both will appear to be editable, however, editing either version will
|
||||
actually result in only the NFD version getting edited under the hood. `--vfs-block-
|
||||
norm-dupes` prevents this confusion by detecting this scenario, hiding the
|
||||
duplicates, and logging an error, similar to how this is handled in `rclone
|
||||
sync`.
|
||||
|
||||
### VFS Disk Options
|
||||
|
||||
This flag allows you to manually set the statistics about the filing system.
|
||||
|
|
|
@ -5,10 +5,12 @@ import (
|
|||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/fstest"
|
||||
"github.com/rclone/rclone/vfs/vfscommon"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
func TestCaseSensitivity(t *testing.T) {
|
||||
|
@ -160,3 +162,36 @@ func assertFileAbsentVFS(t *testing.T, vfs *VFS, name string) {
|
|||
assert.Error(t, err)
|
||||
assert.Equal(t, err, ENOENT)
|
||||
}
|
||||
|
||||
func TestUnicodeNormalization(t *testing.T) {
|
||||
r := fstest.NewRun(t)
|
||||
|
||||
var (
|
||||
nfc = norm.NFC.String(norm.NFD.String("測試_Русский___ě_áñ"))
|
||||
nfd = norm.NFD.String(nfc)
|
||||
both = "normal name with no special characters.txt"
|
||||
)
|
||||
|
||||
// Create test files
|
||||
ctx := context.Background()
|
||||
file1 := r.WriteObject(ctx, both, "data1", t1)
|
||||
file2 := r.WriteObject(ctx, nfc, "data2", t2)
|
||||
r.CheckRemoteItems(t, file1, file2)
|
||||
|
||||
// Create VFS
|
||||
opt := vfscommon.DefaultOpt
|
||||
vfs := New(r.Fremote, &opt)
|
||||
defer cleanupVFS(t, vfs)
|
||||
|
||||
// assert that both files are found under NFD-normalized names
|
||||
assertFileDataVFS(t, vfs, norm.NFD.String(both), "data1")
|
||||
assertFileDataVFS(t, vfs, nfd, "data2")
|
||||
|
||||
// change ci.NoUnicodeNormalization to true and verify that only file1 is found
|
||||
ci := fs.GetConfig(ctx) // need to set the global config here as the *Dir methods don't take a ctx param
|
||||
oldVal := ci.NoUnicodeNormalization
|
||||
defer func() { fs.GetConfig(ctx).NoUnicodeNormalization = oldVal }() // restore the prior value after the test
|
||||
ci.NoUnicodeNormalization = true
|
||||
assertFileDataVFS(t, vfs, norm.NFD.String(both), "data1")
|
||||
assertFileAbsentVFS(t, vfs, nfd)
|
||||
}
|
||||
|
|
|
@ -30,6 +30,7 @@ type Options struct {
|
|||
CacheMinFreeSpace fs.SizeSuffix
|
||||
CachePollInterval time.Duration
|
||||
CaseInsensitive bool
|
||||
BlockNormDupes bool
|
||||
WriteWait time.Duration // time to wait for in-sequence write
|
||||
ReadWait time.Duration // time to wait for in-sequence read
|
||||
WriteBack time.Duration // time to wait before writing back dirty files
|
||||
|
|
|
@ -35,6 +35,7 @@ func AddFlags(flagSet *pflag.FlagSet) {
|
|||
flags.FVarP(flagSet, DirPerms, "dir-perms", "", "Directory permissions", "VFS")
|
||||
flags.FVarP(flagSet, FilePerms, "file-perms", "", "File permissions", "VFS")
|
||||
flags.BoolVarP(flagSet, &Opt.CaseInsensitive, "vfs-case-insensitive", "", Opt.CaseInsensitive, "If a file name not found, find a case insensitive match", "VFS")
|
||||
flags.BoolVarP(flagSet, &Opt.BlockNormDupes, "vfs-block-norm-dupes", "", Opt.BlockNormDupes, "If duplicate filenames exist in the same directory (after normalization), log an error and hide the duplicates (may have a performance cost)", "VFS")
|
||||
flags.DurationVarP(flagSet, &Opt.WriteWait, "vfs-write-wait", "", Opt.WriteWait, "Time to wait for in-sequence write before giving error", "VFS")
|
||||
flags.DurationVarP(flagSet, &Opt.ReadWait, "vfs-read-wait", "", Opt.ReadWait, "Time to wait for in-sequence read before seeking", "VFS")
|
||||
flags.DurationVarP(flagSet, &Opt.WriteBack, "vfs-write-back", "", Opt.WriteBack, "Time to writeback files after last use when using cache", "VFS")
|
||||
|
|
Loading…
Reference in a new issue