rclone/vfs/vfs_case_test.go

198 lines
5.5 KiB
Go
Raw Normal View History

package vfs
import (
"context"
"os"
"testing"
vfs: fix unicode normalization on macOS - fixes #7072 Before this change, the VFS layer did not properly handle unicode normalization, which caused problems particularly for users of macOS. While attempts were made to handle it with various `-o modules=iconv` combinations, this was an imperfect solution, as no one combination allowed both NFC and NFD content to simultaneously be both visible and editable via Finder. After this change, the VFS supports `--no-unicode-normalization` (default `false`) via the existing `--vfs-case-insensitive` logic, which is extended to apply to both case insensitivity and unicode normalization form. This change also adds an additional flag, `--vfs-block-norm-dupes`, to address a probably rare but potentially possible scenario where a directory contains multiple duplicate filenames after applying case and unicode normalization settings. In such a scenario, this flag (disabled by default) hides the duplicates. This comes with a performance tradeoff, as rclone will have to scan the entire directory for duplicates when listing a directory. For this reason, it is recommended to leave this disabled if not needed. However, macOS users may wish to consider using it, as otherwise, if a remote directory contains both NFC and NFD versions of the same filename, an odd situation will occur: both versions of the file will be visible in the mount, and both will appear to be editable, however, editing either version will actually result in only the NFD version getting edited under the hood. `--vfs-block-norm-dupes` prevents this confusion by detecting this scenario, hiding the duplicates, and logging an error, similar to how this is handled in `rclone sync`.
2024-02-05 07:58:11 +00:00
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fstest"
"github.com/rclone/rclone/vfs/vfscommon"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
vfs: fix unicode normalization on macOS - fixes #7072 Before this change, the VFS layer did not properly handle unicode normalization, which caused problems particularly for users of macOS. While attempts were made to handle it with various `-o modules=iconv` combinations, this was an imperfect solution, as no one combination allowed both NFC and NFD content to simultaneously be both visible and editable via Finder. After this change, the VFS supports `--no-unicode-normalization` (default `false`) via the existing `--vfs-case-insensitive` logic, which is extended to apply to both case insensitivity and unicode normalization form. This change also adds an additional flag, `--vfs-block-norm-dupes`, to address a probably rare but potentially possible scenario where a directory contains multiple duplicate filenames after applying case and unicode normalization settings. In such a scenario, this flag (disabled by default) hides the duplicates. This comes with a performance tradeoff, as rclone will have to scan the entire directory for duplicates when listing a directory. For this reason, it is recommended to leave this disabled if not needed. However, macOS users may wish to consider using it, as otherwise, if a remote directory contains both NFC and NFD versions of the same filename, an odd situation will occur: both versions of the file will be visible in the mount, and both will appear to be editable, however, editing either version will actually result in only the NFD version getting edited under the hood. `--vfs-block-norm-dupes` prevents this confusion by detecting this scenario, hiding the duplicates, and logging an error, similar to how this is handled in `rclone sync`.
2024-02-05 07:58:11 +00:00
"golang.org/x/text/unicode/norm"
)
// TestCaseSensitivity compares a case-sensitive VFS with a case-insensitive
// VFS layered over the same remote. It is skipped when the remote advertises
// itself as case-insensitive, or when probing shows that it does not keep
// names differing only by case as separate files.
func TestCaseSensitivity(t *testing.T) {
	r := fstest.NewRun(t)
	if r.Fremote.Features().CaseInsensitive {
		t.Skip("Can't test case sensitivity - this remote is officially not case-sensitive")
	}

	// Seed the remote with two clearly distinct files.
	ctx := context.Background()
	itemA := r.WriteObject(ctx, "FiLeA", "data1", t1)
	itemB := r.WriteObject(ctx, "FiLeB", "data2", t2)
	r.CheckRemoteItems(t, itemA, itemB)

	// Add a third file whose name matches itemB's except for letter case.
	// A case-sensitive remote stores it as a separate file. A
	// case-insensitive remote either does not create it or overwrites
	// itemB, depending on how the file system diverges, and on a box.com
	// remote this step will even fail.
	itemBVariant := r.WriteObject(ctx, "FilEb", "data3", t3)

	// One VFS of each flavour over the same remote.
	sensitiveOpt := vfscommon.DefaultOpt
	sensitiveOpt.CaseInsensitive = false
	vfsCS := New(r.Fremote, &sensitiveOpt)
	defer cleanupVFS(t, vfsCS)

	insensitiveOpt := vfscommon.DefaultOpt
	insensitiveOpt.CaseInsensitive = true
	vfsCI := New(r.Fremote, &insensitiveOpt)
	defer cleanupVFS(t, vfsCI)

	// Exact-name lookups must succeed whatever the VFS flavour.
	assertFileDataVFS(t, vfsCI, "FiLeA", "data1")
	assertFileDataVFS(t, vfsCS, "FiLeA", "data1")

	// Probe the remote through the case-sensitive VFS. Every probe runs
	// (no early exit); a single miss marks the remote as non-compliant.
	remoteIsOK := true
	for _, probe := range []struct{ name, data string }{
		{"FiLeA", "data1"},
		{"FiLeB", "data2"},
		{"FilEb", "data3"},
	} {
		if !checkFileDataVFS(t, vfsCS, probe.name, probe.data) {
			remoteIsOK = false
		}
	}

	// Everything below only makes sense on a case-sensitive file system.
	if !remoteIsOK {
		t.Skip("Can't test case sensitivity - this remote doesn't comply as case-sensitive")
	}

	// The remote really is case-sensitive, so all three files must exist.
	r.CheckRemoteItems(t, itemA, itemB, itemBVariant)

	// Case-insensitive VFS: any spelling of the unique name resolves, and
	// exact spellings of the two colliding names resolve to their own data.
	for _, want := range []struct{ name, data string }{
		{"FiLeA", "data1"},
		{"fileA", "data1"},
		{"filea", "data1"},
		{"FILEA", "data1"},
		{"FiLeB", "data2"},
		{"FilEb", "data3"},
	} {
		assertFileDataVFS(t, vfsCI, want.name, want.data)
	}

	// Spellings matching neither colliding file exactly must fail to open,
	// and with an error other than ENOENT.
	for _, inexact := range []string{"fileb", "FILEB"} {
		fd, err := vfsCI.OpenFile(inexact, os.O_RDONLY, 0777)
		assert.Nil(t, fd)
		assert.Error(t, err)
		assert.NotEqual(t, err, ENOENT)
	}

	// Case-sensitive VFS, for comparison: only exact spellings exist.
	for _, tc := range []struct {
		name   string
		data   string
		absent bool
	}{
		{name: "FiLeA", data: "data1"},
		{name: "fileA", absent: true},
		{name: "filea", absent: true},
		{name: "FILEA", absent: true},
		{name: "FiLeB", data: "data2"},
		{name: "FilEb", data: "data3"},
		{name: "fileb", absent: true},
		{name: "FILEB", absent: true},
	} {
		if tc.absent {
			assertFileAbsentVFS(t, vfsCS, tc.name)
			continue
		}
		assertFileDataVFS(t, vfsCS, tc.name, tc.data)
	}
}
// checkFileDataVFS reports whether name can be opened read-only on vfs as a
// *ReadFileHandle and a single Read of len(expect) bytes returns exactly
// expect. It does not use t, so a mismatch is reported only through the
// return value and never fails the test.
func checkFileDataVFS(t *testing.T, vfs *VFS, name string, expect string) bool {
	handle, openErr := vfs.OpenFile(name, os.O_RDONLY, 0777)
	if openErr != nil || handle == nil {
		return false
	}
	// File must be closed - otherwise Run.cleanUp() will fail on Windows.
	defer func() { _ = handle.Close() }()

	reader, isReadHandle := handle.(*ReadFileHandle)
	if !isReadHandle {
		return false
	}

	want := len(expect)
	data := make([]byte, want)
	got, readErr := reader.Read(data)
	return readErr == nil && got == want && string(data) == expect
}
// assertFileDataVFS asserts that name can be opened read-only on vfs as a
// *ReadFileHandle and that a single Read of len(expect) bytes returns exactly
// expect. A handle of any other type (including nil) stops the test
// immediately; all other mismatches are recorded and the checks continue.
func assertFileDataVFS(t *testing.T, vfs *VFS, name string, expect string) {
	// Attribute failures to the calling test line rather than to this helper.
	t.Helper()

	fd, errOpen := vfs.OpenFile(name, os.O_RDONLY, 0777)
	assert.NotNil(t, fd, "open %q: nil handle", name)
	assert.NoError(t, errOpen, "open %q", name)
	defer func() {
		// File must be closed - otherwise Run.cleanUp() will fail on Windows.
		if errOpen == nil && fd != nil {
			_ = fd.Close()
		}
	}()

	// Reading through a handle of the wrong type is impossible, so stop here.
	fh, ok := fd.(*ReadFileHandle)
	require.True(t, ok, "open %q: handle is not a *ReadFileHandle", name)

	size := len(expect)
	buf := make([]byte, size)
	numRead, errRead := fh.Read(buf)
	assert.NoError(t, errRead, "read %q", name)
	// testify expects (expected, actual); this order keeps failure output
	// correctly labelled.
	assert.Equal(t, size, numRead, "read %q: byte count", name)
	assert.Equal(t, expect, string(buf), "read %q: content", name)
}
// assertFileAbsentVFS asserts that opening name read-only on vfs yields no
// handle and fails with ENOENT.
func assertFileAbsentVFS(t *testing.T, vfs *VFS, name string) {
	// Attribute failures to the calling test line rather than to this helper.
	t.Helper()

	fd, err := vfs.OpenFile(name, os.O_RDONLY, 0777)
	defer func() {
		// File must be closed - otherwise Run.cleanUp() will fail on Windows.
		// This only matters if the open unexpectedly succeeded.
		if err == nil && fd != nil {
			_ = fd.Close()
		}
	}()

	assert.Nil(t, fd, "open %q: expected no handle", name)
	assert.Error(t, err, "open %q: expected an error", name)
	// testify expects (expected, actual); this order keeps failure output
	// correctly labelled.
	assert.Equal(t, ENOENT, err, "open %q: expected ENOENT", name)
}
vfs: fix unicode normalization on macOS - fixes #7072 Before this change, the VFS layer did not properly handle unicode normalization, which caused problems particularly for users of macOS. While attempts were made to handle it with various `-o modules=iconv` combinations, this was an imperfect solution, as no one combination allowed both NFC and NFD content to simultaneously be both visible and editable via Finder. After this change, the VFS supports `--no-unicode-normalization` (default `false`) via the existing `--vfs-case-insensitive` logic, which is extended to apply to both case insensitivity and unicode normalization form. This change also adds an additional flag, `--vfs-block-norm-dupes`, to address a probably rare but potentially possible scenario where a directory contains multiple duplicate filenames after applying case and unicode normalization settings. In such a scenario, this flag (disabled by default) hides the duplicates. This comes with a performance tradeoff, as rclone will have to scan the entire directory for duplicates when listing a directory. For this reason, it is recommended to leave this disabled if not needed. However, macOS users may wish to consider using it, as otherwise, if a remote directory contains both NFC and NFD versions of the same filename, an odd situation will occur: both versions of the file will be visible in the mount, and both will appear to be editable, however, editing either version will actually result in only the NFD version getting edited under the hood. `--vfs-block-norm-dupes` prevents this confusion by detecting this scenario, hiding the duplicates, and logging an error, similar to how this is handled in `rclone sync`.
2024-02-05 07:58:11 +00:00
// TestUnicodeNormalization writes one file with a plain name and one with an
// NFC-normalized name, then looks both up through the VFS by their
// NFD-normalized names. With the default options both lookups must succeed;
// once the global NoUnicodeNormalization setting is switched on, the plain
// name must still resolve and the NFD spelling of the NFC file must be absent.
func TestUnicodeNormalization(t *testing.T) {
	r := fstest.NewRun(t)

	const plainName = "normal name with no special characters.txt"
	composed := norm.NFC.String(norm.NFD.String("測試_Русский___ě_áñ"))
	decomposed := norm.NFD.String(composed)
	plainNFD := norm.NFD.String(plainName)

	// Populate the remote: the plain name and the composed (NFC) name.
	ctx := context.Background()
	plainItem := r.WriteObject(ctx, plainName, "data1", t1)
	composedItem := r.WriteObject(ctx, composed, "data2", t2)
	r.CheckRemoteItems(t, plainItem, composedItem)

	// VFS with default options.
	opt := vfscommon.DefaultOpt
	v := New(r.Fremote, &opt)
	defer cleanupVFS(t, v)

	// Both files must be reachable through their NFD-normalized names.
	assertFileDataVFS(t, v, plainNFD, "data1")
	assertFileDataVFS(t, v, decomposed, "data2")

	// Switch normalization off. The global config has to be changed because
	// the *Dir methods don't take a ctx param; the previous value is put
	// back when the test returns.
	globalCfg := fs.GetConfig(ctx)
	previous := globalCfg.NoUnicodeNormalization
	defer func() {
		fs.GetConfig(ctx).NoUnicodeNormalization = previous
	}()
	globalCfg.NoUnicodeNormalization = true

	// Now only the plain file is found; the NFD spelling must be absent.
	assertFileDataVFS(t, v, plainNFD, "data1")
	assertFileAbsentVFS(t, v, decomposed)
}