local: add --local-unicode-normalization (and remove --local-no-unicode-normalization)

macOS stores file names in decomposed (NFD) Unicode form, and transferring
them unchanged to some systems causes Korean file names to display incorrectly.

This adds the flag --local-unicode-normalization to optionally
normalize the file names to NFC.

This also removes the (long-deprecated) --local-no-unicode-normalization flag.

See: https://forum.rclone.org/t/support-for-korean-jaso-conversion/19435
This commit is contained in:
Nick Craig-Wood 2020-09-30 16:24:50 +01:00
parent 06f27384dd
commit 04308dcaa1

View file

@ -27,6 +27,7 @@ import (
"github.com/rclone/rclone/lib/encoder" "github.com/rclone/rclone/lib/encoder"
"github.com/rclone/rclone/lib/file" "github.com/rclone/rclone/lib/file"
"github.com/rclone/rclone/lib/readers" "github.com/rclone/rclone/lib/readers"
"golang.org/x/text/unicode/norm"
) )
// Constants // Constants
@ -86,12 +87,21 @@ So rclone now always reads the link
Default: false, Default: false,
Advanced: true, Advanced: true,
}, { }, {
Name: "no_unicode_normalization", Name: "unicode_normalization",
Help: `Don't apply unicode normalization to paths and filenames (Deprecated) Help: `Apply unicode NFC normalization to paths and filenames
This flag is deprecated now. Rclone no longer normalizes unicode file This flag can be used to normalize file names into unicode NFC form
names, but it compares them with unicode normalization in the sync that are read from the local filesystem.
routine instead.`,
Rclone does not normally touch the encoding of file names it reads from
the file system.
This can be useful when using macOS as it normally provides decomposed (NFD)
unicode which in some language (eg Korean) doesn't display properly on
some OSes.
Note that rclone compares filenames with unicode normalization in the sync
routine so this flag shouldn't normally be used.`,
Default: false, Default: false,
Advanced: true, Advanced: true,
}, { }, {
@ -196,7 +206,7 @@ type Options struct {
FollowSymlinks bool `config:"copy_links"` FollowSymlinks bool `config:"copy_links"`
TranslateSymlinks bool `config:"links"` TranslateSymlinks bool `config:"links"`
SkipSymlinks bool `config:"skip_links"` SkipSymlinks bool `config:"skip_links"`
NoUTFNorm bool `config:"no_unicode_normalization"` UTFNorm bool `config:"unicode_normalization"`
NoCheckUpdated bool `config:"no_check_updated"` NoCheckUpdated bool `config:"no_check_updated"`
NoUNC bool `config:"nounc"` NoUNC bool `config:"nounc"`
OneFileSystem bool `config:"one_file_system"` OneFileSystem bool `config:"one_file_system"`
@ -255,10 +265,6 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e
return nil, errLinksAndCopyLinks return nil, errLinksAndCopyLinks
} }
if opt.NoUTFNorm {
fs.Errorf(nil, "The --local-no-unicode-normalization flag is deprecated and will be removed")
}
f := &Fs{ f := &Fs{
name: name, name: name,
opt: *opt, opt: *opt,
@ -521,6 +527,9 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e
} }
func (f *Fs) cleanRemote(dir, filename string) (remote string) { func (f *Fs) cleanRemote(dir, filename string) (remote string) {
if f.opt.UTFNorm {
filename = norm.NFC.String(filename)
}
remote = path.Join(dir, f.opt.Enc.ToStandardName(filename)) remote = path.Join(dir, f.opt.Enc.ToStandardName(filename))
if !utf8.ValidString(filename) { if !utf8.ValidString(filename) {