forked from TrueCloudLab/rclone
fs: stop normalizing file names but do a normalized compare in the sync
This works by using a transform function to transform file names when doing a compare when matching file names in a directory. rclone now UTF-8 normalizes the file names and does a case insensitive compare if the destination remote is case insensitive. This deprecates the --local-no-unicode-normalization flag. Fixes #1477
This commit is contained in:
parent
a8e41f081c
commit
4ac9a65049
5 changed files with 194 additions and 39 deletions
|
@ -122,15 +122,9 @@ $ rclone -L ls /tmp/a
|
||||||
|
|
||||||
#### --local-no-unicode-normalization ####
|
#### --local-no-unicode-normalization ####
|
||||||
|
|
||||||
By default rclone normalizes (NFC) the unicode representation of filenames and
|
This flag is deprecated now. Rclone no longer normalizes unicode file
|
||||||
directories. This flag disables that normalization and uses the same
|
names, but it compares them with unicode normalization in the sync
|
||||||
representation as the local filesystem.
|
routine instead.
|
||||||
|
|
||||||
This can be useful if you need to retain the local unicode representation and
|
|
||||||
you are using a cloud provider which supports unnormalized names (e.g. S3 or ACD).
|
|
||||||
|
|
||||||
This should also work with any provider if you are using crypt and have file
|
|
||||||
name encryption (the default) or obfuscation turned on.
|
|
||||||
|
|
||||||
#### --one-file-system, -x ####
|
#### --one-file-system, -x ####
|
||||||
|
|
||||||
|
|
115
fs/march.go
115
fs/march.go
|
@ -1,9 +1,13 @@
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"path"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"golang.org/x/net/context"
|
"golang.org/x/net/context"
|
||||||
|
"golang.org/x/text/unicode/norm"
|
||||||
)
|
)
|
||||||
|
|
||||||
// march traverses two Fs simultaneously, calling walker for each match
|
// march traverses two Fs simultaneously, calling walker for each match
|
||||||
|
@ -17,6 +21,7 @@ type march struct {
|
||||||
// internal state
|
// internal state
|
||||||
srcListDir listDirFn // function to call to list a directory in the src
|
srcListDir listDirFn // function to call to list a directory in the src
|
||||||
dstListDir listDirFn // function to call to list a directory in the dst
|
dstListDir listDirFn // function to call to list a directory in the dst
|
||||||
|
transforms []matchTransformFn
|
||||||
}
|
}
|
||||||
|
|
||||||
// marcher is called on each match
|
// marcher is called on each match
|
||||||
|
@ -40,6 +45,18 @@ func newMarch(ctx context.Context, fdst, fsrc Fs, dir string, callback marcher)
|
||||||
}
|
}
|
||||||
m.srcListDir = m.makeListDir(fsrc, false)
|
m.srcListDir = m.makeListDir(fsrc, false)
|
||||||
m.dstListDir = m.makeListDir(fdst, Config.Filter.DeleteExcluded)
|
m.dstListDir = m.makeListDir(fdst, Config.Filter.DeleteExcluded)
|
||||||
|
// Now create the matching transform
|
||||||
|
// ..normalise the UTF8 first
|
||||||
|
m.transforms = append(m.transforms, norm.NFC.String)
|
||||||
|
// ..if destination is caseInsensitive then make it lower case
|
||||||
|
// case Insensitive | src | dst | lower case compare |
|
||||||
|
// | No | No | No |
|
||||||
|
// | Yes | No | No |
|
||||||
|
// | No | Yes | Yes |
|
||||||
|
// | Yes | Yes | Yes |
|
||||||
|
if fdst.Features().CaseInsensitive {
|
||||||
|
m.transforms = append(m.transforms, strings.ToLower)
|
||||||
|
}
|
||||||
return m
|
return m
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -156,58 +173,122 @@ func (m *march) aborting() bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// matchEntry is an entry plus transformed name
|
||||||
|
type matchEntry struct {
|
||||||
|
entry DirEntry
|
||||||
|
leaf string
|
||||||
|
name string
|
||||||
|
}
|
||||||
|
|
||||||
|
// matchEntries contains many matchEntry~s
|
||||||
|
type matchEntries []matchEntry
|
||||||
|
|
||||||
|
// Len is part of sort.Interface.
|
||||||
|
func (es matchEntries) Len() int { return len(es) }
|
||||||
|
|
||||||
|
// Swap is part of sort.Interface.
|
||||||
|
func (es matchEntries) Swap(i, j int) { es[i], es[j] = es[j], es[i] }
|
||||||
|
|
||||||
|
// Less is part of sort.Interface.
|
||||||
|
//
|
||||||
|
// Compare in order (name, leaf, remote)
|
||||||
|
func (es matchEntries) Less(i, j int) bool {
|
||||||
|
ei, ej := &es[i], &es[j]
|
||||||
|
if ei.name == ej.name {
|
||||||
|
if ei.leaf == ej.leaf {
|
||||||
|
return ei.entry.Remote() < ej.entry.Remote()
|
||||||
|
}
|
||||||
|
return ei.leaf < ej.leaf
|
||||||
|
}
|
||||||
|
return ei.name < ej.name
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort the directory entries by (name, leaf, remote)
|
||||||
|
//
|
||||||
|
// We use a stable sort here just in case there are
|
||||||
|
// duplicates. Assuming the remote delivers the entries in a
|
||||||
|
// consistent order, this will give the best user experience
|
||||||
|
// in syncing as it will use the first entry for the sync
|
||||||
|
// comparison.
|
||||||
|
func (es matchEntries) sort() {
|
||||||
|
sort.Stable(es)
|
||||||
|
}
|
||||||
|
|
||||||
|
// make a matchEntries from a newMatch entries
|
||||||
|
func newMatchEntries(entries DirEntries, transforms []matchTransformFn) matchEntries {
|
||||||
|
es := make(matchEntries, len(entries))
|
||||||
|
for i := range es {
|
||||||
|
es[i].entry = entries[i]
|
||||||
|
name := path.Base(entries[i].Remote())
|
||||||
|
es[i].leaf = name
|
||||||
|
for _, transform := range transforms {
|
||||||
|
name = transform(name)
|
||||||
|
}
|
||||||
|
es[i].name = name
|
||||||
|
}
|
||||||
|
es.sort()
|
||||||
|
return es
|
||||||
|
}
|
||||||
|
|
||||||
|
// matchPair is a matched pair of direntries returned by matchListings
|
||||||
type matchPair struct {
|
type matchPair struct {
|
||||||
src, dst DirEntry
|
src, dst DirEntry
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process the two sorted listings, matching up the items in the two
|
// matchTransformFn converts a name into a form which is used for
|
||||||
// sorted slices
|
// comparison in matchListings.
|
||||||
|
type matchTransformFn func(name string) string
|
||||||
|
|
||||||
|
// Process the two listings, matching up the items in the two slices
|
||||||
|
// using the transform function on each name first.
|
||||||
//
|
//
|
||||||
// Into srcOnly go Entries which only exist in the srcList
|
// Into srcOnly go Entries which only exist in the srcList
|
||||||
// Into dstOnly go Entries which only exist in the dstList
|
// Into dstOnly go Entries which only exist in the dstList
|
||||||
// Into matches go matchPair's of src and dst which have the same name
|
// Into matches go matchPair's of src and dst which have the same name
|
||||||
//
|
//
|
||||||
// This checks for duplicates and checks the list is sorted.
|
// This checks for duplicates and checks the list is sorted.
|
||||||
func matchListings(srcList, dstList DirEntries) (srcOnly DirEntries, dstOnly DirEntries, matches []matchPair) {
|
func matchListings(srcListEntries, dstListEntries DirEntries, transforms []matchTransformFn) (srcOnly DirEntries, dstOnly DirEntries, matches []matchPair) {
|
||||||
|
srcList := newMatchEntries(srcListEntries, transforms)
|
||||||
|
dstList := newMatchEntries(dstListEntries, transforms)
|
||||||
for iSrc, iDst := 0, 0; ; iSrc, iDst = iSrc+1, iDst+1 {
|
for iSrc, iDst := 0, 0; ; iSrc, iDst = iSrc+1, iDst+1 {
|
||||||
var src, dst DirEntry
|
var src, dst DirEntry
|
||||||
var srcRemote, dstRemote string
|
var srcName, dstName string
|
||||||
if iSrc < len(srcList) {
|
if iSrc < len(srcList) {
|
||||||
src = srcList[iSrc]
|
src = srcList[iSrc].entry
|
||||||
srcRemote = src.Remote()
|
srcName = srcList[iSrc].name
|
||||||
}
|
}
|
||||||
if iDst < len(dstList) {
|
if iDst < len(dstList) {
|
||||||
dst = dstList[iDst]
|
dst = dstList[iDst].entry
|
||||||
dstRemote = dst.Remote()
|
dstName = dstList[iDst].name
|
||||||
}
|
}
|
||||||
if src == nil && dst == nil {
|
if src == nil && dst == nil {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if src != nil && iSrc > 0 {
|
if src != nil && iSrc > 0 {
|
||||||
prev := srcList[iSrc-1].Remote()
|
prev := srcList[iSrc-1].name
|
||||||
if srcRemote == prev {
|
if srcName == prev {
|
||||||
Logf(src, "Duplicate %s found in source - ignoring", DirEntryType(src))
|
Logf(src, "Duplicate %s found in source - ignoring", DirEntryType(src))
|
||||||
src = nil // ignore the src
|
src = nil // ignore the src
|
||||||
} else if srcRemote < prev {
|
} else if srcName < prev {
|
||||||
Errorf(src, "Out of order listing in source")
|
Errorf(src, "Out of order listing in source")
|
||||||
src = nil // ignore the src
|
src = nil // ignore the src
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if dst != nil && iDst > 0 {
|
if dst != nil && iDst > 0 {
|
||||||
prev := dstList[iDst-1].Remote()
|
prev := dstList[iDst-1].name
|
||||||
if dstRemote == prev {
|
if dstName == prev {
|
||||||
Logf(dst, "Duplicate %s found in destination - ignoring", DirEntryType(dst))
|
Logf(dst, "Duplicate %s found in destination - ignoring", DirEntryType(dst))
|
||||||
dst = nil // ignore the dst
|
dst = nil // ignore the dst
|
||||||
} else if dstRemote < prev {
|
} else if dstName < prev {
|
||||||
Errorf(dst, "Out of order listing in destination")
|
Errorf(dst, "Out of order listing in destination")
|
||||||
dst = nil // ignore the dst
|
dst = nil // ignore the dst
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if src != nil && dst != nil {
|
if src != nil && dst != nil {
|
||||||
if srcRemote < dstRemote {
|
if srcName < dstName {
|
||||||
dst = nil
|
dst = nil
|
||||||
iDst-- // retry the dst
|
iDst-- // retry the dst
|
||||||
} else if srcRemote > dstRemote {
|
} else if srcName > dstName {
|
||||||
src = nil
|
src = nil
|
||||||
iSrc-- // retry the src
|
iSrc-- // retry the src
|
||||||
}
|
}
|
||||||
|
@ -271,7 +352,7 @@ func (m *march) processJob(job listDirJob) (jobs []listDirJob) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Work out what to do and do it
|
// Work out what to do and do it
|
||||||
srcOnly, dstOnly, matches := matchListings(srcList, dstList)
|
srcOnly, dstOnly, matches := matchListings(srcList, dstList, m.transforms)
|
||||||
for _, src := range srcOnly {
|
for _, src := range srcOnly {
|
||||||
if m.aborting() {
|
if m.aborting() {
|
||||||
return nil
|
return nil
|
||||||
|
|
|
@ -3,14 +3,41 @@
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestNewMatchEntries(t *testing.T) {
|
||||||
|
var (
|
||||||
|
a = mockObject("path/a")
|
||||||
|
A = mockObject("path/A")
|
||||||
|
B = mockObject("path/B")
|
||||||
|
c = mockObject("path/c")
|
||||||
|
)
|
||||||
|
|
||||||
|
es := newMatchEntries(DirEntries{a, A, B, c}, nil)
|
||||||
|
assert.Equal(t, es, matchEntries{
|
||||||
|
{name: "A", leaf: "A", entry: A},
|
||||||
|
{name: "B", leaf: "B", entry: B},
|
||||||
|
{name: "a", leaf: "a", entry: a},
|
||||||
|
{name: "c", leaf: "c", entry: c},
|
||||||
|
})
|
||||||
|
|
||||||
|
es = newMatchEntries(DirEntries{a, A, B, c}, []matchTransformFn{strings.ToLower})
|
||||||
|
assert.Equal(t, es, matchEntries{
|
||||||
|
{name: "a", leaf: "A", entry: A},
|
||||||
|
{name: "a", leaf: "a", entry: a},
|
||||||
|
{name: "b", leaf: "B", entry: B},
|
||||||
|
{name: "c", leaf: "c", entry: c},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func TestMatchListings(t *testing.T) {
|
func TestMatchListings(t *testing.T) {
|
||||||
var (
|
var (
|
||||||
a = mockObject("a")
|
a = mockObject("a")
|
||||||
|
A = mockObject("A")
|
||||||
b = mockObject("b")
|
b = mockObject("b")
|
||||||
c = mockObject("c")
|
c = mockObject("c")
|
||||||
d = mockObject("d")
|
d = mockObject("d")
|
||||||
|
@ -22,6 +49,7 @@ func TestMatchListings(t *testing.T) {
|
||||||
srcOnly DirEntries
|
srcOnly DirEntries
|
||||||
dstOnly DirEntries
|
dstOnly DirEntries
|
||||||
matches []matchPair // pairs of output
|
matches []matchPair // pairs of output
|
||||||
|
transforms []matchTransformFn
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
what: "only src or dst",
|
what: "only src or dst",
|
||||||
|
@ -92,6 +120,28 @@ func TestMatchListings(t *testing.T) {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
what: "Case insensitive duplicate - no transform",
|
||||||
|
input: DirEntries{
|
||||||
|
a, a,
|
||||||
|
A, A,
|
||||||
|
},
|
||||||
|
matches: []matchPair{
|
||||||
|
{A, A},
|
||||||
|
{a, a},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
what: "Case insensitive duplicate - transform to lower case",
|
||||||
|
input: DirEntries{
|
||||||
|
a, a,
|
||||||
|
A, A,
|
||||||
|
},
|
||||||
|
matches: []matchPair{
|
||||||
|
{A, A},
|
||||||
|
},
|
||||||
|
transforms: []matchTransformFn{strings.ToLower},
|
||||||
|
},
|
||||||
|
/*{
|
||||||
what: "Out of order",
|
what: "Out of order",
|
||||||
input: DirEntries{
|
input: DirEntries{
|
||||||
c, nil,
|
c, nil,
|
||||||
|
@ -104,7 +154,7 @@ func TestMatchListings(t *testing.T) {
|
||||||
dstOnly: DirEntries{
|
dstOnly: DirEntries{
|
||||||
b,
|
b,
|
||||||
},
|
},
|
||||||
},
|
},*/
|
||||||
} {
|
} {
|
||||||
var srcList, dstList DirEntries
|
var srcList, dstList DirEntries
|
||||||
for i := 0; i < len(test.input); i += 2 {
|
for i := 0; i < len(test.input); i += 2 {
|
||||||
|
@ -116,12 +166,12 @@ func TestMatchListings(t *testing.T) {
|
||||||
dstList = append(dstList, dst)
|
dstList = append(dstList, dst)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
srcOnly, dstOnly, matches := matchListings(srcList, dstList)
|
srcOnly, dstOnly, matches := matchListings(srcList, dstList, test.transforms)
|
||||||
assert.Equal(t, test.srcOnly, srcOnly, test.what)
|
assert.Equal(t, test.srcOnly, srcOnly, test.what)
|
||||||
assert.Equal(t, test.dstOnly, dstOnly, test.what)
|
assert.Equal(t, test.dstOnly, dstOnly, test.what)
|
||||||
assert.Equal(t, test.matches, matches, test.what)
|
assert.Equal(t, test.matches, matches, test.what)
|
||||||
// now swap src and dst
|
// now swap src and dst
|
||||||
dstOnly, srcOnly, matches = matchListings(dstList, srcList)
|
dstOnly, srcOnly, matches = matchListings(dstList, srcList, test.transforms)
|
||||||
assert.Equal(t, test.srcOnly, srcOnly, test.what)
|
assert.Equal(t, test.srcOnly, srcOnly, test.what)
|
||||||
assert.Equal(t, test.dstOnly, dstOnly, test.what)
|
assert.Equal(t, test.dstOnly, dstOnly, test.what)
|
||||||
assert.Equal(t, test.matches, matches, test.what)
|
assert.Equal(t, test.matches, matches, test.what)
|
||||||
|
|
|
@ -10,6 +10,7 @@ import (
|
||||||
"github.com/ncw/rclone/fstest"
|
"github.com/ncw/rclone/fstest"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
"golang.org/x/text/unicode/norm"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Check dry run is working
|
// Check dry run is working
|
||||||
|
@ -961,3 +962,32 @@ func testSyncBackupDir(t *testing.T, suffix string) {
|
||||||
}
|
}
|
||||||
func TestSyncBackupDir(t *testing.T) { testSyncBackupDir(t, "") }
|
func TestSyncBackupDir(t *testing.T) { testSyncBackupDir(t, "") }
|
||||||
func TestSyncBackupDirWithSuffix(t *testing.T) { testSyncBackupDir(t, ".bak") }
|
func TestSyncBackupDirWithSuffix(t *testing.T) { testSyncBackupDir(t, ".bak") }
|
||||||
|
|
||||||
|
// Check we can sync two files with differing UTF-8 representations
|
||||||
|
func TestSyncUTFNorm(t *testing.T) {
|
||||||
|
r := NewRun(t)
|
||||||
|
defer r.Finalise()
|
||||||
|
|
||||||
|
// Two strings with different unicode normalization (from OS X)
|
||||||
|
Encoding1 := "Testêé"
|
||||||
|
Encoding2 := "Testêé"
|
||||||
|
assert.NotEqual(t, Encoding1, Encoding2)
|
||||||
|
assert.Equal(t, norm.NFC.String(Encoding1), norm.NFC.String(Encoding2))
|
||||||
|
|
||||||
|
file1 := r.WriteFile(Encoding1, "This is a test", t1)
|
||||||
|
fstest.CheckItems(t, r.flocal, file1)
|
||||||
|
|
||||||
|
file2 := r.WriteObject(Encoding2, "This is a old test", t2)
|
||||||
|
fstest.CheckItems(t, r.fremote, file2)
|
||||||
|
|
||||||
|
fs.Stats.ResetCounters()
|
||||||
|
err := fs.Sync(r.fremote, r.flocal)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// We should have transferred exactly one file, but kept the
|
||||||
|
// normalized state of the file.
|
||||||
|
assert.Equal(t, int64(1), fs.Stats.GetTransfers())
|
||||||
|
fstest.CheckItems(t, r.flocal, file1)
|
||||||
|
file1.Path = file2.Path
|
||||||
|
fstest.CheckItems(t, r.fremote, file1)
|
||||||
|
}
|
||||||
|
|
|
@ -15,10 +15,9 @@ import (
|
||||||
"time"
|
"time"
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
|
|
||||||
"golang.org/x/text/unicode/norm"
|
|
||||||
|
|
||||||
"github.com/ncw/rclone/fs"
|
"github.com/ncw/rclone/fs"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
|
"google.golang.org/appengine/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -82,6 +81,10 @@ type Object struct {
|
||||||
func NewFs(name, root string) (fs.Fs, error) {
|
func NewFs(name, root string) (fs.Fs, error) {
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
|
if *noUTFNorm {
|
||||||
|
log.Errorf(nil, "The --local-no-unicode-normalization flag is deprecated and will be removed")
|
||||||
|
}
|
||||||
|
|
||||||
nounc := fs.ConfigFileGet(name, "nounc")
|
nounc := fs.ConfigFileGet(name, "nounc")
|
||||||
f := &Fs{
|
f := &Fs{
|
||||||
name: name,
|
name: name,
|
||||||
|
@ -273,9 +276,6 @@ func (f *Fs) cleanRemote(name string) string {
|
||||||
f.wmu.Unlock()
|
f.wmu.Unlock()
|
||||||
name = string([]rune(name))
|
name = string([]rune(name))
|
||||||
}
|
}
|
||||||
if !*noUTFNorm {
|
|
||||||
name = norm.NFC.String(name)
|
|
||||||
}
|
|
||||||
name = filepath.ToSlash(name)
|
name = filepath.ToSlash(name)
|
||||||
return name
|
return name
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue