local: fix encoding of non utf-8 file names - fixes #66

This commit is contained in:
Nick Craig-Wood 2015-05-21 18:40:16 +01:00
parent 8e3703abeb
commit 870a9fc3b2
2 changed files with 41 additions and 7 deletions

View file

@ -23,3 +23,18 @@ Rclone reads and writes the modified time using an accuracy determined by
the OS. Typically this is 1ns on Linux, 10 ns on Windows and 1 Second the OS. Typically this is 1ns on Linux, 10 ns on Windows and 1 Second
on OS X. on OS X.
Filenames
---------
Filenames are expected to be encoded in UTF-8 on disk. This is the
normal case for Windows and OS X. There is a bit more uncertainty in
the Linux world, but new distributions will have UTF-8 encoded files
names.
If an invalid (non-UTF8) filename is read, the invalid caracters will
be replaced with the unicode replacement character, '<27>'. `rclone`
will emit a warning in this case, eg
```
Local file system at .: Replacing invalid UTF-8 characters in "gro\xdf"
```

View file

@ -18,6 +18,7 @@ import (
"path/filepath" "path/filepath"
"sync" "sync"
"time" "time"
"unicode/utf8"
"github.com/ncw/rclone/fs" "github.com/ncw/rclone/fs"
) )
@ -32,14 +33,15 @@ func init() {
// FsLocal represents a local filesystem rooted at root // FsLocal represents a local filesystem rooted at root
type FsLocal struct { type FsLocal struct {
root string // The root directory root string // The root directory
precisionOk sync.Once // Whether we need to read the precision precisionOk sync.Once // Whether we need to read the precision
precision time.Duration // precision of local filesystem precision time.Duration // precision of local filesystem
warned map[string]struct{} // whether we have warned about this string
} }
// FsObjectLocal represents a local filesystem object // FsObjectLocal represents a local filesystem object
type FsObjectLocal struct { type FsObjectLocal struct {
local fs.Fs // The Fs this object is part of local *FsLocal // The Fs this object is part of
remote string // The remote path remote string // The remote path
path string // The local path path string // The local path
info os.FileInfo // Interface for file info (always present) info os.FileInfo // Interface for file info (always present)
@ -51,7 +53,10 @@ type FsObjectLocal struct {
// NewFs contstructs an FsLocal from the path // NewFs contstructs an FsLocal from the path
func NewFs(name, root string) (fs.Fs, error) { func NewFs(name, root string) (fs.Fs, error) {
root = filepath.ToSlash(path.Clean(root)) root = filepath.ToSlash(path.Clean(root))
f := &FsLocal{root: root} f := &FsLocal{
root: root,
warned: make(map[string]struct{}),
}
// Check to see if this points to a file // Check to see if this points to a file
fi, err := os.Lstat(f.root) fi, err := os.Lstat(f.root)
if err == nil && fi.Mode().IsRegular() { if err == nil && fi.Mode().IsRegular() {
@ -134,6 +139,20 @@ func (f *FsLocal) List() fs.ObjectsChan {
return out return out
} }
// CleanUtf8 makes string a valid UTF-8 string
//
// Any invalid UTF-8 characters will be replaced with utf8.RuneError
func (f *FsLocal) cleanUtf8(name string) string {
if utf8.ValidString(name) {
return name
}
if _, ok := f.warned[name]; !ok {
fs.Debug(f, "Replacing invalid UTF-8 characters in %q", name)
f.warned[name] = struct{}{}
}
return string([]rune(name))
}
// Walk the path returning a channel of FsObjects // Walk the path returning a channel of FsObjects
func (f *FsLocal) ListDir() fs.DirChan { func (f *FsLocal) ListDir() fs.DirChan {
out := make(fs.DirChan, fs.Config.Checkers) out := make(fs.DirChan, fs.Config.Checkers)
@ -147,7 +166,7 @@ func (f *FsLocal) ListDir() fs.DirChan {
for _, item := range items { for _, item := range items {
if item.IsDir() { if item.IsDir() {
dir := &fs.Dir{ dir := &fs.Dir{
Name: item.Name(), Name: f.cleanUtf8(item.Name()),
When: item.ModTime(), When: item.ModTime(),
Bytes: 0, Bytes: 0,
Count: 0, Count: 0,
@ -294,7 +313,7 @@ func (o *FsObjectLocal) String() string {
// Return the remote path // Return the remote path
func (o *FsObjectLocal) Remote() string { func (o *FsObjectLocal) Remote() string {
return o.remote return o.local.cleanUtf8(o.remote)
} }
// Md5sum calculates the Md5sum of a file returning a lowercase hex string // Md5sum calculates the Md5sum of a file returning a lowercase hex string