local: fix encoding of non utf-8 file names - fixes #66

This commit is contained in:
Nick Craig-Wood 2015-05-21 18:40:16 +01:00
parent 8e3703abeb
commit 870a9fc3b2
2 changed files with 41 additions and 7 deletions

View file

@ -23,3 +23,18 @@ Rclone reads and writes the modified time using an accuracy determined by
the OS. Typically this is 1ns on Linux, 10 ns on Windows and 1 Second
on OS X.
Filenames
---------
Filenames are expected to be encoded in UTF-8 on disk. This is the
normal case for Windows and OS X. There is a bit more uncertainty in
the Linux world, but new distributions will have UTF-8 encoded files
names.
If an invalid (non-UTF8) filename is read, the invalid caracters will
be replaced with the unicode replacement character, '<27>'. `rclone`
will emit a warning in this case, eg
```
Local file system at .: Replacing invalid UTF-8 characters in "gro\xdf"
```

View file

@ -18,6 +18,7 @@ import (
"path/filepath"
"sync"
"time"
"unicode/utf8"
"github.com/ncw/rclone/fs"
)
@ -32,14 +33,15 @@ func init() {
// FsLocal represents a local filesystem rooted at root
type FsLocal struct {
root string // The root directory
precisionOk sync.Once // Whether we need to read the precision
precision time.Duration // precision of local filesystem
root string // The root directory
precisionOk sync.Once // Whether we need to read the precision
precision time.Duration // precision of local filesystem
warned map[string]struct{} // whether we have warned about this string
}
// FsObjectLocal represents a local filesystem object
type FsObjectLocal struct {
local fs.Fs // The Fs this object is part of
local *FsLocal // The Fs this object is part of
remote string // The remote path
path string // The local path
info os.FileInfo // Interface for file info (always present)
@ -51,7 +53,10 @@ type FsObjectLocal struct {
// NewFs contstructs an FsLocal from the path
func NewFs(name, root string) (fs.Fs, error) {
root = filepath.ToSlash(path.Clean(root))
f := &FsLocal{root: root}
f := &FsLocal{
root: root,
warned: make(map[string]struct{}),
}
// Check to see if this points to a file
fi, err := os.Lstat(f.root)
if err == nil && fi.Mode().IsRegular() {
@ -134,6 +139,20 @@ func (f *FsLocal) List() fs.ObjectsChan {
return out
}
// CleanUtf8 makes string a valid UTF-8 string
//
// Any invalid UTF-8 characters will be replaced with utf8.RuneError
func (f *FsLocal) cleanUtf8(name string) string {
if utf8.ValidString(name) {
return name
}
if _, ok := f.warned[name]; !ok {
fs.Debug(f, "Replacing invalid UTF-8 characters in %q", name)
f.warned[name] = struct{}{}
}
return string([]rune(name))
}
// Walk the path returning a channel of FsObjects
func (f *FsLocal) ListDir() fs.DirChan {
out := make(fs.DirChan, fs.Config.Checkers)
@ -147,7 +166,7 @@ func (f *FsLocal) ListDir() fs.DirChan {
for _, item := range items {
if item.IsDir() {
dir := &fs.Dir{
Name: item.Name(),
Name: f.cleanUtf8(item.Name()),
When: item.ModTime(),
Bytes: 0,
Count: 0,
@ -294,7 +313,7 @@ func (o *FsObjectLocal) String() string {
// Return the remote path
func (o *FsObjectLocal) Remote() string {
return o.remote
return o.local.cleanUtf8(o.remote)
}
// Md5sum calculates the Md5sum of a file returning a lowercase hex string