forked from TrueCloudLab/rclone
local: fix encoding of non utf-8 file names - fixes #66
This commit is contained in:
parent
8e3703abeb
commit
870a9fc3b2
2 changed files with 41 additions and 7 deletions
|
@ -23,3 +23,18 @@ Rclone reads and writes the modified time using an accuracy determined by
|
||||||
the OS. Typically this is 1ns on Linux, 10 ns on Windows and 1 Second
|
the OS. Typically this is 1ns on Linux, 10 ns on Windows and 1 Second
|
||||||
on OS X.
|
on OS X.
|
||||||
|
|
||||||
|
Filenames
|
||||||
|
---------
|
||||||
|
|
||||||
|
Filenames are expected to be encoded in UTF-8 on disk. This is the
|
||||||
|
normal case for Windows and OS X. There is a bit more uncertainty in
|
||||||
|
the Linux world, but new distributions will have UTF-8 encoded file
|
||||||
|
names.
|
||||||
|
|
||||||
|
If an invalid (non-UTF-8) filename is read, the invalid characters will
|
||||||
|
be replaced with the unicode replacement character, '<27>'. `rclone`
|
||||||
|
will emit a warning in this case, e.g.
|
||||||
|
|
||||||
|
```
|
||||||
|
Local file system at .: Replacing invalid UTF-8 characters in "gro\xdf"
|
||||||
|
```
|
||||||
|
|
|
@ -18,6 +18,7 @@ import (
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/ncw/rclone/fs"
|
"github.com/ncw/rclone/fs"
|
||||||
)
|
)
|
||||||
|
@ -32,14 +33,15 @@ func init() {
|
||||||
|
|
||||||
// FsLocal represents a local filesystem rooted at root
|
// FsLocal represents a local filesystem rooted at root
|
||||||
type FsLocal struct {
|
type FsLocal struct {
|
||||||
root string // The root directory
|
root string // The root directory
|
||||||
precisionOk sync.Once // Whether we need to read the precision
|
precisionOk sync.Once // Whether we need to read the precision
|
||||||
precision time.Duration // precision of local filesystem
|
precision time.Duration // precision of local filesystem
|
||||||
|
warned map[string]struct{} // whether we have warned about this string
|
||||||
}
|
}
|
||||||
|
|
||||||
// FsObjectLocal represents a local filesystem object
|
// FsObjectLocal represents a local filesystem object
|
||||||
type FsObjectLocal struct {
|
type FsObjectLocal struct {
|
||||||
local fs.Fs // The Fs this object is part of
|
local *FsLocal // The Fs this object is part of
|
||||||
remote string // The remote path
|
remote string // The remote path
|
||||||
path string // The local path
|
path string // The local path
|
||||||
info os.FileInfo // Interface for file info (always present)
|
info os.FileInfo // Interface for file info (always present)
|
||||||
|
@ -51,7 +53,10 @@ type FsObjectLocal struct {
|
||||||
// NewFs constructs an FsLocal from the path
|
// NewFs constructs an FsLocal from the path
|
||||||
func NewFs(name, root string) (fs.Fs, error) {
|
func NewFs(name, root string) (fs.Fs, error) {
|
||||||
root = filepath.ToSlash(path.Clean(root))
|
root = filepath.ToSlash(path.Clean(root))
|
||||||
f := &FsLocal{root: root}
|
f := &FsLocal{
|
||||||
|
root: root,
|
||||||
|
warned: make(map[string]struct{}),
|
||||||
|
}
|
||||||
// Check to see if this points to a file
|
// Check to see if this points to a file
|
||||||
fi, err := os.Lstat(f.root)
|
fi, err := os.Lstat(f.root)
|
||||||
if err == nil && fi.Mode().IsRegular() {
|
if err == nil && fi.Mode().IsRegular() {
|
||||||
|
@ -134,6 +139,20 @@ func (f *FsLocal) List() fs.ObjectsChan {
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cleanUtf8 makes name a valid UTF-8 string
|
||||||
|
//
|
||||||
|
// Any invalid UTF-8 characters will be replaced with utf8.RuneError
|
||||||
|
func (f *FsLocal) cleanUtf8(name string) string {
|
||||||
|
if utf8.ValidString(name) {
|
||||||
|
return name
|
||||||
|
}
|
||||||
|
if _, ok := f.warned[name]; !ok {
|
||||||
|
fs.Debug(f, "Replacing invalid UTF-8 characters in %q", name)
|
||||||
|
f.warned[name] = struct{}{}
|
||||||
|
}
|
||||||
|
return string([]rune(name))
|
||||||
|
}
|
||||||
|
|
||||||
// Walk the path returning a channel of FsObjects
|
// Walk the path returning a channel of FsObjects
|
||||||
func (f *FsLocal) ListDir() fs.DirChan {
|
func (f *FsLocal) ListDir() fs.DirChan {
|
||||||
out := make(fs.DirChan, fs.Config.Checkers)
|
out := make(fs.DirChan, fs.Config.Checkers)
|
||||||
|
@ -147,7 +166,7 @@ func (f *FsLocal) ListDir() fs.DirChan {
|
||||||
for _, item := range items {
|
for _, item := range items {
|
||||||
if item.IsDir() {
|
if item.IsDir() {
|
||||||
dir := &fs.Dir{
|
dir := &fs.Dir{
|
||||||
Name: item.Name(),
|
Name: f.cleanUtf8(item.Name()),
|
||||||
When: item.ModTime(),
|
When: item.ModTime(),
|
||||||
Bytes: 0,
|
Bytes: 0,
|
||||||
Count: 0,
|
Count: 0,
|
||||||
|
@ -294,7 +313,7 @@ func (o *FsObjectLocal) String() string {
|
||||||
|
|
||||||
// Return the remote path
|
// Return the remote path
|
||||||
func (o *FsObjectLocal) Remote() string {
|
func (o *FsObjectLocal) Remote() string {
|
||||||
return o.remote
|
return o.local.cleanUtf8(o.remote)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Md5sum calculates the Md5sum of a file returning a lowercase hex string
|
// Md5sum calculates the Md5sum of a file returning a lowercase hex string
|
||||||
|
|
Loading…
Reference in a new issue