diff --git a/backend/s3/s3.go b/backend/s3/s3.go index 11d84ece1..8040fcd14 100644 --- a/backend/s3/s3.go +++ b/backend/s3/s3.go @@ -42,6 +42,7 @@ import ( "github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs/config/configmap" "github.com/rclone/rclone/fs/config/configstruct" + "github.com/rclone/rclone/fs/encodings" "github.com/rclone/rclone/fs/fserrors" "github.com/rclone/rclone/fs/fshttp" "github.com/rclone/rclone/fs/hash" @@ -51,6 +52,8 @@ import ( "github.com/rclone/rclone/lib/rest" ) +const enc = encodings.S3 + // Register with Fs func init() { fs.Register(&fs.RegInfo{ @@ -900,7 +903,8 @@ func parsePath(path string) (root string) { // split returns bucket and bucketPath from the rootRelativePath // relative to f.root func (f *Fs) split(rootRelativePath string) (bucketName, bucketPath string) { - return bucket.Split(path.Join(f.root, rootRelativePath)) + bucketName, bucketPath = bucket.Split(path.Join(f.root, rootRelativePath)) + return enc.FromStandardName(bucketName), enc.FromStandardPath(bucketPath) } // split returns bucket and bucketPath from the object @@ -1096,9 +1100,10 @@ func NewFs(name, root string, m configmap.Mapper) (fs.Fs, error) { }).Fill(f) if f.rootBucket != "" && f.rootDirectory != "" { // Check to see if the object exists + encodedDirectory := enc.FromStandardPath(f.rootDirectory) req := s3.HeadObjectInput{ Bucket: &f.rootBucket, - Key: &f.rootDirectory, + Key: &encodedDirectory, } err = f.pacer.Call(func() (bool, error) { _, err = f.c.HeadObject(&req) @@ -1266,6 +1271,7 @@ func (f *Fs) list(ctx context.Context, bucket, directory, prefix string, addBuck fs.Logf(f, "failed to URL decode %q in listing common prefix: %v", *commonPrefix.Prefix, err) continue } + remote = enc.ToStandardPath(remote) if !strings.HasPrefix(remote, prefix) { fs.Logf(f, "Odd name received %q", remote) continue @@ -1290,6 +1296,7 @@ func (f *Fs) list(ctx context.Context, bucket, directory, prefix string, addBuck fs.Logf(f, "failed to URL decode %q in listing: 
%v", aws.StringValue(object.Key), err) continue } + remote = enc.ToStandardPath(remote) if !strings.HasPrefix(remote, prefix) { fs.Logf(f, "Odd name received %q", remote) continue @@ -1374,7 +1381,7 @@ func (f *Fs) listBuckets(ctx context.Context) (entries fs.DirEntries, err error) return nil, err } for _, bucket := range resp.Buckets { - bucketName := aws.StringValue(bucket.Name) + bucketName := enc.ToStandardName(aws.StringValue(bucket.Name)) f.cache.MarkOK(bucketName) d := fs.NewDir(bucketName, aws.TimeValue(bucket.CreationDate)) entries = append(entries, d) diff --git a/docs/content/s3.md b/docs/content/s3.md index 21a8a2b4b..934c23bdd 100644 --- a/docs/content/s3.md +++ b/docs/content/s3.md @@ -268,6 +268,29 @@ side copy to update the modification if the object can be copied in a single par In the case the object is larger than 5Gb or is in Glacier or Glacier Deep Archive storage the object will be uploaded rather than copied. +#### Restricted filename characters + +S3 allows any valid UTF-8 string as a key. + +Invalid UTF-8 bytes will be [replaced](/overview/#invalid-utf8), as +they can't be used in XML. + +The following characters are replaced since these are problematic when +dealing with the REST API: + +| Character | Value | Replacement | +| --------- |:-----:|:-----------:| +| NUL | 0x00 | ␀ | +| / | 0x2F | / | + +The encoding will also encode these file names as they don't seem to +work with the SDK properly: + +| File name | Replacement | +| --------- |:-----------:| +| . | . | +| .. | .. 
| + ### Multipart uploads ### rclone supports multipart uploads with S3 which means that it can diff --git a/fs/encodings/encodings.go b/fs/encodings/encodings.go index 546ab11f6..d30538583 100644 --- a/fs/encodings/encodings.go +++ b/fs/encodings/encodings.go @@ -240,6 +240,23 @@ const FTP = encoder.MultiEncoder( uint(Display) | encoder.EncodeRightSpace) +// S3 is the encoding used by the s3 backend +// +// Any UTF-8 character is valid in a key, however it can't handle +// invalid UTF-8 and / has a special meaning. +// +// The SDK can't seem to handle uploading files called '.' +// +// FIXME would be nice to add +// - initial / encoding +// - doubled / encoding +// - trailing / encoding +// so that AWS keys are always valid file names +const S3 = encoder.MultiEncoder( + encoder.EncodeInvalidUtf8 | + encoder.EncodeSlash | + encoder.EncodeDot) + // ByName returns the encoder for a give backend name or nil func ByName(name string) encoder.Encoder { switch strings.ToLower(name) { diff --git a/fs/encodings/encodings_noencode.go b/fs/encodings/encodings_noencode.go index ef7547a06..aed762326 100644 --- a/fs/encodings/encodings_noencode.go +++ b/fs/encodings/encodings_noencode.go @@ -27,6 +27,7 @@ const ( OneDrive = Base OpenDrive = Base Pcloud = Base + S3 = Base ) // ByName returns the encoder for a give backend name or nil