s3: only ask for URL encoded directory listings if we need them on Ceph

This works around a bug in Ceph which doesn't encode CommonPrefixes
when using URL encoded directory listings.

See: https://tracker.ceph.com/issues/41870
This commit is contained in:
Nick Craig-Wood 2019-09-16 20:25:55 +01:00
parent 7e738c9d71
commit 6e053ecbd0

View file

@ -17,6 +17,7 @@ import (
"context" "context"
"encoding/base64" "encoding/base64"
"encoding/hex" "encoding/hex"
"encoding/xml"
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
@ -1224,20 +1225,51 @@ func (f *Fs) list(ctx context.Context, bucket, directory, prefix string, addBuck
delimiter = "/" delimiter = "/"
} }
var marker *string var marker *string
// URL encode the listings so we can use control characters in object names
// See: https://github.com/aws/aws-sdk-go/issues/1914
//
// However this doesn't work perfectly under Ceph (and hence DigitalOcean/Dreamhost) because
// it doesn't encode CommonPrefixes.
// See: https://tracker.ceph.com/issues/41870
//
// This also does not work under IBM COS (see https://github.com/rclone/rclone/issues/3345),
// though it may work on some versions.
//
// This does work with minio, but support for it was only added relatively recently:
// https://github.com/minio/minio/pull/7265
//
// So we enable it only on providers we know support it properly; all other providers
// retry the listing with URL encoding when an XML syntax error is detected.
var urlEncodeListings = (f.opt.Provider == "AWS" || f.opt.Provider == "Wasabi" || f.opt.Provider == "Alibaba")
for { for {
// FIXME need to implement ALL loop // FIXME need to implement ALL loop
req := s3.ListObjectsInput{ req := s3.ListObjectsInput{
Bucket: &bucket, Bucket: &bucket,
Delimiter: &delimiter, Delimiter: &delimiter,
Prefix: &directory, Prefix: &directory,
MaxKeys: &maxKeys, MaxKeys: &maxKeys,
Marker: marker, Marker: marker,
EncodingType: aws.String(s3.EncodingTypeUrl), }
if urlEncodeListings {
req.EncodingType = aws.String(s3.EncodingTypeUrl)
} }
var resp *s3.ListObjectsOutput var resp *s3.ListObjectsOutput
var err error var err error
err = f.pacer.Call(func() (bool, error) { err = f.pacer.Call(func() (bool, error) {
resp, err = f.c.ListObjectsWithContext(ctx, &req) resp, err = f.c.ListObjectsWithContext(ctx, &req)
if err != nil && !urlEncodeListings {
if awsErr, ok := err.(awserr.RequestFailure); ok {
if origErr := awsErr.OrigErr(); origErr != nil {
if _, ok := origErr.(*xml.SyntaxError); ok {
// Retry the listing with URL encoding as there were characters that XML can't encode
urlEncodeListings = true
req.EncodingType = aws.String(s3.EncodingTypeUrl)
fs.Debugf(f, "Retrying listing because of characters which can't be XML encoded")
return true, err
}
}
}
}
return f.shouldRetry(err) return f.shouldRetry(err)
}) })
if err != nil { if err != nil {
@ -1266,10 +1298,12 @@ func (f *Fs) list(ctx context.Context, bucket, directory, prefix string, addBuck
continue continue
} }
remote := *commonPrefix.Prefix remote := *commonPrefix.Prefix
remote, err = url.QueryUnescape(remote) if urlEncodeListings {
if err != nil { remote, err = url.QueryUnescape(remote)
fs.Logf(f, "failed to URL decode %q in listing common prefix: %v", *commonPrefix.Prefix, err) if err != nil {
continue fs.Logf(f, "failed to URL decode %q in listing common prefix: %v", *commonPrefix.Prefix, err)
continue
}
} }
remote = enc.ToStandardPath(remote) remote = enc.ToStandardPath(remote)
if !strings.HasPrefix(remote, prefix) { if !strings.HasPrefix(remote, prefix) {
@ -1291,10 +1325,12 @@ func (f *Fs) list(ctx context.Context, bucket, directory, prefix string, addBuck
} }
for _, object := range resp.Contents { for _, object := range resp.Contents {
remote := aws.StringValue(object.Key) remote := aws.StringValue(object.Key)
remote, err = url.QueryUnescape(remote) if urlEncodeListings {
if err != nil { remote, err = url.QueryUnescape(remote)
fs.Logf(f, "failed to URL decode %q in listing: %v", aws.StringValue(object.Key), err) if err != nil {
continue fs.Logf(f, "failed to URL decode %q in listing: %v", aws.StringValue(object.Key), err)
continue
}
} }
remote = enc.ToStandardPath(remote) remote = enc.ToStandardPath(remote)
if !strings.HasPrefix(remote, prefix) { if !strings.HasPrefix(remote, prefix) {