From 584e705c0cdfec0afdef4e9a1d3681809521a61e Mon Sep 17 00:00:00 2001
From: Thomas Kriechbaumer
Date: Thu, 26 Dec 2019 11:05:00 +0000
Subject: [PATCH] s3: introduce list_chunk option for bucket listing

The S3 ListObject API returns paginated bucket listings, with up to
"MaxKeys" items per GET call.

The default value is 1000 entries, but for buckets with millions of
objects it might make sense to request more elements per request, if
the backend supports it. This commit adds a "list_chunk" option for
the user to specify a lower or higher value.

This commit does not add safeguards around this value - if a user
decides to request too large a list, it might result in connection
timeouts (on the server or the client).

In AWS S3 there is a fixed limit of 1000; some other services might
have one too. In Ceph, this can be configured in RadosGW.
---
 backend/s3/s3.go   | 16 +++++++++++++---
 docs/content/s3.md | 15 +++++++++++++++
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/backend/s3/s3.go b/backend/s3/s3.go
index 04fdefe3a..4b7b93a27 100644
--- a/backend/s3/s3.go
+++ b/backend/s3/s3.go
@@ -793,6 +793,17 @@ WARNING: Storing parts of an incomplete multipart upload counts towards space us
 `,
 			Default:  false,
 			Advanced: true,
+		}, {
+			Name: "list_chunk",
+			Help: `Size of listing chunk (response list for each ListObject S3 request).
+
+This option is also known as "MaxKeys", "max-items", or "page-size" in the AWS S3 specification.
+Most services truncate the response list to 1000 objects even if more than that is requested.
+In AWS S3 this is a global maximum and cannot be changed, see [AWS S3](https://docs.aws.amazon.com/cli/latest/reference/s3/ls.html).
+In Ceph, this can be increased with the "rgw list buckets max chunk" option.
+`,
+			Default:  1000,
+			Advanced: true,
 		}},
 	})
 }
@@ -801,7 +812,6 @@
 const (
 	metaMtime      = "Mtime"                // the meta key to store mtime in - eg X-Amz-Meta-Mtime
 	metaMD5Hash    = "Md5chksum"            // the meta key to store md5hash in
-	listChunkSize  = 1000                   // number of items to read at once
 	maxRetries     = 10                     // number of retries to make of operations
 	maxSizeForCopy = 5 * 1024 * 1024 * 1024 // The maximum size of object we can COPY
 	minChunkSize   = fs.SizeSuffix(s3manager.MinUploadPartSize)
@@ -834,6 +844,7 @@ type Options struct {
 	V2Auth                bool          `config:"v2_auth"`
 	UseAccelerateEndpoint bool          `config:"use_accelerate_endpoint"`
 	LeavePartsOnError     bool          `config:"leave_parts_on_error"`
+	ListChunk             int64         `config:"list_chunk"`
 }
 
 // Fs represents a remote s3 server
@@ -1260,7 +1271,6 @@ func (f *Fs) list(ctx context.Context, bucket, directory, prefix string, addBuck
 	if directory != "" {
 		directory += "/"
 	}
-	maxKeys := int64(listChunkSize)
 	delimiter := ""
 	if !recurse {
 		delimiter = "/"
@@ -1288,7 +1298,7 @@ func (f *Fs) list(ctx context.Context, bucket, directory, prefix string, addBuck
 		Bucket:    &bucket,
 		Delimiter: &delimiter,
 		Prefix:    &directory,
-		MaxKeys:   &maxKeys,
+		MaxKeys:   &f.opt.ListChunk,
 		Marker:    marker,
 	}
 	if urlEncodeListings {
diff --git a/docs/content/s3.md b/docs/content/s3.md
index d000c291c..7ffdbed8c 100644
--- a/docs/content/s3.md
+++ b/docs/content/s3.md
@@ -1134,6 +1134,21 @@ WARNING: Storing parts of an incomplete multipart upload counts towards space us
 - Type: bool
 - Default: false
 
+#### --s3-list-chunk
+
+Size of listing chunk (response list for each ListObject S3 request).
+
+This option is also known as "MaxKeys", "max-items", or "page-size" in the AWS S3 specification.
+Most services truncate the response list to 1000 objects even if more than that is requested.
+In AWS S3 this is a global maximum and cannot be changed, see [AWS S3](https://docs.aws.amazon.com/cli/latest/reference/s3/ls.html).
+In Ceph, this can be increased with the "rgw list buckets max chunk" option.
+
+
+- Config: list_chunk
+- Env Var: RCLONE_S3_LIST_CHUNK
+- Type: int
+- Default: 1000
+
 ### Anonymous access to public buckets ###
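
For reviewers, the pagination loop that `MaxKeys` (and hence the new `list_chunk` option) feeds into works roughly as in the minimal standalone sketch below. This assumes aws-sdk-go v1, the SDK the backend is built on; the bucket name and chunk size are illustrative placeholders, not values taken from the patch:

```go
package main

import (
	"fmt"
	"log"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/s3"
)

func main() {
	bucket := "example-bucket" // placeholder bucket name
	listChunk := int64(5000)   // plays the role of the new list_chunk option

	svc := s3.New(session.Must(session.NewSession()))

	var marker *string
	for {
		resp, err := svc.ListObjects(&s3.ListObjectsInput{
			Bucket:  aws.String(bucket),
			MaxKeys: aws.Int64(listChunk), // the server may silently cap this (1000 on AWS)
			Marker:  marker,
		})
		if err != nil {
			log.Fatal(err)
		}
		for _, obj := range resp.Contents {
			fmt.Println(aws.StringValue(obj.Key))
		}
		// Keep requesting pages until the server reports the listing is complete.
		if !aws.BoolValue(resp.IsTruncated) || len(resp.Contents) == 0 {
			break
		}
		// NextMarker is only guaranteed when a delimiter is set; otherwise
		// resume from the last key returned in this page.
		if resp.NextMarker != nil {
			marker = resp.NextMarker
		} else {
			marker = resp.Contents[len(resp.Contents)-1].Key
		}
	}
}
```

Once the patch is applied, the option can be exercised per invocation, e.g. `rclone ls --s3-list-chunk 5000 remote:bucket` (remote and bucket names are placeholders), set as `list_chunk` in the remote's config, or supplied through the `RCLONE_S3_LIST_CHUNK` environment variable documented above.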