s3: introduce list_chunk option for bucket listing
The S3 ListObject API returns paginated bucket listings, with up to "MaxKeys" items for each GET call. The default value is 1000 entries, but for buckets with millions of objects it might make sense to request more elements per request, if the backend supports it. This commit adds a "list_chunk" option for the user to specify a lower or higher value. This commit does not add safeguards around this value - if a user decides to request too large a list, it might result in connection timeouts (on the server or client). In AWS S3, there is a fixed limit of 1000; some other services might have one too. In Ceph, this can be configured in RadosGW.
This commit is contained in:
parent
32a3ba9e3f
commit
584e705c0c
2 changed files with 28 additions and 3 deletions
|
@ -793,6 +793,17 @@ WARNING: Storing parts of an incomplete multipart upload counts towards space us
|
|||
`,
|
||||
Default: false,
|
||||
Advanced: true,
|
||||
}, {
|
||||
Name: "list_chunk",
|
||||
Help: `Size of listing chunk (response list for each ListObject S3 request).
|
||||
|
||||
This option is also known as "MaxKeys", "max-items", or "page-size" from the AWS S3 specification.
|
||||
Most services truncate the response list to 1000 objects even if requested more than that.
|
||||
In AWS S3 this is a global maximum and cannot be changed, see [AWS S3](https://docs.aws.amazon.com/cli/latest/reference/s3/ls.html).
|
||||
In Ceph, this can be increased with the "rgw list buckets max chunk" option.
|
||||
`,
|
||||
Default: 1000,
|
||||
Advanced: true,
|
||||
}},
|
||||
})
|
||||
}
|
||||
|
@ -801,7 +812,6 @@ WARNING: Storing parts of an incomplete multipart upload counts towards space us
|
|||
const (
|
||||
metaMtime = "Mtime" // the meta key to store mtime in - eg X-Amz-Meta-Mtime
|
||||
metaMD5Hash = "Md5chksum" // the meta key to store md5hash in
|
||||
listChunkSize = 1000 // number of items to read at once
|
||||
maxRetries = 10 // number of retries to make of operations
|
||||
maxSizeForCopy = 5 * 1024 * 1024 * 1024 // The maximum size of object we can COPY
|
||||
minChunkSize = fs.SizeSuffix(s3manager.MinUploadPartSize)
|
||||
|
@ -834,6 +844,7 @@ type Options struct {
|
|||
V2Auth bool `config:"v2_auth"`
|
||||
UseAccelerateEndpoint bool `config:"use_accelerate_endpoint"`
|
||||
LeavePartsOnError bool `config:"leave_parts_on_error"`
|
||||
ListChunk int64 `config:"list_chunk"`
|
||||
}
|
||||
|
||||
// Fs represents a remote s3 server
|
||||
|
@ -1260,7 +1271,6 @@ func (f *Fs) list(ctx context.Context, bucket, directory, prefix string, addBuck
|
|||
if directory != "" {
|
||||
directory += "/"
|
||||
}
|
||||
maxKeys := int64(listChunkSize)
|
||||
delimiter := ""
|
||||
if !recurse {
|
||||
delimiter = "/"
|
||||
|
@ -1288,7 +1298,7 @@ func (f *Fs) list(ctx context.Context, bucket, directory, prefix string, addBuck
|
|||
Bucket: &bucket,
|
||||
Delimiter: &delimiter,
|
||||
Prefix: &directory,
|
||||
MaxKeys: &maxKeys,
|
||||
MaxKeys: &f.opt.ListChunk,
|
||||
Marker: marker,
|
||||
}
|
||||
if urlEncodeListings {
|
||||
|
|
|
@ -1134,6 +1134,21 @@ WARNING: Storing parts of an incomplete multipart upload counts towards space us
|
|||
- Type: bool
|
||||
- Default: false
|
||||
|
||||
#### --s3-list-chunk
|
||||
|
||||
Size of listing chunk (response list for each ListObject S3 request).
|
||||
|
||||
This option is also known as "MaxKeys", "max-items", or "page-size" from the AWS S3 specification.
|
||||
Most services truncate the response list to 1000 objects even if requested more than that.
|
||||
In AWS S3 this is a global maximum and cannot be changed, see [AWS S3](https://docs.aws.amazon.com/cli/latest/reference/s3/ls.html).
|
||||
In Ceph, this can be increased with the "rgw list buckets max chunk" option.
|
||||
|
||||
|
||||
- Config: list_chunk
|
||||
- Env Var: RCLONE_S3_LIST_CHUNK
|
||||
- Type: int
|
||||
- Default: 1000
|
||||
|
||||
<!--- autogenerated options stop -->
|
||||
|
||||
### Anonymous access to public buckets ###
|
||||
|
|
Loading…
Reference in a new issue