From 282540c2d4ec311e1dd5308b8e3bb9945e76110d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santiago=20Rodr=C3=ADguez?= Date: Mon, 10 Sep 2018 21:45:06 +0200 Subject: [PATCH] azureblob: add --azureblob-list-chunk parameter - Fixes #2390 This parameter can be used to adjust the size of the listing chunks which can be used to work around problems listing large buckets. --- backend/azureblob/azureblob.go | 31 ++++++++++++++++++++----------- docs/content/azureblob.md | 7 +++++++ 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/backend/azureblob/azureblob.go b/backend/azureblob/azureblob.go index a6701c901..cc17b40a9 100644 --- a/backend/azureblob/azureblob.go +++ b/backend/azureblob/azureblob.go @@ -37,7 +37,7 @@ const ( minSleep = 10 * time.Millisecond maxSleep = 10 * time.Second decayConstant = 1 // bigger for slower decay, exponential - listChunkSize = 5000 // number of items to read at once + maxListChunkSize = 5000 // number of items to read at once modTimeKey = "mtime" timeFormatIn = time.RFC3339 timeFormatOut = "2006-01-02T15:04:05.000000000Z07:00" @@ -80,6 +80,11 @@ func init() { Help: "Upload chunk size. Must fit in memory.", Default: fs.SizeSuffix(defaultChunkSize), Advanced: true, + }, { + Name: "list_chunk", + Help: "Size of blob list.", + Default: maxListChunkSize, + Advanced: true, }, { Name: "access_tier", Help: "Access tier of blob, supports hot, cool and archive tiers.\nArchived blobs can be restored by setting access tier to hot or cool." 
+ @@ -91,13 +96,14 @@ func init() { // Options defines the configuration for this backend type Options struct { - Account string `config:"account"` - Key string `config:"key"` - Endpoint string `config:"endpoint"` - SASURL string `config:"sas_url"` - UploadCutoff fs.SizeSuffix `config:"upload_cutoff"` - ChunkSize fs.SizeSuffix `config:"chunk_size"` - AccessTier string `config:"access_tier"` + Account string `config:"account"` + Key string `config:"key"` + Endpoint string `config:"endpoint"` + SASURL string `config:"sas_url"` + UploadCutoff fs.SizeSuffix `config:"upload_cutoff"` + ChunkSize fs.SizeSuffix `config:"chunk_size"` + ListChunkSize uint `config:"list_chunk"` + AccessTier string `config:"access_tier"` } // Fs represents a remote azure server @@ -211,6 +217,9 @@ func NewFs(name, root string, m configmap.Mapper) (fs.Fs, error) { if opt.ChunkSize > maxChunkSize { return nil, errors.Errorf("azure: chunk size can't be greater than %v - was %v", maxChunkSize, opt.ChunkSize) } + if opt.ListChunkSize > maxListChunkSize { + return nil, errors.Errorf("azure: blob list size can't be greater than %v - was %v", maxListChunkSize, opt.ListChunkSize) + } container, directory, err := parsePath(root) if err != nil { return nil, err @@ -474,7 +483,7 @@ func (f *Fs) markContainerOK() { // listDir lists a single directory func (f *Fs) listDir(dir string) (entries fs.DirEntries, err error) { - err = f.list(dir, false, listChunkSize, func(remote string, object *azblob.BlobItem, isDirectory bool) error { + err = f.list(dir, false, f.opt.ListChunkSize, func(remote string, object *azblob.BlobItem, isDirectory bool) error { entry, err := f.itemToDirEntry(remote, object, isDirectory) if err != nil { return err @@ -545,7 +554,7 @@ func (f *Fs) ListR(dir string, callback fs.ListRCallback) (err error) { return fs.ErrorListBucketRequired } list := walk.NewListRHelper(callback) - err = f.list(dir, true, listChunkSize, func(remote string, object *azblob.BlobItem, isDirectory bool) error { + 
err = f.list(dir, true, f.opt.ListChunkSize, func(remote string, object *azblob.BlobItem, isDirectory bool) error { entry, err := f.itemToDirEntry(remote, object, isDirectory) if err != nil { return err @@ -566,7 +575,7 @@ type listContainerFn func(*azblob.ContainerItem) error // listContainersToFn lists the containers to the function supplied func (f *Fs) listContainersToFn(fn listContainerFn) error { params := azblob.ListContainersSegmentOptions{ - MaxResults: int32(listChunkSize), + MaxResults: int32(f.opt.ListChunkSize), } ctx := context.Background() for marker := (azblob.Marker{}); marker.NotDone(); { diff --git a/docs/content/azureblob.md b/docs/content/azureblob.md index 84b67b35e..837e25c5e 100644 --- a/docs/content/azureblob.md +++ b/docs/content/azureblob.md @@ -184,6 +184,13 @@ Upload chunk size. Default 4MB. Note that this is stored in memory and there may be up to `--transfers` chunks stored at once in memory. This can be at most 100MB. +#### --azureblob-list-chunk=SIZE #### + +Number of blobs to fetch per listing request. Default is the maximum, 5000. `List blobs` requests +are permitted 2 minutes per megabyte to complete. If an operation is +taking longer than 2 minutes per megabyte on average, it will time out ([source](https://docs.microsoft.com/en-us/rest/api/storageservices/setting-timeouts-for-blob-service-operations#exceptions-to-default-timeout-interval)). This limits the number of blob items returned per request, to avoid the timeout. + + #### --azureblob-access-tier=Hot/Cool/Archive #### Azure storage supports blob tiering, you can configure tier in advanced