forked from TrueCloudLab/rclone
b2: Make NewObject use less expensive API calls
Before this change when NewObject was called the b2 backend would list the directory that the object was in in order to find it. Unfortunately list calls are Class C transactions and cost more. This patch switches to using HEAD requests instead. These are Class B transactions. It is then necessary to parse the headers from response back into the data that we get from the listing. However B2 returns exactly the same data, just in a different form. Rclone will use the old directory listing method when looking for files with versions as these can't be found via a HEAD request. This change will particularly benefit --files-from, rclone serve restic but most operations will see some benefit.
This commit is contained in:
parent
7ae84a3c91
commit
cb16f42075
1 changed files with 75 additions and 28 deletions
103
backend/b2/b2.go
103
backend/b2/b2.go
|
@ -44,8 +44,10 @@ const (
|
|||
timeHeader = headerPrefix + timeKey
|
||||
sha1Key = "large_file_sha1"
|
||||
sha1Header = "X-Bz-Content-Sha1"
|
||||
sha1InfoHeader = headerPrefix + sha1Key
|
||||
testModeHeader = "X-Bz-Test-Mode"
|
||||
idHeader = "X-Bz-File-Id"
|
||||
nameHeader = "X-Bz-File-Name"
|
||||
timestampHeader = "X-Bz-Upload-Timestamp"
|
||||
retryAfterHeader = "Retry-After"
|
||||
minSleep = 10 * time.Millisecond
|
||||
maxSleep = 5 * time.Minute
|
||||
|
@ -1496,8 +1498,11 @@ func (o *Object) decodeMetaDataFileInfo(info *api.FileInfo) (err error) {
|
|||
return o.decodeMetaDataRaw(info.ID, info.SHA1, info.Size, info.UploadTimestamp, info.Info, info.ContentType)
|
||||
}
|
||||
|
||||
// getMetaData gets the metadata from the object unconditionally
|
||||
func (o *Object) getMetaData(ctx context.Context) (info *api.File, err error) {
|
||||
// getMetaDataListing gets the metadata from the object unconditionally from the listing
|
||||
//
|
||||
// Note that listing is a class C transaction which costs more than
|
||||
// the B transaction used in getMetaData
|
||||
func (o *Object) getMetaDataListing(ctx context.Context) (info *api.File, err error) {
|
||||
bucket, bucketPath := o.split()
|
||||
maxSearched := 1
|
||||
var timestamp api.Timestamp
|
||||
|
@ -1530,6 +1535,19 @@ func (o *Object) getMetaData(ctx context.Context) (info *api.File, err error) {
|
|||
return info, nil
|
||||
}
|
||||
|
||||
// getMetaData gets the metadata from the object unconditionally
|
||||
func (o *Object) getMetaData(ctx context.Context) (info *api.File, err error) {
|
||||
// If using versions and have a version suffix, need to list the directory to find the correct versions
|
||||
if o.fs.opt.Versions {
|
||||
timestamp, _ := api.RemoveVersion(o.remote)
|
||||
if !timestamp.IsZero() {
|
||||
return o.getMetaDataListing(ctx)
|
||||
}
|
||||
}
|
||||
_, info, err = o.getOrHead(ctx, "HEAD", nil)
|
||||
return info, err
|
||||
}
|
||||
|
||||
// readMetaData gets the metadata if it hasn't already been fetched
|
||||
//
|
||||
// Sets
|
||||
|
@ -1659,12 +1677,11 @@ func (file *openFile) Close() (err error) {
|
|||
// Check it satisfies the interfaces
|
||||
var _ io.ReadCloser = &openFile{}
|
||||
|
||||
// Open an object for read
|
||||
func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.ReadCloser, err error) {
|
||||
fs.FixRangeOption(options, o.size)
|
||||
func (o *Object) getOrHead(ctx context.Context, method string, options []fs.OpenOption) (resp *http.Response, info *api.File, err error) {
|
||||
opts := rest.Opts{
|
||||
Method: "GET",
|
||||
Options: options,
|
||||
Method: method,
|
||||
Options: options,
|
||||
NoResponse: method == "HEAD",
|
||||
}
|
||||
|
||||
// Use downloadUrl from backblaze if downloadUrl is not set
|
||||
|
@ -1682,37 +1699,67 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read
|
|||
bucket, bucketPath := o.split()
|
||||
opts.Path += "/file/" + urlEncode(o.fs.opt.Enc.FromStandardName(bucket)) + "/" + urlEncode(o.fs.opt.Enc.FromStandardPath(bucketPath))
|
||||
}
|
||||
var resp *http.Response
|
||||
err = o.fs.pacer.Call(func() (bool, error) {
|
||||
resp, err = o.fs.srv.Call(ctx, &opts)
|
||||
return o.fs.shouldRetry(ctx, resp, err)
|
||||
})
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "failed to open for download")
|
||||
// 404 for files, 400 for directories
|
||||
if resp != nil && (resp.StatusCode == http.StatusNotFound || resp.StatusCode == http.StatusBadRequest) {
|
||||
return nil, nil, fs.ErrorObjectNotFound
|
||||
}
|
||||
return nil, nil, errors.Wrapf(err, "failed to %s for download", method)
|
||||
}
|
||||
|
||||
// Parse the time out of the headers if possible
|
||||
err = o.parseTimeString(resp.Header.Get(timeHeader))
|
||||
// NB resp may be Open here - don't return err != nil without closing
|
||||
|
||||
// Convert the Headers into an api.File
|
||||
var uploadTimestamp api.Timestamp
|
||||
err = uploadTimestamp.UnmarshalJSON([]byte(resp.Header.Get(timestampHeader)))
|
||||
if err != nil {
|
||||
fs.Debugf(o, "Bad "+timestampHeader+" header: %v", err)
|
||||
}
|
||||
var Info = make(map[string]string)
|
||||
for k, vs := range resp.Header {
|
||||
k = strings.ToLower(k)
|
||||
for _, v := range vs {
|
||||
if strings.HasPrefix(k, headerPrefix) {
|
||||
Info[k[len(headerPrefix):]] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
info = &api.File{
|
||||
ID: resp.Header.Get(idHeader),
|
||||
Name: resp.Header.Get(nameHeader),
|
||||
Action: "upload",
|
||||
Size: resp.ContentLength,
|
||||
UploadTimestamp: uploadTimestamp,
|
||||
SHA1: resp.Header.Get(sha1Header),
|
||||
ContentType: resp.Header.Get("Content-Type"),
|
||||
Info: Info,
|
||||
}
|
||||
return resp, info, nil
|
||||
}
|
||||
|
||||
// Open an object for read
|
||||
func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.ReadCloser, err error) {
|
||||
fs.FixRangeOption(options, o.size)
|
||||
|
||||
resp, info, err := o.getOrHead(ctx, "GET", options)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Don't check length or hash or metadata on partial content
|
||||
if resp.StatusCode == http.StatusPartialContent {
|
||||
return resp.Body, nil
|
||||
}
|
||||
|
||||
err = o.decodeMetaData(info)
|
||||
if err != nil {
|
||||
_ = resp.Body.Close()
|
||||
return nil, err
|
||||
}
|
||||
// Read sha1 from header if it isn't set
|
||||
if o.sha1 == "" {
|
||||
o.sha1 = resp.Header.Get(sha1Header)
|
||||
fs.Debugf(o, "Reading sha1 from header - %q", o.sha1)
|
||||
// if sha1 header is "none" (in big files), then need
|
||||
// to read it from the metadata
|
||||
if o.sha1 == "none" {
|
||||
o.sha1 = resp.Header.Get(sha1InfoHeader)
|
||||
fs.Debugf(o, "Reading sha1 from info - %q", o.sha1)
|
||||
}
|
||||
o.sha1 = cleanSHA1(o.sha1)
|
||||
}
|
||||
// Don't check length or hash on partial content
|
||||
if resp.StatusCode == http.StatusPartialContent {
|
||||
return resp.Body, nil
|
||||
}
|
||||
return newOpenFile(o, resp), nil
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue