box: use upload preflight check to avoid listings in file uploads

Before this change, rclone checked to see if an object existed before
doing an upload by listing the destination directory. This was very
inefficient, especially with large directories.

After this change rclone uses the pre-upload check API call, which
checks whether it is OK to upload an object and also returns the ID
of any existing object with that name. This saves rclone from having
to do a directory listing.
This commit is contained in:
Nick Craig-Wood 2021-04-24 20:11:15 +01:00
parent ffec0d4f03
commit d27c35ee4a
2 changed files with 105 additions and 15 deletions

View file

@ -39,7 +39,7 @@ type Error struct {
Type string `json:"type"`
Status int `json:"status"`
Code string `json:"code"`
ContextInfo json.RawMessage
ContextInfo json.RawMessage `json:"context_info"`
HelpURL string `json:"help_url"`
Message string `json:"message"`
RequestID string `json:"request_id"`
@ -132,6 +132,38 @@ type UploadFile struct {
ContentModifiedAt Time `json:"content_modified_at"`
}
// PreUploadCheck is the request for upload preflight check
//
// It is sent as the JSON body of the preflight request to ask whether
// a file with the given name can be uploaded into the given parent.
type PreUploadCheck struct {
// Name is the name of the file to be uploaded
Name string `json:"name"`
// Parent identifies the directory the file would be uploaded into
Parent Parent `json:"parent"`
// Size is a pointer so that it can be left nil, in which case
// omitempty drops it from the request entirely
Size *int64 `json:"size,omitempty"`
}
// PreUploadCheckResponse is the response from upload preflight check
// if successful
//
// NOTE(review): the visible caller decodes into this struct but does
// not read either field - confirm whether they are needed elsewhere.
type PreUploadCheckResponse struct {
// UploadToken is decoded from the "upload_token" field
UploadToken string `json:"upload_token"`
// UploadURL is decoded from the "upload_url" field
UploadURL string `json:"upload_url"`
}
// PreUploadCheckConflict is returned in the ContextInfo error field
// from PreUploadCheck when the error code is "item_name_in_use"
//
// It describes the existing item which already has the requested
// name.
type PreUploadCheckConflict struct {
Conflicts struct {
// Type is the kind of the conflicting item - callers compare
// it against ItemTypeFile to see whether it can be overwritten
Type string `json:"type"`
// ID is the ID of the conflicting item
ID string `json:"id"`
// FileVersion is decoded from the "file_version" object
FileVersion struct {
Type string `json:"type"`
ID string `json:"id"`
Sha1 string `json:"sha1"`
} `json:"file_version"`
SequenceID string `json:"sequence_id"`
Etag string `json:"etag"`
Sha1 string `json:"sha1"`
Name string `json:"name"`
} `json:"conflicts"`
}
// UpdateFileModTime is used in Update File Info
type UpdateFileModTime struct {
ContentModifiedAt Time `json:"content_modified_at"`

View file

@ -686,22 +686,80 @@ func (f *Fs) createObject(ctx context.Context, remote string, modTime time.Time,
return o, leaf, directoryID, nil
}
// preUploadCheck checks to see if a file can be uploaded
//
// leaf is the name of the file and directoryID the ID of the directory
// it is to be uploaded into. A negative size means the size is unknown
// and it is left out of the request.
//
// It returns "", nil if the file is good to go
// It returns "ID", nil if the file must be updated
// It returns an error if the name is in use by something which isn't a
// file, or if the check itself failed
func (f *Fs) preUploadCheck(ctx context.Context, leaf, directoryID string, size int64) (ID string, err error) {
	check := api.PreUploadCheck{
		Name: f.opt.Enc.FromStandardName(leaf),
		Parent: api.Parent{
			ID: directoryID,
		},
	}
	if size >= 0 {
		check.Size = &size
	}
	opts := rest.Opts{
		Method: "OPTIONS",
		Path:   "/files/content/",
	}
	var result api.PreUploadCheckResponse
	var resp *http.Response
	err = f.pacer.Call(func() (bool, error) {
		resp, err = f.srv.CallJSON(ctx, &opts, &check, &result)
		return shouldRetry(ctx, resp, err)
	})
	if err == nil {
		// Nothing in the way - OK to upload
		return "", nil
	}

	// Anything other than a name conflict is a real failure
	apiErr, ok := err.(*api.Error)
	if !ok || apiErr.Code != "item_name_in_use" {
		return "", errors.Wrap(err, "pre-upload check")
	}

	// The name is taken - find out what by
	var conflict api.PreUploadCheckConflict
	err = json.Unmarshal(apiErr.ContextInfo, &conflict)
	if err != nil {
		return "", errors.Wrap(err, "pre-upload check: JSON decode failed")
	}
	if conflict.Conflicts.Type != api.ItemTypeFile {
		// err is nil here as the decode succeeded, so make a new
		// error. Wrapping the nil err would return nil, which would
		// report the conflict as "good to go" to the caller.
		return "", errors.New("pre-upload check: can't overwrite non file with file")
	}
	return conflict.Conflicts.ID, nil
}
// Put the object
//
// Copy the reader in to the new object which is returned
//
// The new object may have been created if an error is returned
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
existingObj, err := f.newObjectWithInfo(ctx, src.Remote(), nil)
switch err {
case nil:
return existingObj, existingObj.Update(ctx, in, src, options...)
case fs.ErrorObjectNotFound:
// Not found so create it
return f.PutUnchecked(ctx, in, src)
default:
// If directory doesn't exist, file doesn't exist so can upload
remote := src.Remote()
leaf, directoryID, err := f.dirCache.FindPath(ctx, remote, false)
if err != nil {
if err == fs.ErrorDirNotFound {
return f.PutUnchecked(ctx, in, src, options...)
}
return nil, err
}
// Preflight check the upload, which returns the ID if the
// object already exists
ID, err := f.preUploadCheck(ctx, leaf, directoryID, src.Size())
if err != nil {
return nil, err
}
if ID == "" {
return f.PutUnchecked(ctx, in, src, options...)
}
// If object exists then create a skeleton one with just id
o := &Object{
fs: f,
remote: remote,
id: ID,
}
return o, o.Update(ctx, in, src, options...)
}
// PutStream uploads to the remote path with the modTime given of indeterminate size