box: fix performance problem reading metadata for single files

Before this change the backend used to list the directory to find the
metadata for a single file. For lots of files in a directory this
caused a serious performance problem.

This change uses the preflight check to check for a files existence
and find its ID.

See: https://forum.rclone.org/t/psa-box-com-has-serious-performance-issues-in-directories-with-thousands-of-files/41128/10
See: https://forum.box.com/t/is-there-an-api-to-find-a-file-by-leaf-name-given-a-folder-id/997/
See: https://developer.box.com/guides/uploads/check/
This commit is contained in:
Nick Craig-Wood 2023-09-09 12:40:06 +01:00
parent 4ac5cb07ca
commit bb88b8499b
2 changed files with 31 additions and 33 deletions

View file

@ -167,19 +167,7 @@ type PreUploadCheckResponse struct {
// PreUploadCheckConflict is returned in the ContextInfo error field
// from PreUploadCheck when the error code is "item_name_in_use"
type PreUploadCheckConflict struct {
Conflicts struct {
Type string `json:"type"`
ID string `json:"id"`
FileVersion struct {
Type string `json:"type"`
ID string `json:"id"`
Sha1 string `json:"sha1"`
} `json:"file_version"`
SequenceID string `json:"sequence_id"`
Etag string `json:"etag"`
Sha1 string `json:"sha1"`
Name string `json:"name"`
} `json:"conflicts"`
Conflicts ItemMini `json:"conflicts"`
}
// UpdateFileModTime is used in Update File Info

View file

@ -380,7 +380,7 @@ func shouldRetry(ctx context.Context, resp *http.Response, err error) (bool, err
// readMetaDataForPath reads the metadata from the path
func (f *Fs) readMetaDataForPath(ctx context.Context, path string) (info *api.Item, err error) {
// defer fs.Trace(f, "path=%q", path)("info=%+v, err=%v", &info, &err)
// defer log.Trace(f, "path=%q", path)("info=%+v, err=%v", &info, &err)
leaf, directoryID, err := f.dirCache.FindPath(ctx, path, false)
if err != nil {
if err == fs.ErrorDirNotFound {
@ -389,20 +389,30 @@ func (f *Fs) readMetaDataForPath(ctx context.Context, path string) (info *api.It
return nil, err
}
found, err := f.listAll(ctx, directoryID, false, true, true, func(item *api.Item) bool {
if strings.EqualFold(item.Name, leaf) {
info = item
return true
}
return false
// Use preupload to find the ID
itemMini, err := f.preUploadCheck(ctx, leaf, directoryID, -1)
if err != nil {
return nil, err
}
if itemMini == nil {
return nil, fs.ErrorObjectNotFound
}
// Now we have the ID we can look up the object proper
opts := rest.Opts{
Method: "GET",
Path: "/files/" + itemMini.ID,
Parameters: fieldsValue(),
}
var item api.Item
err = f.pacer.Call(func() (bool, error) {
resp, err := f.srv.CallJSON(ctx, &opts, nil, &item)
return shouldRetry(ctx, resp, err)
})
if err != nil {
return nil, err
}
if !found {
return nil, fs.ErrorObjectNotFound
}
return info, nil
return &item, nil
}
// errorHandler parses a non 2xx error response into an error
@ -762,7 +772,7 @@ func (f *Fs) createObject(ctx context.Context, remote string, modTime time.Time,
//
// It returns "", nil if the file is good to go
// It returns "ID", nil if the file must be updated
func (f *Fs) preUploadCheck(ctx context.Context, leaf, directoryID string, size int64) (ID string, err error) {
func (f *Fs) preUploadCheck(ctx context.Context, leaf, directoryID string, size int64) (item *api.ItemMini, err error) {
check := api.PreUploadCheck{
Name: f.opt.Enc.FromStandardName(leaf),
Parent: api.Parent{
@ -787,16 +797,16 @@ func (f *Fs) preUploadCheck(ctx context.Context, leaf, directoryID string, size
var conflict api.PreUploadCheckConflict
err = json.Unmarshal(apiErr.ContextInfo, &conflict)
if err != nil {
return "", fmt.Errorf("pre-upload check: JSON decode failed: %w", err)
return nil, fmt.Errorf("pre-upload check: JSON decode failed: %w", err)
}
if conflict.Conflicts.Type != api.ItemTypeFile {
return "", fmt.Errorf("pre-upload check: can't overwrite non file with file: %w", err)
return nil, fs.ErrorIsDir
}
return conflict.Conflicts.ID, nil
return &conflict.Conflicts, nil
}
return "", fmt.Errorf("pre-upload check: %w", err)
return nil, fmt.Errorf("pre-upload check: %w", err)
}
return "", nil
return nil, nil
}
// Put the object
@ -817,11 +827,11 @@ func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options .
// Preflight check the upload, which returns the ID if the
// object already exists
ID, err := f.preUploadCheck(ctx, leaf, directoryID, src.Size())
item, err := f.preUploadCheck(ctx, leaf, directoryID, src.Size())
if err != nil {
return nil, err
}
if ID == "" {
if item == nil {
return f.PutUnchecked(ctx, in, src, options...)
}
@ -829,7 +839,7 @@ func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options .
o := &Object{
fs: f,
remote: remote,
id: ID,
id: item.ID,
}
return o, o.Update(ctx, in, src, options...)
}