onedrive: implement ListR method which gives --fast-list support

This implements ListR for onedrive. The API only allows doing this at
the root so it is inefficient to use it anywhere other than the root.

Fixes #7317
This commit is contained in:
Nick Craig-Wood 2023-09-19 12:09:51 +01:00
parent ecb09badba
commit b8591b230d
4 changed files with 218 additions and 52 deletions

View file

@ -99,6 +99,16 @@ type ItemReference struct {
DriveType string `json:"driveType"` // Type of the drive, Read-Only DriveType string `json:"driveType"` // Type of the drive, Read-Only
} }
// GetID returns the normalized form of the reference's ID.
//
// An ID which already contains the "#" separator is returned unchanged.
// Otherwise the result is DriveID + "#" + ID.
//
// The result can be parsed using onedrive.parseNormalizedID(normalizedID)
func (i *ItemReference) GetID() string {
	if strings.Contains(i.ID, "#") {
		// Already in normalized form
		return i.ID
	}
	return i.DriveID + "#" + i.ID
}
// RemoteItemFacet groups data needed to reference a OneDrive remote item // RemoteItemFacet groups data needed to reference a OneDrive remote item
type RemoteItemFacet struct { type RemoteItemFacet struct {
ID string `json:"id"` // The unique identifier of the item within the remote Drive. Read-only. ID string `json:"id"` // The unique identifier of the item within the remote Drive. Read-only.

View file

@ -1095,32 +1095,29 @@ func (f *Fs) CreateDir(ctx context.Context, dirID, leaf string) (newID string, e
// If directories is set it only sends directories // If directories is set it only sends directories
// User function to process a File item from listAll // User function to process a File item from listAll
// //
// Should return true to finish processing // If an error is returned then processing stops
type listAllFn func(*api.Item) bool type listAllFn func(*api.Item) error
// Lists the directory required calling the user function on each item found // Lists the directory required calling the user function on each item found
// //
// If the user fn ever returns true then it early exits with found = true // If the user fn ever returns true then it early exits with found = true
func (f *Fs) listAll(ctx context.Context, dirID string, directoriesOnly bool, filesOnly bool, fn listAllFn) (found bool, err error) { //
// Top parameter asks for bigger pages of data // This listing function works on both normal listings and delta listings
// https://dev.onedrive.com/odata/optional-query-parameters.htm func (f *Fs) _listAll(ctx context.Context, dirID string, directoriesOnly bool, filesOnly bool, fn listAllFn, opts *rest.Opts, result any, pValue *[]api.Item, pNextLink *string) (err error) {
opts := f.newOptsCall(dirID, "GET", fmt.Sprintf("/children?$top=%d", f.opt.ListChunk))
OUTER:
for { for {
var result api.ListChildrenResponse
var resp *http.Response var resp *http.Response
err = f.pacer.Call(func() (bool, error) { err = f.pacer.Call(func() (bool, error) {
resp, err = f.srv.CallJSON(ctx, &opts, nil, &result) resp, err = f.srv.CallJSON(ctx, opts, nil, result)
return shouldRetry(ctx, resp, err) return shouldRetry(ctx, resp, err)
}) })
if err != nil { if err != nil {
return found, fmt.Errorf("couldn't list files: %w", err) return fmt.Errorf("couldn't list files: %w", err)
} }
if len(result.Value) == 0 { if len(*pValue) == 0 {
break break
} }
for i := range result.Value { for i := range *pValue {
item := &result.Value[i] item := &(*pValue)[i]
isFolder := item.GetFolder() != nil isFolder := item.GetFolder() != nil
if isFolder { if isFolder {
if filesOnly { if filesOnly {
@ -1135,18 +1132,60 @@ OUTER:
continue continue
} }
item.Name = f.opt.Enc.ToStandardName(item.GetName()) item.Name = f.opt.Enc.ToStandardName(item.GetName())
if fn(item) { err = fn(item)
found = true if err != nil {
break OUTER return err
} }
} }
if result.NextLink == "" { if *pNextLink == "" {
break break
} }
opts.Path = "" opts.Path = ""
opts.RootURL = result.NextLink opts.Parameters = nil
opts.RootURL = *pNextLink
// reset results
*pNextLink = ""
*pValue = nil
} }
return return nil
}
// listAll lists the children of the directory dirID, calling the user
// function fn on each item found.
//
// It builds a "/children" request for dirID and hands the paging and
// per-item work to _listAll. directoriesOnly and filesOnly are passed
// straight through to _listAll.
//
// If fn returns an error then processing stops and that error is returned.
func (f *Fs) listAll(ctx context.Context, dirID string, directoriesOnly bool, filesOnly bool, fn listAllFn) (err error) {
	// The $top parameter asks for bigger pages of data
	// https://dev.onedrive.com/odata/optional-query-parameters.htm
	childrenPath := fmt.Sprintf("/children?$top=%d", f.opt.ListChunk)
	opts := f.newOptsCall(dirID, "GET", childrenPath)
	page := new(api.ListChildrenResponse)
	return f._listAll(ctx, dirID, directoriesOnly, filesOnly, fn, &opts, page, &page.Value, &page.NextLink)
}
// Convert a list item into a DirEntry
//
// Can return nil for an item which should be skipped
func (f *Fs) itemToDirEntry(ctx context.Context, dir string, info *api.Item) (entry fs.DirEntry, err error) {
if !f.opt.ExposeOneNoteFiles && info.GetPackageType() == api.PackageTypeOneNote {
fs.Debugf(info.Name, "OneNote file not shown in directory listing")
return nil, nil
}
remote := path.Join(dir, info.GetName())
folder := info.GetFolder()
if folder != nil {
// cache the directory ID for later lookups
id := info.GetID()
f.dirCache.Put(remote, id)
d := fs.NewDir(remote, time.Time(info.GetLastModifiedDateTime())).SetID(id)
d.SetItems(folder.ChildCount)
entry = d
} else {
o, err := f.newObjectWithInfo(ctx, remote, info)
if err != nil {
return nil, err
}
entry = o
}
return entry, nil
} }
// List the objects and directories in dir into entries. The // List the objects and directories in dir into entries. The
@ -1163,41 +1202,137 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e
if err != nil { if err != nil {
return nil, err return nil, err
} }
var iErr error err = f.listAll(ctx, directoryID, false, false, func(info *api.Item) error {
_, err = f.listAll(ctx, directoryID, false, false, func(info *api.Item) bool { entry, err := f.itemToDirEntry(ctx, dir, info)
if !f.opt.ExposeOneNoteFiles && info.GetPackageType() == api.PackageTypeOneNote { if err == nil {
fs.Debugf(info.Name, "OneNote file not shown in directory listing") entries = append(entries, entry)
return false
} }
return err
remote := path.Join(dir, info.GetName())
folder := info.GetFolder()
if folder != nil {
// cache the directory ID for later lookups
id := info.GetID()
f.dirCache.Put(remote, id)
d := fs.NewDir(remote, time.Time(info.GetLastModifiedDateTime())).SetID(id)
d.SetItems(folder.ChildCount)
entries = append(entries, d)
} else {
o, err := f.newObjectWithInfo(ctx, remote, info)
if err != nil {
iErr = err
return true
}
entries = append(entries, o)
}
return false
}) })
if err != nil { if err != nil {
return nil, err return nil, err
} }
if iErr != nil {
return nil, iErr
}
return entries, nil return entries, nil
} }
// ListR lists the objects and directories of the Fs starting
// from dir recursively into out.
//
// dir should be "" to start from the root, and should not
// have trailing slashes.
//
// This should return ErrDirNotFound if the directory isn't
// found.
//
// It should call callback for each tranche of entries read.
// These need not be returned in any particular order. If
// callback returns an error then the listing will stop
// immediately.
//
// Don't implement this unless you have a more efficient way
// of listing recursively than doing a directory traversal.
//
// This implementation requests "/root/delta" and filters out
// everything which is not under dir. Shared folders found along
// the way are listed conventionally with List.
func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) {
	// Make sure this ID is in the directory cache
	directoryID, err := f.dirCache.FindDir(ctx, dir, false)
	if err != nil {
		return err
	}

	// ListR only works at the root of a onedrive, not on a folder
	// So we have to filter things outside of the root which is
	// inefficient.

	list := walk.NewListRHelper(callback)

	// list a folder conventionally - used for shared folders
	var listFolder func(dir string) error
	listFolder = func(dir string) error {
		entries, err := f.List(ctx, dir)
		if err != nil {
			return err
		}
		for _, entry := range entries {
			err = list.Add(entry)
			if err != nil {
				return err
			}
			if _, isDir := entry.(fs.Directory); isDir {
				err = listFolder(entry.Remote())
				if err != nil {
					return err
				}
			}
		}
		return nil
	}

	// This code relies on the fact that directories are sent before their children. This isn't
	// mentioned in the docs though, so maybe it shouldn't be relied on.
	seen := map[string]struct{}{}
	fn := func(info *api.Item) error {
		var parentPath string
		var ok bool
		id := info.GetID()
		// The API can produce duplicates, so skip them
		if _, found := seen[id]; found {
			return nil
		}
		seen[id] = struct{}{}
		// Skip the root directory
		if id == directoryID {
			return nil
		}
		// Skip deleted items
		if info.Deleted != nil {
			return nil
		}
		dirID := info.GetParentReference().GetID()
		// Skip files that don't have their parent directory
		// cached as they are outside the root.
		parentPath, ok = f.dirCache.GetInv(dirID)
		if !ok {
			return nil
		}
		// Skip files not under the root directory
		remote := path.Join(parentPath, info.GetName())
		if dir != "" && !strings.HasPrefix(remote, dir+"/") {
			return nil
		}
		entry, err := f.itemToDirEntry(ctx, parentPath, info)
		if err != nil {
			return err
		}
		// itemToDirEntry returns a nil entry with a nil error for
		// items which should be skipped (e.g. hidden OneNote
		// files). Don't add a nil entry to the listing.
		if entry == nil {
			return nil
		}
		err = list.Add(entry)
		if err != nil {
			return err
		}
		// If this is a shared folder, we'll need to list it too
		if info.RemoteItem != nil && info.RemoteItem.Folder != nil {
			fs.Debugf(remote, "Listing shared directory")
			return listFolder(remote)
		}
		return nil
	}

	opts := rest.Opts{
		Method: "GET",
		Path:   "/root/delta",
		Parameters: map[string][]string{
			// "token": {token},
			"$top": {fmt.Sprintf("%d", f.opt.ListChunk)},
		},
	}

	var result api.DeltaResponse
	err = f._listAll(ctx, "", false, false, fn, &opts, &result, &result.Value, &result.NextLink)
	if err != nil {
		return err
	}

	return list.Flush()
}
// Creates from the parameters passed in a half finished Object which // Creates from the parameters passed in a half finished Object which
// must have setMetaData called on it // must have setMetaData called on it
// //
@ -1266,15 +1401,12 @@ func (f *Fs) purgeCheck(ctx context.Context, dir string, check bool) error {
} }
if check { if check {
// check to see if there are any items // check to see if there are any items
found, err := f.listAll(ctx, rootID, false, false, func(item *api.Item) bool { err := f.listAll(ctx, rootID, false, false, func(item *api.Item) error {
return true return fs.ErrorDirectoryNotEmpty
}) })
if err != nil { if err != nil {
return err return err
} }
if found {
return fs.ErrorDirectoryNotEmpty
}
} }
err = f.deleteObject(ctx, rootID) err = f.deleteObject(ctx, rootID)
if err != nil { if err != nil {
@ -2578,6 +2710,7 @@ var (
_ fs.Abouter = (*Fs)(nil) _ fs.Abouter = (*Fs)(nil)
_ fs.PublicLinker = (*Fs)(nil) _ fs.PublicLinker = (*Fs)(nil)
_ fs.CleanUpper = (*Fs)(nil) _ fs.CleanUpper = (*Fs)(nil)
_ fs.ListRer = (*Fs)(nil)
_ fs.Object = (*Object)(nil) _ fs.Object = (*Object)(nil)
_ fs.MimeTyper = &Object{} _ fs.MimeTyper = &Object{}
_ fs.IDer = &Object{} _ fs.IDer = &Object{}

View file

@ -183,6 +183,29 @@ your workflow.
For all types of OneDrive you can use the `--checksum` flag. For all types of OneDrive you can use the `--checksum` flag.
### --fast-list
This remote supports `--fast-list` which allows you to use fewer
transactions in exchange for more memory. See the [rclone
docs](/docs/#fast-list) for more details.
It does this by using the delta listing facilities of OneDrive which
return all the files in the remote very efficiently. This is much
more efficient than listing directories recursively and is Microsoft's
recommended way of reading all the file information from a drive.
This can be useful with `rclone mount` and [rclone rc vfs/refresh
recursive=true](/rc/#vfs-refresh) to very quickly fill the mount with
information about all the files.
The API used for the recursive listing (`ListR`) only supports listing
from the root of the drive. This will become increasingly inefficient
the further away you get from the root as rclone will have to discard
files outside of the directory you are using.
Some commands (like `rclone lsf -R`) will use `ListR` by default - you
can turn this off with `--disable ListR` if you need to.
### Restricted filename characters ### Restricted filename characters
In addition to the [default restricted characters set](/overview/#restricted-characters) In addition to the [default restricted characters set](/overview/#restricted-characters)

View file

@ -492,7 +492,7 @@ upon backend-specific capabilities.
| Mega | Yes | No | Yes | Yes | Yes | No | No | No | Yes | Yes | Yes | | Mega | Yes | No | Yes | Yes | Yes | No | No | No | Yes | Yes | Yes |
| Memory | No | Yes | No | No | No | Yes | Yes | No | No | No | No | | Memory | No | Yes | No | No | No | Yes | Yes | No | No | No | No |
| Microsoft Azure Blob Storage | Yes | Yes | No | No | No | Yes | Yes | Yes | No | No | No | | Microsoft Azure Blob Storage | Yes | Yes | No | No | No | Yes | Yes | Yes | No | No | No |
| Microsoft OneDrive | Yes | Yes | Yes | Yes | Yes | No | No | No | Yes | Yes | Yes | | Microsoft OneDrive | Yes | Yes | Yes | Yes | Yes | Yes | No | No | Yes | Yes | Yes |
| OpenDrive | Yes | Yes | Yes | Yes | No | No | No | No | No | No | Yes | | OpenDrive | Yes | Yes | Yes | Yes | No | No | No | No | No | No | Yes |
| OpenStack Swift | Yes † | Yes | No | No | No | Yes | Yes | No | No | Yes | No | | OpenStack Swift | Yes † | Yes | No | No | No | Yes | Yes | No | No | Yes | No |
| Oracle Object Storage | No | Yes | No | No | Yes | Yes | Yes | Yes | No | No | No | | Oracle Object Storage | No | Yes | No | No | Yes | Yes | Yes | Yes | No | No | No |