onedrive: implement ListR method which gives --fast-list support
This implements ListR for OneDrive. The API only allows doing this at the root of the drive, so it is inefficient to use it anywhere other than the root. Fixes #7317
This commit is contained in:
parent
ecb09badba
commit
b8591b230d
4 changed files with 218 additions and 52 deletions
|
@ -99,6 +99,16 @@ type ItemReference struct {
|
|||
DriveType string `json:"driveType"` // Type of the drive, Read-Only
|
||||
}
|
||||
|
||||
// GetID returns a normalized ID of the item
|
||||
// If DriveID is known it will be prefixed to the ID with # separator
|
||||
// Can be parsed using onedrive.parseNormalizedID(normalizedID)
|
||||
func (i *ItemReference) GetID() string {
|
||||
if !strings.Contains(i.ID, "#") {
|
||||
return i.DriveID + "#" + i.ID
|
||||
}
|
||||
return i.ID
|
||||
}
|
||||
|
||||
// RemoteItemFacet groups data needed to reference a OneDrive remote item
|
||||
type RemoteItemFacet struct {
|
||||
ID string `json:"id"` // The unique identifier of the item within the remote Drive. Read-only.
|
||||
|
|
|
@ -1095,32 +1095,29 @@ func (f *Fs) CreateDir(ctx context.Context, dirID, leaf string) (newID string, e
|
|||
// If directories is set it only sends directories
|
||||
// User function to process a File item from listAll
|
||||
//
|
||||
// Should return true to finish processing
|
||||
type listAllFn func(*api.Item) bool
|
||||
// If an error is returned then processing stops
|
||||
type listAllFn func(*api.Item) error
|
||||
|
||||
// Lists the directory required calling the user function on each item found
|
||||
//
|
||||
// If the user fn ever returns true then it early exits with found = true
|
||||
func (f *Fs) listAll(ctx context.Context, dirID string, directoriesOnly bool, filesOnly bool, fn listAllFn) (found bool, err error) {
|
||||
// Top parameter asks for bigger pages of data
|
||||
// https://dev.onedrive.com/odata/optional-query-parameters.htm
|
||||
opts := f.newOptsCall(dirID, "GET", fmt.Sprintf("/children?$top=%d", f.opt.ListChunk))
|
||||
OUTER:
|
||||
//
|
||||
// This listing function works on both normal listings and delta listings
|
||||
func (f *Fs) _listAll(ctx context.Context, dirID string, directoriesOnly bool, filesOnly bool, fn listAllFn, opts *rest.Opts, result any, pValue *[]api.Item, pNextLink *string) (err error) {
|
||||
for {
|
||||
var result api.ListChildrenResponse
|
||||
var resp *http.Response
|
||||
err = f.pacer.Call(func() (bool, error) {
|
||||
resp, err = f.srv.CallJSON(ctx, &opts, nil, &result)
|
||||
resp, err = f.srv.CallJSON(ctx, opts, nil, result)
|
||||
return shouldRetry(ctx, resp, err)
|
||||
})
|
||||
if err != nil {
|
||||
return found, fmt.Errorf("couldn't list files: %w", err)
|
||||
return fmt.Errorf("couldn't list files: %w", err)
|
||||
}
|
||||
if len(result.Value) == 0 {
|
||||
if len(*pValue) == 0 {
|
||||
break
|
||||
}
|
||||
for i := range result.Value {
|
||||
item := &result.Value[i]
|
||||
for i := range *pValue {
|
||||
item := &(*pValue)[i]
|
||||
isFolder := item.GetFolder() != nil
|
||||
if isFolder {
|
||||
if filesOnly {
|
||||
|
@ -1135,18 +1132,60 @@ OUTER:
|
|||
continue
|
||||
}
|
||||
item.Name = f.opt.Enc.ToStandardName(item.GetName())
|
||||
if fn(item) {
|
||||
found = true
|
||||
break OUTER
|
||||
err = fn(item)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if result.NextLink == "" {
|
||||
if *pNextLink == "" {
|
||||
break
|
||||
}
|
||||
opts.Path = ""
|
||||
opts.RootURL = result.NextLink
|
||||
opts.Parameters = nil
|
||||
opts.RootURL = *pNextLink
|
||||
// reset results
|
||||
*pNextLink = ""
|
||||
*pValue = nil
|
||||
}
|
||||
return
|
||||
return nil
|
||||
}
|
||||
|
||||
// Lists the directory required calling the user function on each item found
|
||||
//
|
||||
// If the user fn ever returns true then it early exits with found = true
|
||||
func (f *Fs) listAll(ctx context.Context, dirID string, directoriesOnly bool, filesOnly bool, fn listAllFn) (err error) {
|
||||
// Top parameter asks for bigger pages of data
|
||||
// https://dev.onedrive.com/odata/optional-query-parameters.htm
|
||||
opts := f.newOptsCall(dirID, "GET", fmt.Sprintf("/children?$top=%d", f.opt.ListChunk))
|
||||
var result api.ListChildrenResponse
|
||||
return f._listAll(ctx, dirID, directoriesOnly, filesOnly, fn, &opts, &result, &result.Value, &result.NextLink)
|
||||
}
|
||||
|
||||
// Convert a list item into a DirEntry
|
||||
//
|
||||
// Can return nil for an item which should be skipped
|
||||
func (f *Fs) itemToDirEntry(ctx context.Context, dir string, info *api.Item) (entry fs.DirEntry, err error) {
|
||||
if !f.opt.ExposeOneNoteFiles && info.GetPackageType() == api.PackageTypeOneNote {
|
||||
fs.Debugf(info.Name, "OneNote file not shown in directory listing")
|
||||
return nil, nil
|
||||
}
|
||||
remote := path.Join(dir, info.GetName())
|
||||
folder := info.GetFolder()
|
||||
if folder != nil {
|
||||
// cache the directory ID for later lookups
|
||||
id := info.GetID()
|
||||
f.dirCache.Put(remote, id)
|
||||
d := fs.NewDir(remote, time.Time(info.GetLastModifiedDateTime())).SetID(id)
|
||||
d.SetItems(folder.ChildCount)
|
||||
entry = d
|
||||
} else {
|
||||
o, err := f.newObjectWithInfo(ctx, remote, info)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
entry = o
|
||||
}
|
||||
return entry, nil
|
||||
}
|
||||
|
||||
// List the objects and directories in dir into entries. The
|
||||
|
@ -1163,41 +1202,137 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var iErr error
|
||||
_, err = f.listAll(ctx, directoryID, false, false, func(info *api.Item) bool {
|
||||
if !f.opt.ExposeOneNoteFiles && info.GetPackageType() == api.PackageTypeOneNote {
|
||||
fs.Debugf(info.Name, "OneNote file not shown in directory listing")
|
||||
return false
|
||||
err = f.listAll(ctx, directoryID, false, false, func(info *api.Item) error {
|
||||
entry, err := f.itemToDirEntry(ctx, dir, info)
|
||||
if err == nil {
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
|
||||
remote := path.Join(dir, info.GetName())
|
||||
folder := info.GetFolder()
|
||||
if folder != nil {
|
||||
// cache the directory ID for later lookups
|
||||
id := info.GetID()
|
||||
f.dirCache.Put(remote, id)
|
||||
d := fs.NewDir(remote, time.Time(info.GetLastModifiedDateTime())).SetID(id)
|
||||
d.SetItems(folder.ChildCount)
|
||||
entries = append(entries, d)
|
||||
} else {
|
||||
o, err := f.newObjectWithInfo(ctx, remote, info)
|
||||
if err != nil {
|
||||
iErr = err
|
||||
return true
|
||||
}
|
||||
entries = append(entries, o)
|
||||
}
|
||||
return false
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if iErr != nil {
|
||||
return nil, iErr
|
||||
}
|
||||
return entries, nil
|
||||
}
|
||||
|
||||
// ListR lists the objects and directories of the Fs starting
|
||||
// from dir recursively into out.
|
||||
//
|
||||
// dir should be "" to start from the root, and should not
|
||||
// have trailing slashes.
|
||||
//
|
||||
// This should return ErrDirNotFound if the directory isn't
|
||||
// found.
|
||||
//
|
||||
// It should call callback for each tranche of entries read.
|
||||
// These need not be returned in any particular order. If
|
||||
// callback returns an error then the listing will stop
|
||||
// immediately.
|
||||
//
|
||||
// Don't implement this unless you have a more efficient way
|
||||
// of listing recursively than doing a directory traversal.
|
||||
func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) {
|
||||
// Make sure this ID is in the directory cache
|
||||
directoryID, err := f.dirCache.FindDir(ctx, dir, false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// ListR only works at the root of a onedrive, not on a folder
|
||||
// So we have to filter things outside of the root which is
|
||||
// inefficient.
|
||||
|
||||
list := walk.NewListRHelper(callback)
|
||||
|
||||
// list a folder conventionally - used for shared folders
|
||||
var listFolder func(dir string) error
|
||||
listFolder = func(dir string) error {
|
||||
entries, err := f.List(ctx, dir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, entry := range entries {
|
||||
err = list.Add(entry)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if _, isDir := entry.(fs.Directory); isDir {
|
||||
err = listFolder(entry.Remote())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// This code relies on the fact that directories are sent before their children. This isn't
|
||||
// mentioned in the docs though, so maybe it shouldn't be relied on.
|
||||
seen := map[string]struct{}{}
|
||||
fn := func(info *api.Item) error {
|
||||
var parentPath string
|
||||
var ok bool
|
||||
id := info.GetID()
|
||||
// The API can produce duplicates, so skip them
|
||||
if _, found := seen[id]; found {
|
||||
return nil
|
||||
}
|
||||
seen[id] = struct{}{}
|
||||
// Skip the root directory
|
||||
if id == directoryID {
|
||||
return nil
|
||||
}
|
||||
// Skip deleted items
|
||||
if info.Deleted != nil {
|
||||
return nil
|
||||
}
|
||||
dirID := info.GetParentReference().GetID()
|
||||
// Skip files that don't have their parent directory
|
||||
// cached as they are outside the root.
|
||||
parentPath, ok = f.dirCache.GetInv(dirID)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
// Skip files not under the root directory
|
||||
remote := path.Join(parentPath, info.GetName())
|
||||
if dir != "" && !strings.HasPrefix(remote, dir+"/") {
|
||||
return nil
|
||||
}
|
||||
entry, err := f.itemToDirEntry(ctx, parentPath, info)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = list.Add(entry)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// If this is a shared folder, we'll need list it too
|
||||
if info.RemoteItem != nil && info.RemoteItem.Folder != nil {
|
||||
fs.Debugf(remote, "Listing shared directory")
|
||||
return listFolder(remote)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
opts := rest.Opts{
|
||||
Method: "GET",
|
||||
Path: "/root/delta",
|
||||
Parameters: map[string][]string{
|
||||
// "token": {token},
|
||||
"$top": {fmt.Sprintf("%d", f.opt.ListChunk)},
|
||||
},
|
||||
}
|
||||
|
||||
var result api.DeltaResponse
|
||||
err = f._listAll(ctx, "", false, false, fn, &opts, &result, &result.Value, &result.NextLink)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return list.Flush()
|
||||
|
||||
}
|
||||
|
||||
// Creates from the parameters passed in a half finished Object which
|
||||
// must have setMetaData called on it
|
||||
//
|
||||
|
@ -1266,15 +1401,12 @@ func (f *Fs) purgeCheck(ctx context.Context, dir string, check bool) error {
|
|||
}
|
||||
if check {
|
||||
// check to see if there are any items
|
||||
found, err := f.listAll(ctx, rootID, false, false, func(item *api.Item) bool {
|
||||
return true
|
||||
err := f.listAll(ctx, rootID, false, false, func(item *api.Item) error {
|
||||
return fs.ErrorDirectoryNotEmpty
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if found {
|
||||
return fs.ErrorDirectoryNotEmpty
|
||||
}
|
||||
}
|
||||
err = f.deleteObject(ctx, rootID)
|
||||
if err != nil {
|
||||
|
@ -2578,6 +2710,7 @@ var (
|
|||
_ fs.Abouter = (*Fs)(nil)
|
||||
_ fs.PublicLinker = (*Fs)(nil)
|
||||
_ fs.CleanUpper = (*Fs)(nil)
|
||||
_ fs.ListRer = (*Fs)(nil)
|
||||
_ fs.Object = (*Object)(nil)
|
||||
_ fs.MimeTyper = &Object{}
|
||||
_ fs.IDer = &Object{}
|
||||
|
|
|
@ -183,6 +183,29 @@ your workflow.
|
|||
|
||||
For all types of OneDrive you can use the `--checksum` flag.
|
||||
|
||||
### --fast-list
|
||||
|
||||
This remote supports `--fast-list` which allows you to use fewer
|
||||
transactions in exchange for more memory. See the [rclone
|
||||
docs](/docs/#fast-list) for more details.
|
||||
|
||||
It does this by using the delta listing facilities of OneDrive which
|
||||
returns all the files in the remote very efficiently. This is much
|
||||
more efficient than listing directories recursively and is Microsoft's
|
||||
recommended way of reading all the file information from a drive.
|
||||
|
||||
This can be useful with `rclone mount` and [rclone rc vfs/refresh
|
||||
recursive=true](/rc/#vfs-refresh) to very quickly fill the mount with
|
||||
information about all the files.
|
||||
|
||||
The API used for the recursive listing (`ListR`) only supports listing
|
||||
from the root of the drive. This will become increasingly inefficient
|
||||
the further away you get from the root as rclone will have to discard
|
||||
files outside of the directory you are using.
|
||||
|
||||
Some commands (like `rclone lsf -R`) will use `ListR` by default - you
|
||||
can turn this off with `--disable ListR` if you need to.
|
||||
|
||||
### Restricted filename characters
|
||||
|
||||
In addition to the [default restricted characters set](/overview/#restricted-characters)
|
||||
|
|
|
@ -492,7 +492,7 @@ upon backend-specific capabilities.
|
|||
| Mega | Yes | No | Yes | Yes | Yes | No | No | No | Yes | Yes | Yes |
|
||||
| Memory | No | Yes | No | No | No | Yes | Yes | No | No | No | No |
|
||||
| Microsoft Azure Blob Storage | Yes | Yes | No | No | No | Yes | Yes | Yes | No | No | No |
|
||||
| Microsoft OneDrive | Yes | Yes | Yes | Yes | Yes | No | No | No | Yes | Yes | Yes |
|
||||
| Microsoft OneDrive | Yes | Yes | Yes | Yes | Yes | Yes | No | No | Yes | Yes | Yes |
|
||||
| OpenDrive | Yes | Yes | Yes | Yes | No | No | No | No | No | No | Yes |
|
||||
| OpenStack Swift | Yes † | Yes | No | No | No | Yes | Yes | No | No | Yes | No |
|
||||
| Oracle Object Storage | No | Yes | No | No | Yes | Yes | Yes | Yes | No | No | No |
|
||||
|
|
Loading…
Reference in a new issue