onedrive: implement ListR method which gives --fast-list support
This implements ListR for onedrive. The API only allows recursive listing from the root of the drive, so using it on a subdirectory is inefficient. Fixes #7317
This commit is contained in:
parent
ecb09badba
commit
b8591b230d
4 changed files with 218 additions and 52 deletions
|
@ -99,6 +99,16 @@ type ItemReference struct {
|
||||||
DriveType string `json:"driveType"` // Type of the drive, Read-Only
|
DriveType string `json:"driveType"` // Type of the drive, Read-Only
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetID returns a normalized ID of the item
|
||||||
|
// If DriveID is known it will be prefixed to the ID with # separator
|
||||||
|
// Can be parsed using onedrive.parseNormalizedID(normalizedID)
|
||||||
|
func (i *ItemReference) GetID() string {
|
||||||
|
if !strings.Contains(i.ID, "#") {
|
||||||
|
return i.DriveID + "#" + i.ID
|
||||||
|
}
|
||||||
|
return i.ID
|
||||||
|
}
|
||||||
|
|
||||||
// RemoteItemFacet groups data needed to reference a OneDrive remote item
|
// RemoteItemFacet groups data needed to reference a OneDrive remote item
|
||||||
type RemoteItemFacet struct {
|
type RemoteItemFacet struct {
|
||||||
ID string `json:"id"` // The unique identifier of the item within the remote Drive. Read-only.
|
ID string `json:"id"` // The unique identifier of the item within the remote Drive. Read-only.
|
||||||
|
|
|
@ -1095,32 +1095,29 @@ func (f *Fs) CreateDir(ctx context.Context, dirID, leaf string) (newID string, e
|
||||||
// If directories is set it only sends directories
|
// If directories is set it only sends directories
|
||||||
// User function to process a File item from listAll
|
// User function to process a File item from listAll
|
||||||
//
|
//
|
||||||
// Should return true to finish processing
|
// If an error is returned then processing stops
|
||||||
type listAllFn func(*api.Item) bool
|
type listAllFn func(*api.Item) error
|
||||||
|
|
||||||
// Lists the directory required calling the user function on each item found
|
// Lists the directory required calling the user function on each item found
|
||||||
//
|
//
|
||||||
// If the user fn ever returns true then it early exits with found = true
|
// If the user fn ever returns true then it early exits with found = true
|
||||||
func (f *Fs) listAll(ctx context.Context, dirID string, directoriesOnly bool, filesOnly bool, fn listAllFn) (found bool, err error) {
|
//
|
||||||
// Top parameter asks for bigger pages of data
|
// This listing function works on both normal listings and delta listings
|
||||||
// https://dev.onedrive.com/odata/optional-query-parameters.htm
|
func (f *Fs) _listAll(ctx context.Context, dirID string, directoriesOnly bool, filesOnly bool, fn listAllFn, opts *rest.Opts, result any, pValue *[]api.Item, pNextLink *string) (err error) {
|
||||||
opts := f.newOptsCall(dirID, "GET", fmt.Sprintf("/children?$top=%d", f.opt.ListChunk))
|
|
||||||
OUTER:
|
|
||||||
for {
|
for {
|
||||||
var result api.ListChildrenResponse
|
|
||||||
var resp *http.Response
|
var resp *http.Response
|
||||||
err = f.pacer.Call(func() (bool, error) {
|
err = f.pacer.Call(func() (bool, error) {
|
||||||
resp, err = f.srv.CallJSON(ctx, &opts, nil, &result)
|
resp, err = f.srv.CallJSON(ctx, opts, nil, result)
|
||||||
return shouldRetry(ctx, resp, err)
|
return shouldRetry(ctx, resp, err)
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return found, fmt.Errorf("couldn't list files: %w", err)
|
return fmt.Errorf("couldn't list files: %w", err)
|
||||||
}
|
}
|
||||||
if len(result.Value) == 0 {
|
if len(*pValue) == 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
for i := range result.Value {
|
for i := range *pValue {
|
||||||
item := &result.Value[i]
|
item := &(*pValue)[i]
|
||||||
isFolder := item.GetFolder() != nil
|
isFolder := item.GetFolder() != nil
|
||||||
if isFolder {
|
if isFolder {
|
||||||
if filesOnly {
|
if filesOnly {
|
||||||
|
@ -1135,18 +1132,60 @@ OUTER:
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
item.Name = f.opt.Enc.ToStandardName(item.GetName())
|
item.Name = f.opt.Enc.ToStandardName(item.GetName())
|
||||||
if fn(item) {
|
err = fn(item)
|
||||||
found = true
|
if err != nil {
|
||||||
break OUTER
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if result.NextLink == "" {
|
if *pNextLink == "" {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
opts.Path = ""
|
opts.Path = ""
|
||||||
opts.RootURL = result.NextLink
|
opts.Parameters = nil
|
||||||
|
opts.RootURL = *pNextLink
|
||||||
|
// reset results
|
||||||
|
*pNextLink = ""
|
||||||
|
*pValue = nil
|
||||||
}
|
}
|
||||||
return
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Lists the directory required calling the user function on each item found
|
||||||
|
//
|
||||||
|
// If the user fn ever returns an error then it exits early and returns that error
|
||||||
|
func (f *Fs) listAll(ctx context.Context, dirID string, directoriesOnly bool, filesOnly bool, fn listAllFn) (err error) {
|
||||||
|
// Top parameter asks for bigger pages of data
|
||||||
|
// https://dev.onedrive.com/odata/optional-query-parameters.htm
|
||||||
|
opts := f.newOptsCall(dirID, "GET", fmt.Sprintf("/children?$top=%d", f.opt.ListChunk))
|
||||||
|
var result api.ListChildrenResponse
|
||||||
|
return f._listAll(ctx, dirID, directoriesOnly, filesOnly, fn, &opts, &result, &result.Value, &result.NextLink)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert a list item into a DirEntry
|
||||||
|
//
|
||||||
|
// Can return nil for an item which should be skipped
|
||||||
|
func (f *Fs) itemToDirEntry(ctx context.Context, dir string, info *api.Item) (entry fs.DirEntry, err error) {
|
||||||
|
if !f.opt.ExposeOneNoteFiles && info.GetPackageType() == api.PackageTypeOneNote {
|
||||||
|
fs.Debugf(info.Name, "OneNote file not shown in directory listing")
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
remote := path.Join(dir, info.GetName())
|
||||||
|
folder := info.GetFolder()
|
||||||
|
if folder != nil {
|
||||||
|
// cache the directory ID for later lookups
|
||||||
|
id := info.GetID()
|
||||||
|
f.dirCache.Put(remote, id)
|
||||||
|
d := fs.NewDir(remote, time.Time(info.GetLastModifiedDateTime())).SetID(id)
|
||||||
|
d.SetItems(folder.ChildCount)
|
||||||
|
entry = d
|
||||||
|
} else {
|
||||||
|
o, err := f.newObjectWithInfo(ctx, remote, info)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
entry = o
|
||||||
|
}
|
||||||
|
return entry, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// List the objects and directories in dir into entries. The
|
// List the objects and directories in dir into entries. The
|
||||||
|
@ -1163,41 +1202,137 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
var iErr error
|
err = f.listAll(ctx, directoryID, false, false, func(info *api.Item) error {
|
||||||
_, err = f.listAll(ctx, directoryID, false, false, func(info *api.Item) bool {
|
entry, err := f.itemToDirEntry(ctx, dir, info)
|
||||||
if !f.opt.ExposeOneNoteFiles && info.GetPackageType() == api.PackageTypeOneNote {
|
if err == nil {
|
||||||
fs.Debugf(info.Name, "OneNote file not shown in directory listing")
|
entries = append(entries, entry)
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
|
return err
|
||||||
remote := path.Join(dir, info.GetName())
|
|
||||||
folder := info.GetFolder()
|
|
||||||
if folder != nil {
|
|
||||||
// cache the directory ID for later lookups
|
|
||||||
id := info.GetID()
|
|
||||||
f.dirCache.Put(remote, id)
|
|
||||||
d := fs.NewDir(remote, time.Time(info.GetLastModifiedDateTime())).SetID(id)
|
|
||||||
d.SetItems(folder.ChildCount)
|
|
||||||
entries = append(entries, d)
|
|
||||||
} else {
|
|
||||||
o, err := f.newObjectWithInfo(ctx, remote, info)
|
|
||||||
if err != nil {
|
|
||||||
iErr = err
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
entries = append(entries, o)
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if iErr != nil {
|
|
||||||
return nil, iErr
|
|
||||||
}
|
|
||||||
return entries, nil
|
return entries, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ListR lists the objects and directories of the Fs starting
|
||||||
|
// from dir recursively into out.
|
||||||
|
//
|
||||||
|
// dir should be "" to start from the root, and should not
|
||||||
|
// have trailing slashes.
|
||||||
|
//
|
||||||
|
// This should return ErrDirNotFound if the directory isn't
|
||||||
|
// found.
|
||||||
|
//
|
||||||
|
// It should call callback for each tranche of entries read.
|
||||||
|
// These need not be returned in any particular order. If
|
||||||
|
// callback returns an error then the listing will stop
|
||||||
|
// immediately.
|
||||||
|
//
|
||||||
|
// Don't implement this unless you have a more efficient way
|
||||||
|
// of listing recursively than doing a directory traversal.
|
||||||
|
func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) {
|
||||||
|
// Make sure this ID is in the directory cache
|
||||||
|
directoryID, err := f.dirCache.FindDir(ctx, dir, false)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListR only works at the root of a onedrive, not on a folder
|
||||||
|
// So we have to filter things outside of the root which is
|
||||||
|
// inefficient.
|
||||||
|
|
||||||
|
list := walk.NewListRHelper(callback)
|
||||||
|
|
||||||
|
// list a folder conventionally - used for shared folders
|
||||||
|
var listFolder func(dir string) error
|
||||||
|
listFolder = func(dir string) error {
|
||||||
|
entries, err := f.List(ctx, dir)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for _, entry := range entries {
|
||||||
|
err = list.Add(entry)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if _, isDir := entry.(fs.Directory); isDir {
|
||||||
|
err = listFolder(entry.Remote())
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// This code relies on the fact that directories are sent before their children. This isn't
|
||||||
|
// mentioned in the docs though, so maybe it shouldn't be relied on.
|
||||||
|
seen := map[string]struct{}{}
|
||||||
|
fn := func(info *api.Item) error {
|
||||||
|
var parentPath string
|
||||||
|
var ok bool
|
||||||
|
id := info.GetID()
|
||||||
|
// The API can produce duplicates, so skip them
|
||||||
|
if _, found := seen[id]; found {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
seen[id] = struct{}{}
|
||||||
|
// Skip the root directory
|
||||||
|
if id == directoryID {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
// Skip deleted items
|
||||||
|
if info.Deleted != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
dirID := info.GetParentReference().GetID()
|
||||||
|
// Skip files that don't have their parent directory
|
||||||
|
// cached as they are outside the root.
|
||||||
|
parentPath, ok = f.dirCache.GetInv(dirID)
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
// Skip files not under the root directory
|
||||||
|
remote := path.Join(parentPath, info.GetName())
|
||||||
|
if dir != "" && !strings.HasPrefix(remote, dir+"/") {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
entry, err := f.itemToDirEntry(ctx, parentPath, info)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = list.Add(entry)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// If this is a shared folder, we'll need to list it too
|
||||||
|
if info.RemoteItem != nil && info.RemoteItem.Folder != nil {
|
||||||
|
fs.Debugf(remote, "Listing shared directory")
|
||||||
|
return listFolder(remote)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
opts := rest.Opts{
|
||||||
|
Method: "GET",
|
||||||
|
Path: "/root/delta",
|
||||||
|
Parameters: map[string][]string{
|
||||||
|
// "token": {token},
|
||||||
|
"$top": {fmt.Sprintf("%d", f.opt.ListChunk)},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
var result api.DeltaResponse
|
||||||
|
err = f._listAll(ctx, "", false, false, fn, &opts, &result, &result.Value, &result.NextLink)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return list.Flush()
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// Creates from the parameters passed in a half finished Object which
|
// Creates from the parameters passed in a half finished Object which
|
||||||
// must have setMetaData called on it
|
// must have setMetaData called on it
|
||||||
//
|
//
|
||||||
|
@ -1266,15 +1401,12 @@ func (f *Fs) purgeCheck(ctx context.Context, dir string, check bool) error {
|
||||||
}
|
}
|
||||||
if check {
|
if check {
|
||||||
// check to see if there are any items
|
// check to see if there are any items
|
||||||
found, err := f.listAll(ctx, rootID, false, false, func(item *api.Item) bool {
|
err := f.listAll(ctx, rootID, false, false, func(item *api.Item) error {
|
||||||
return true
|
return fs.ErrorDirectoryNotEmpty
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if found {
|
|
||||||
return fs.ErrorDirectoryNotEmpty
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
err = f.deleteObject(ctx, rootID)
|
err = f.deleteObject(ctx, rootID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -2578,6 +2710,7 @@ var (
|
||||||
_ fs.Abouter = (*Fs)(nil)
|
_ fs.Abouter = (*Fs)(nil)
|
||||||
_ fs.PublicLinker = (*Fs)(nil)
|
_ fs.PublicLinker = (*Fs)(nil)
|
||||||
_ fs.CleanUpper = (*Fs)(nil)
|
_ fs.CleanUpper = (*Fs)(nil)
|
||||||
|
_ fs.ListRer = (*Fs)(nil)
|
||||||
_ fs.Object = (*Object)(nil)
|
_ fs.Object = (*Object)(nil)
|
||||||
_ fs.MimeTyper = &Object{}
|
_ fs.MimeTyper = &Object{}
|
||||||
_ fs.IDer = &Object{}
|
_ fs.IDer = &Object{}
|
||||||
|
|
|
@ -183,6 +183,29 @@ your workflow.
|
||||||
|
|
||||||
For all types of OneDrive you can use the `--checksum` flag.
|
For all types of OneDrive you can use the `--checksum` flag.
|
||||||
|
|
||||||
|
### --fast-list
|
||||||
|
|
||||||
|
This remote supports `--fast-list` which allows you to use fewer
|
||||||
|
transactions in exchange for more memory. See the [rclone
|
||||||
|
docs](/docs/#fast-list) for more details.
|
||||||
|
|
||||||
|
It does this by using the delta listing facilities of OneDrive which
|
||||||
|
return all the files in the remote very efficiently. This is much
|
||||||
|
more efficient than listing directories recursively and is Microsoft's
|
||||||
|
recommended way of reading all the file information from a drive.
|
||||||
|
|
||||||
|
This can be useful with `rclone mount` and [rclone rc vfs/refresh
|
||||||
|
recursive=true](/rc/#vfs-refresh) to very quickly fill the mount with
|
||||||
|
information about all the files.
|
||||||
|
|
||||||
|
The API used for the recursive listing (`ListR`) only supports listing
|
||||||
|
from the root of the drive. This will become increasingly inefficient
|
||||||
|
the further away you get from the root as rclone will have to discard
|
||||||
|
files outside of the directory you are using.
|
||||||
|
|
||||||
|
Some commands (like `rclone lsf -R`) will use `ListR` by default - you
|
||||||
|
can turn this off with `--disable ListR` if you need to.
|
||||||
|
|
||||||
### Restricted filename characters
|
### Restricted filename characters
|
||||||
|
|
||||||
In addition to the [default restricted characters set](/overview/#restricted-characters)
|
In addition to the [default restricted characters set](/overview/#restricted-characters)
|
||||||
|
|
|
@ -492,7 +492,7 @@ upon backend-specific capabilities.
|
||||||
| Mega | Yes | No | Yes | Yes | Yes | No | No | No | Yes | Yes | Yes |
|
| Mega | Yes | No | Yes | Yes | Yes | No | No | No | Yes | Yes | Yes |
|
||||||
| Memory | No | Yes | No | No | No | Yes | Yes | No | No | No | No |
|
| Memory | No | Yes | No | No | No | Yes | Yes | No | No | No | No |
|
||||||
| Microsoft Azure Blob Storage | Yes | Yes | No | No | No | Yes | Yes | Yes | No | No | No |
|
| Microsoft Azure Blob Storage | Yes | Yes | No | No | No | Yes | Yes | Yes | No | No | No |
|
||||||
| Microsoft OneDrive | Yes | Yes | Yes | Yes | Yes | No | No | No | Yes | Yes | Yes |
|
| Microsoft OneDrive | Yes | Yes | Yes | Yes | Yes | Yes | No | No | Yes | Yes | Yes |
|
||||||
| OpenDrive | Yes | Yes | Yes | Yes | No | No | No | No | No | No | Yes |
|
| OpenDrive | Yes | Yes | Yes | Yes | No | No | No | No | No | No | Yes |
|
||||||
| OpenStack Swift | Yes † | Yes | No | No | No | Yes | Yes | No | No | Yes | No |
|
| OpenStack Swift | Yes † | Yes | No | No | No | Yes | Yes | No | No | Yes | No |
|
||||||
| Oracle Object Storage | No | Yes | No | No | Yes | Yes | Yes | Yes | No | No | No |
|
| Oracle Object Storage | No | Yes | No | No | Yes | Yes | Yes | Yes | No | No | No |
|
||||||
|
|
Loading…
Reference in a new issue