forked from TrueCloudLab/rclone
drive: Export Google documents - fixes #49
Rclone will download one format of a google doc. The choice of which export format is controlled by the `--drive-formats` flag.
This commit is contained in:
parent
0f73129ab7
commit
558bc2e132
2 changed files with 216 additions and 14 deletions
|
@ -132,6 +132,50 @@ off, namely deleting files permanently.
|
||||||
Only consider files owned by the authenticated user. Requires
|
Only consider files owned by the authenticated user. Requires
|
||||||
that --drive-full-list=true (default).
|
that --drive-full-list=true (default).
|
||||||
|
|
||||||
|
#### --drive-formats ####
|
||||||
|
|
||||||
|
Google documents can only be exported from Google drive. When rclone
|
||||||
|
downloads a Google doc it chooses a format to download depending upon
|
||||||
|
this setting.
|
||||||
|
|
||||||
|
By default the formats are `docx,xlsx,pptx,svg` which are a sensible
|
||||||
|
default for an editable document.
|
||||||
|
|
||||||
|
When choosing a format, rclone runs down the list provided in order
|
||||||
|
and chooses the first file format the doc can be exported as from the
|
||||||
|
list. If the file can't be exported to a format on the formats list,
|
||||||
|
then rclone will choose a format from the default list.
|
||||||
|
|
||||||
|
If you prefer an archive copy then you might use `--drive-formats
|
||||||
|
pdf`, or if you prefer openoffice/libreoffice formats you might use
|
||||||
|
`--drive-formats ods,odt`.
|
||||||
|
|
||||||
|
Note that rclone adds the extension to the google doc, so if it is
|
||||||
|
called `My Spreadsheet` on google docs, it will be exported as `My
|
||||||
|
Spreadsheet.xlsx` or `My Spreadsheet.pdf` etc.
|
||||||
|
|
||||||
|
Here are the possible extensions with their corresponding mime types.
|
||||||
|
|
||||||
|
| Extension | Mime Type | Description |
|
||||||
|
| --------- |-----------| ------------|
|
||||||
|
| csv | text/csv | Standard CSV format for Spreadsheets |
|
||||||
|
| doc | application/msword | Microsoft Office Document |
|
||||||
|
| docx | application/vnd.openxmlformats-officedocument.wordprocessingml.document | Microsoft Office Document |
|
||||||
|
| html | text/html | An HTML Document |
|
||||||
|
| jpg | image/jpeg | A JPEG Image File |
|
||||||
|
| ods | application/vnd.oasis.opendocument.spreadsheet | Openoffice Spreadsheet |
|
||||||
|
| ods | application/x-vnd.oasis.opendocument.spreadsheet | Openoffice Spreadsheet |
|
||||||
|
| odt | application/vnd.oasis.opendocument.text | Openoffice Document |
|
||||||
|
| pdf | application/pdf | Adobe PDF Format |
|
||||||
|
| png | image/png | PNG Image Format|
|
||||||
|
| pptx | application/vnd.openxmlformats-officedocument.presentationml.presentation | Microsoft Office Powerpoint |
|
||||||
|
| rtf | application/rtf | Rich Text Format |
|
||||||
|
| svg | image/svg+xml | Scalable Vector Graphics Format |
|
||||||
|
| txt | text/plain | Plain Text |
|
||||||
|
| xls | application/vnd.ms-excel | Microsoft Office Spreadsheet |
|
||||||
|
| xlsx | application/vnd.openxmlformats-officedocument.spreadsheetml.sheet | Microsoft Office Spreadsheet |
|
||||||
|
| zip | application/zip | A ZIP file of HTML, Images and CSS |
|
||||||
|
|
||||||
### Limitations ###
|
### Limitations ###
|
||||||
|
|
||||||
Drive has quite a lot of rate limiting. This causes rclone to be
|
Drive has quite a lot of rate limiting. This causes rclone to be
|
||||||
|
|
172
drive/drive.go
172
drive/drive.go
|
@ -38,6 +38,7 @@ const (
|
||||||
minSleep = 10 * time.Millisecond
|
minSleep = 10 * time.Millisecond
|
||||||
maxSleep = 2 * time.Second
|
maxSleep = 2 * time.Second
|
||||||
decayConstant = 2 // bigger for slower decay, exponential
|
decayConstant = 2 // bigger for slower decay, exponential
|
||||||
|
defaultExtensions = "docx,xlsx,pptx,svg"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Globals
|
// Globals
|
||||||
|
@ -46,6 +47,7 @@ var (
|
||||||
driveFullList = pflag.BoolP("drive-full-list", "", true, "Use a full listing for directory list. More data but usually quicker.")
|
driveFullList = pflag.BoolP("drive-full-list", "", true, "Use a full listing for directory list. More data but usually quicker.")
|
||||||
driveAuthOwnerOnly = pflag.BoolP("drive-auth-owner-only", "", false, "Only consider files owned by the authenticated user. Requires drive-full-list.")
|
driveAuthOwnerOnly = pflag.BoolP("drive-auth-owner-only", "", false, "Only consider files owned by the authenticated user. Requires drive-full-list.")
|
||||||
driveUseTrash = pflag.BoolP("drive-use-trash", "", false, "Send files to the trash instead of deleting permanently.")
|
driveUseTrash = pflag.BoolP("drive-use-trash", "", false, "Send files to the trash instead of deleting permanently.")
|
||||||
|
driveExtensions = pflag.StringP("drive-formats", "", defaultExtensions, "Comma separated list of preferred formats for downloading Google docs.")
|
||||||
// chunkSize is the size of the chunks created during a resumable upload and should be a power of two.
|
// chunkSize is the size of the chunks created during a resumable upload and should be a power of two.
|
||||||
// 1<<18 is the minimum size supported by the Google uploader, and there is no maximum.
|
// 1<<18 is the minimum size supported by the Google uploader, and there is no maximum.
|
||||||
chunkSize = fs.SizeSuffix(256 * 1024)
|
chunkSize = fs.SizeSuffix(256 * 1024)
|
||||||
|
@ -58,6 +60,25 @@ var (
|
||||||
ClientSecret: fs.Reveal(rcloneClientSecret),
|
ClientSecret: fs.Reveal(rcloneClientSecret),
|
||||||
RedirectURL: oauthutil.TitleBarRedirectURL,
|
RedirectURL: oauthutil.TitleBarRedirectURL,
|
||||||
}
|
}
|
||||||
|
mimeTypeToExtension = map[string]string{
|
||||||
|
"application/msword": "doc",
|
||||||
|
"application/pdf": "pdf",
|
||||||
|
"application/rtf": "rtf",
|
||||||
|
"application/vnd.ms-excel": "xls",
|
||||||
|
"application/vnd.oasis.opendocument.spreadsheet": "ods",
|
||||||
|
"application/vnd.oasis.opendocument.text": "odt",
|
||||||
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx",
|
||||||
|
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
|
||||||
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
|
||||||
|
"application/x-vnd.oasis.opendocument.spreadsheet": "ods",
|
||||||
|
"application/zip": "zip",
|
||||||
|
"image/jpeg": "jpg",
|
||||||
|
"image/png": "png",
|
||||||
|
"image/svg+xml": "svg",
|
||||||
|
"text/csv": "csv",
|
||||||
|
"text/html": "html",
|
||||||
|
"text/plain": "txt",
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
// Register with Fs
|
// Register with Fs
|
||||||
|
@ -92,6 +113,7 @@ type Fs struct {
|
||||||
about *drive.About // information about the drive, including the root
|
about *drive.About // information about the drive, including the root
|
||||||
dirCache *dircache.DirCache // Map of directory path to directory id
|
dirCache *dircache.DirCache // Map of directory path to directory id
|
||||||
pacer *pacer.Pacer // To pace the API calls
|
pacer *pacer.Pacer // To pace the API calls
|
||||||
|
extensions []string // preferred extensions to download docs
|
||||||
}
|
}
|
||||||
|
|
||||||
// Object describes a drive object
|
// Object describes a drive object
|
||||||
|
@ -103,6 +125,7 @@ type Object struct {
|
||||||
md5sum string // md5sum of the object
|
md5sum string // md5sum of the object
|
||||||
bytes int64 // size of the object
|
bytes int64 // size of the object
|
||||||
modifiedDate string // RFC3339 time it was last modified
|
modifiedDate string // RFC3339 time it was last modified
|
||||||
|
isDocument bool // if set this is a Google doc
|
||||||
}
|
}
|
||||||
|
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
|
@ -217,6 +240,31 @@ func isPowerOfTwo(x int64) bool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parseExtensions parses drive export extensions from a string
|
||||||
|
func (f *Fs) parseExtensions(extensions string) {
|
||||||
|
// Invert mimeTypeToExtension
|
||||||
|
var extensionToMimeType = make(map[string]string, len(mimeTypeToExtension))
|
||||||
|
for mimeType, extension := range mimeTypeToExtension {
|
||||||
|
extensionToMimeType[extension] = mimeType
|
||||||
|
}
|
||||||
|
for _, extension := range strings.Split(extensions, ",") {
|
||||||
|
extension = strings.ToLower(strings.TrimSpace(extension))
|
||||||
|
if _, found := extensionToMimeType[extension]; !found {
|
||||||
|
log.Fatalf("Couldn't find mime type for extension %q", extension)
|
||||||
|
}
|
||||||
|
found := false
|
||||||
|
for _, existingExtension := range f.extensions {
|
||||||
|
if extension == existingExtension {
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
f.extensions = append(f.extensions, extension)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// NewFs constructs an Fs from the path, container:path
|
// NewFs constructs an Fs from the path, container:path
|
||||||
func NewFs(name, path string) (fs.Fs, error) {
|
func NewFs(name, path string) (fs.Fs, error) {
|
||||||
if !isPowerOfTwo(int64(chunkSize)) {
|
if !isPowerOfTwo(int64(chunkSize)) {
|
||||||
|
@ -260,6 +308,10 @@ func NewFs(name, path string) (fs.Fs, error) {
|
||||||
|
|
||||||
f.dirCache = dircache.New(root, f.about.RootFolderId, f)
|
f.dirCache = dircache.New(root, f.about.RootFolderId, f)
|
||||||
|
|
||||||
|
// Parse extensions
|
||||||
|
f.parseExtensions(*driveExtensions)
|
||||||
|
f.parseExtensions(defaultExtensions) // make sure there are some sensible ones on there
|
||||||
|
|
||||||
// Find the current root
|
// Find the current root
|
||||||
err = f.dirCache.FindRoot(false)
|
err = f.dirCache.FindRoot(false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -381,11 +433,52 @@ func (f *Fs) listDirRecursive(dirID string, path string, out fs.ObjectsChan) err
|
||||||
|
|
||||||
}()
|
}()
|
||||||
} else {
|
} else {
|
||||||
// If item has no MD5 sum it isn't stored on drive, so ignore it
|
filepath := path + item.Title
|
||||||
if item.Md5Checksum != "" {
|
if item.Md5Checksum != "" {
|
||||||
if fs := f.newFsObjectWithInfo(path+item.Title, item); fs != nil {
|
// If item has MD5 sum it is a file stored on drive
|
||||||
out <- fs
|
if o := f.newFsObjectWithInfo(filepath, item); o != nil {
|
||||||
|
out <- o
|
||||||
}
|
}
|
||||||
|
} else if len(item.ExportLinks) != 0 {
|
||||||
|
// If item has export links then it is a google doc
|
||||||
|
var firstExtension, firstLink string
|
||||||
|
var extension, link string
|
||||||
|
outer:
|
||||||
|
for exportMimeType, exportLink := range item.ExportLinks {
|
||||||
|
exportExtension, ok := mimeTypeToExtension[exportMimeType]
|
||||||
|
if !ok {
|
||||||
|
fs.Debug(filepath, "Unknown export type %q - ignoring", exportMimeType)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if firstExtension == "" {
|
||||||
|
firstExtension = exportExtension
|
||||||
|
firstLink = exportLink
|
||||||
|
}
|
||||||
|
for _, preferredExtension := range f.extensions {
|
||||||
|
if exportExtension == preferredExtension {
|
||||||
|
extension = exportExtension
|
||||||
|
link = exportLink
|
||||||
|
break outer
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if extension == "" {
|
||||||
|
extension = firstExtension
|
||||||
|
link = firstLink
|
||||||
|
}
|
||||||
|
if extension == "" {
|
||||||
|
fs.Debug(filepath, "No export formats found")
|
||||||
|
} else {
|
||||||
|
if o := f.newFsObjectWithInfo(filepath+"."+extension, item); o != nil {
|
||||||
|
obj := o.(*Object)
|
||||||
|
obj.isDocument = true
|
||||||
|
obj.url = link
|
||||||
|
obj.bytes = -1
|
||||||
|
out <- o
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
fs.Debug(filepath, "Ignoring unknown object")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
@ -817,6 +910,18 @@ func (o *Object) Hash(t fs.HashType) (string, error) {
|
||||||
|
|
||||||
// Size returns the size of an object in bytes
|
// Size returns the size of an object in bytes
|
||||||
func (o *Object) Size() int64 {
|
func (o *Object) Size() int64 {
|
||||||
|
if o.isDocument && o.bytes < 0 {
|
||||||
|
// If it is a google doc then we must HEAD it to see
|
||||||
|
// how big it is
|
||||||
|
res, err := o.httpResponse("HEAD")
|
||||||
|
if err != nil {
|
||||||
|
fs.ErrorLog(o, "Error reading size: %v", err)
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
_ = res.Body.Close()
|
||||||
|
o.bytes = res.ContentLength
|
||||||
|
// fs.Debug(o, "Read size of document: %v", o.bytes)
|
||||||
|
}
|
||||||
return o.bytes
|
return o.bytes
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -908,17 +1013,17 @@ func (o *Object) Storable() bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// Open an object for read
|
// httpResponse gets an http.Response object for the object o.url
|
||||||
func (o *Object) Open() (in io.ReadCloser, err error) {
|
// using the method passed in
|
||||||
|
func (o *Object) httpResponse(method string) (res *http.Response, err error) {
|
||||||
if o.url == "" {
|
if o.url == "" {
|
||||||
return nil, fmt.Errorf("Forbidden to download - check sharing permission")
|
return nil, fmt.Errorf("Forbidden to download - check sharing permission")
|
||||||
}
|
}
|
||||||
req, err := http.NewRequest("GET", o.url, nil)
|
req, err := http.NewRequest(method, o.url, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
req.Header.Set("User-Agent", fs.UserAgent)
|
req.Header.Set("User-Agent", fs.UserAgent)
|
||||||
var res *http.Response
|
|
||||||
err = o.fs.pacer.Call(func() (bool, error) {
|
err = o.fs.pacer.Call(func() (bool, error) {
|
||||||
res, err = o.fs.client.Do(req)
|
res, err = o.fs.client.Do(req)
|
||||||
return shouldRetry(err)
|
return shouldRetry(err)
|
||||||
|
@ -926,10 +1031,57 @@ func (o *Object) Open() (in io.ReadCloser, err error) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
return res, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// openFile represents an Object open for reading
|
||||||
|
type openFile struct {
|
||||||
|
o *Object // Object we are reading for
|
||||||
|
in io.ReadCloser // reading from here
|
||||||
|
bytes int64 // number of bytes read on this connection
|
||||||
|
eof bool // whether we have read end of file
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read bytes from the object - see io.Reader
|
||||||
|
func (file *openFile) Read(p []byte) (n int, err error) {
|
||||||
|
n, err = file.in.Read(p)
|
||||||
|
file.bytes += int64(n)
|
||||||
|
if err == io.EOF {
|
||||||
|
file.eof = true
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close the object and update bytes read
|
||||||
|
func (file *openFile) Close() (err error) {
|
||||||
|
// If end of file, update bytes read
|
||||||
|
if file.eof {
|
||||||
|
// fs.Debug(file.o, "Updating size of doc after download to %v", file.bytes)
|
||||||
|
file.o.bytes = file.bytes
|
||||||
|
}
|
||||||
|
return file.in.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check it satisfies the interfaces
|
||||||
|
var _ io.ReadCloser = &openFile{}
|
||||||
|
|
||||||
|
// Open an object for read
|
||||||
|
func (o *Object) Open() (in io.ReadCloser, err error) {
|
||||||
|
res, err := o.httpResponse("GET")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
if res.StatusCode != 200 {
|
if res.StatusCode != 200 {
|
||||||
_ = res.Body.Close() // ignore error
|
_ = res.Body.Close() // ignore error
|
||||||
return nil, fmt.Errorf("Bad response: %d: %s", res.StatusCode, res.Status)
|
return nil, fmt.Errorf("Bad response: %d: %s", res.StatusCode, res.Status)
|
||||||
}
|
}
|
||||||
|
// If it is a document, update the size with what we are
|
||||||
|
// reading as it can change from the HEAD in the listing to
|
||||||
|
// this GET. This stops rclone marking the transfer as
|
||||||
|
// corrupted.
|
||||||
|
if o.isDocument {
|
||||||
|
return &openFile{o: o, in: res.Body}, nil
|
||||||
|
}
|
||||||
return res.Body, nil
|
return res.Body, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -939,6 +1091,9 @@ func (o *Object) Open() (in io.ReadCloser, err error) {
|
||||||
//
|
//
|
||||||
// The new object may have been created if an error is returned
|
// The new object may have been created if an error is returned
|
||||||
func (o *Object) Update(in io.Reader, modTime time.Time, size int64) error {
|
func (o *Object) Update(in io.Reader, modTime time.Time, size int64) error {
|
||||||
|
if o.isDocument {
|
||||||
|
return fmt.Errorf("Can't update a google document")
|
||||||
|
}
|
||||||
updateInfo := &drive.File{
|
updateInfo := &drive.File{
|
||||||
Id: o.id,
|
Id: o.id,
|
||||||
ModifiedDate: modTime.Format(timeFormatOut),
|
ModifiedDate: modTime.Format(timeFormatOut),
|
||||||
|
@ -969,6 +1124,9 @@ func (o *Object) Update(in io.Reader, modTime time.Time, size int64) error {
|
||||||
|
|
||||||
// Remove an object
|
// Remove an object
|
||||||
func (o *Object) Remove() error {
|
func (o *Object) Remove() error {
|
||||||
|
if o.isDocument {
|
||||||
|
return fmt.Errorf("Can't delete a google document")
|
||||||
|
}
|
||||||
var err error
|
var err error
|
||||||
err = o.fs.pacer.Call(func() (bool, error) {
|
err = o.fs.pacer.Call(func() (bool, error) {
|
||||||
if *driveUseTrash {
|
if *driveUseTrash {
|
||||||
|
|
Loading…
Reference in a new issue