drive: add --drive-import-formats

Add a new flag to the drive backend to allow document conversions on upload.
The existing --drive-formats flag has been renamed to --drive-export-formats.
The old flag still works, for backward compatibility.
This commit is contained in:
Fabian Möller 2018-08-19 16:16:11 +02:00 committed by Nick Craig-Wood
parent 690a44e40e
commit 171e39b230
7 changed files with 562 additions and 163 deletions

View file

@ -51,7 +51,7 @@ const (
timeFormatIn = time.RFC3339 timeFormatIn = time.RFC3339
timeFormatOut = "2006-01-02T15:04:05.000000000Z07:00" timeFormatOut = "2006-01-02T15:04:05.000000000Z07:00"
minSleep = 10 * time.Millisecond minSleep = 10 * time.Millisecond
defaultExtensions = "docx,xlsx,pptx,svg" defaultExportExtensions = "docx,xlsx,pptx,svg"
scopePrefix = "https://www.googleapis.com/auth/" scopePrefix = "https://www.googleapis.com/auth/"
defaultScope = "drive" defaultScope = "drive"
// chunkSize is the size of the chunks created during a resumable upload and should be a power of two. // chunkSize is the size of the chunks created during a resumable upload and should be a power of two.
@ -103,10 +103,10 @@ var (
"text/plain": ".txt", "text/plain": ".txt",
"text/tab-separated-values": ".tsv", "text/tab-separated-values": ".tsv",
} }
extensionToMimeType map[string]string partialFields = "id,name,size,md5Checksum,trashed,modifiedTime,createdTime,mimeType,parents"
partialFields = "id,name,size,md5Checksum,trashed,modifiedTime,createdTime,mimeType,parents" fetchFormatsOnce sync.Once // make sure we fetch the export/import formats only once
exportFormatsOnce sync.Once // make sure we fetch the export formats only once _exportFormats map[string][]string // allowed export MIME type conversions
_exportFormats map[string][]string // allowed export MIME type conversions _importFormats map[string][]string // allowed import MIME type conversions
) )
// Register with Fs // Register with Fs
@ -214,9 +214,25 @@ func init() {
Advanced: true, Advanced: true,
}, { }, {
Name: "formats", Name: "formats",
Default: defaultExtensions, Default: "",
Help: "Deprecated: see export_formats",
Advanced: true,
Hide: fs.OptionHideConfigurator,
}, {
Name: "export_formats",
Default: defaultExportExtensions,
Help: "Comma separated list of preferred formats for downloading Google docs.", Help: "Comma separated list of preferred formats for downloading Google docs.",
Advanced: true, Advanced: true,
}, {
Name: "import_formats",
Default: "",
Help: "Comma separated list of preferred formats for uploading Google docs.",
Advanced: true,
}, {
Name: "allow_import_name_change",
Default: false,
Help: "Allow the filetype to change when uploading Google docs (e.g. file.doc to file.docx). This will confuse sync and reupload every time.",
Advanced: true,
}, { }, {
Name: "use_created_date", Name: "use_created_date",
Default: false, Default: false,
@ -290,6 +306,9 @@ type Options struct {
SharedWithMe bool `config:"shared_with_me"` SharedWithMe bool `config:"shared_with_me"`
TrashedOnly bool `config:"trashed_only"` TrashedOnly bool `config:"trashed_only"`
Extensions string `config:"formats"` Extensions string `config:"formats"`
ExportExtensions string `config:"export_formats"`
ImportExtensions string `config:"import_formats"`
AllowImportNameChange bool `config:"allow_import_name_change"`
UseCreatedDate bool `config:"use_created_date"` UseCreatedDate bool `config:"use_created_date"`
ListChunk int64 `config:"list_chunk"` ListChunk int64 `config:"list_chunk"`
Impersonate string `config:"impersonate"` Impersonate string `config:"impersonate"`
@ -303,32 +322,33 @@ type Options struct {
// Fs represents a remote drive server // Fs represents a remote drive server
type Fs struct { type Fs struct {
name string // name of this remote name string // name of this remote
root string // the path we are working on root string // the path we are working on
opt Options // parsed options opt Options // parsed options
features *fs.Features // optional features features *fs.Features // optional features
svc *drive.Service // the connection to the drive server svc *drive.Service // the connection to the drive server
v2Svc *drive_v2.Service // used to create download links for the v2 api v2Svc *drive_v2.Service // used to create download links for the v2 api
client *http.Client // authorized client client *http.Client // authorized client
rootFolderID string // the id of the root folder rootFolderID string // the id of the root folder
dirCache *dircache.DirCache // Map of directory path to directory id dirCache *dircache.DirCache // Map of directory path to directory id
pacer *pacer.Pacer // To pace the API calls pacer *pacer.Pacer // To pace the API calls
extensions []string // preferred extensions to download docs exportExtensions []string // preferred extensions to download docs
isTeamDrive bool // true if this is a team drive importMimeTypes []string // MIME types to convert to docs
isTeamDrive bool // true if this is a team drive
} }
// Object describes a drive object // Object describes a drive object
type Object struct { type Object struct {
fs *Fs // what this object is part of fs *Fs // what this object is part of
remote string // The remote path remote string // The remote path
id string // Drive Id of this object id string // Drive Id of this object
url string // Download URL of this object url string // Download URL of this object
md5sum string // md5sum of the object md5sum string // md5sum of the object
bytes int64 // size of the object bytes int64 // size of the object
modifiedDate string // RFC3339 time it was last modified modifiedDate string // RFC3339 time it was last modified
isDocument bool // if set this is a Google doc documentMimeType string // if set this is a Google doc
v2Download bool // generate v2 download link ondemand v2Download bool // generate v2 download link ondemand
mimeType string mimeType string
} }
// ------------------------------------------------------------ // ------------------------------------------------------------
@ -444,7 +464,7 @@ func (f *Fs) list(dirIDs []string, title string, directoriesOnly bool, filesOnly
// if the search title contains an extension and the extension is in the export extensions add a search // if the search title contains an extension and the extension is in the export extensions add a search
// for the filename without the extension. // for the filename without the extension.
// assume that export extensions don't contain escape sequences and only have one part (not .tar.gz) // assume that export extensions don't contain escape sequences and only have one part (not .tar.gz)
if ext := path.Ext(searchTitle); handleGdocs && len(ext) > 0 && containsString(f.extensions, ext) { if ext := path.Ext(searchTitle); handleGdocs && len(ext) > 0 && containsString(f.exportExtensions, ext) {
stem = title[:len(title)-len(ext)] stem = title[:len(title)-len(ext)]
query = append(query, fmt.Sprintf("(name='%s' or name='%s')", searchTitle, searchTitle[:len(searchTitle)-len(ext)])) query = append(query, fmt.Sprintf("(name='%s' or name='%s')", searchTitle, searchTitle[:len(searchTitle)-len(ext)]))
} else { } else {
@ -563,49 +583,35 @@ func isInternalMimeType(mimeType string) bool {
} }
// parseExtensions parses a list of comma separated extensions // parseExtensions parses a list of comma separated extensions
// into a list of unique extensions with leading "." // into a list of unique extensions with leading "." and a list of associated MIME types
func parseExtensions(extensions ...string) ([]string, error) { func parseExtensions(extensionsIn ...string) (extensions, mimeTypes []string, err error) {
var result []string for _, extensionText := range extensionsIn {
for _, extensionText := range extensions {
for _, extension := range strings.Split(extensionText, ",") { for _, extension := range strings.Split(extensionText, ",") {
extension = strings.ToLower(strings.TrimSpace(extension)) extension = strings.ToLower(strings.TrimSpace(extension))
if extension == "" {
continue
}
if len(extension) > 0 && extension[0] != '.' { if len(extension) > 0 && extension[0] != '.' {
extension = "." + extension extension = "." + extension
} }
if mime.TypeByExtension(extension) == "" { mt := mime.TypeByExtension(extension)
return result, errors.Errorf("couldn't find MIME type for extension %q", extension) if mt == "" {
return extensions, mimeTypes, errors.Errorf("couldn't find MIME type for extension %q", extension)
} }
found := false found := false
for _, existingExtension := range result { for _, existingExtension := range extensions {
if extension == existingExtension { if extension == existingExtension {
found = true found = true
break break
} }
} }
if !found { if !found {
result = append(result, extension) extensions = append(extensions, extension)
mimeTypes = append(mimeTypes, mt)
} }
} }
} }
return result, nil return
}
// parseExtensionMimeTypes parses the given extensions using parseExtensions
// and maps each resulting extension to its MIME type.
func parseExtensionMimeTypes(extensions ...string) ([]string, error) {
parsedExtensions, err := parseExtensions(extensions...)
if err != nil {
return nil, err
}
mimeTypes := make([]string, 0, len(parsedExtensions))
for i, extension := range parsedExtensions {
mt := mime.TypeByExtension(extension)
if mt == "" {
return nil, errors.Errorf("couldn't find MIME type for extension %q", extension)
}
mimeTypes[i] = mt
}
return mimeTypes, nil
} }
// Figure out if the user wants to use a team drive // Figure out if the user wants to use a team drive
@ -770,7 +776,18 @@ func NewFs(name, path string, m configmap.Mapper) (fs.Fs, error) {
f.dirCache = dircache.New(root, f.rootFolderID, f) f.dirCache = dircache.New(root, f.rootFolderID, f)
// Parse extensions // Parse extensions
f.extensions, err = parseExtensions(opt.Extensions, defaultExtensions) if opt.Extensions != "" {
if opt.ExportExtensions != defaultExportExtensions {
return nil, errors.New("only one of 'formats' and 'export_formats' can be specified")
}
opt.Extensions, opt.ExportExtensions = "", opt.Extensions
}
f.exportExtensions, _, err = parseExtensions(opt.ExportExtensions, defaultExportExtensions)
if err != nil {
return nil, err
}
_, f.importMimeTypes, err = parseExtensions(opt.ImportExtensions)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -824,6 +841,15 @@ func (f *Fs) newObjectWithInfo(remote string, info *drive.File) (fs.Object, erro
return o, nil return o, nil
} }
// newDocumentObjectWithInfo builds an object for remote from info via
// newObjectWithInfo and then applies the Google document metadata for
// the given export extension and MIME type using setGdocsMetaData.
//
// Any error from newObjectWithInfo is returned unchanged with a nil
// object. The result of newObjectWithInfo is type asserted to *Object,
// so this assumes that is the concrete type it returns.
func (f *Fs) newDocumentObjectWithInfo(remote, extension, mimeType string, info *drive.File) (fs.Object, error) {
	obj, err := f.newObjectWithInfo(remote, info)
	if err != nil {
		return nil, err
	}
	doc := obj.(*Object)
	doc.setGdocsMetaData(info, extension, mimeType)
	return obj, nil
}
// NewObject finds the Object at remote. If it can't be found // NewObject finds the Object at remote. If it can't be found
// it returns the error fs.ErrorObjectNotFound. // it returns the error fs.ErrorObjectNotFound.
func (f *Fs) NewObject(remote string) (fs.Object, error) { func (f *Fs) NewObject(remote string) (fs.Object, error) {
@ -884,50 +910,101 @@ func isAuthOwned(item *drive.File) bool {
return false return false
} }
// fetchFormats fills the package level _exportFormats and
// _importFormats tables from the Drive About resource.
//
// The request is wrapped in fetchFormatsOnce, so it is attempted at
// most once. If it fails, the error is logged and both tables are set
// to empty maps.
func (f *Fs) fetchFormats() {
	fetchFormatsOnce.Do(func() {
		var (
			about *drive.About
			err   error
		)
		err = f.pacer.Call(func() (bool, error) {
			about, err = f.svc.About.Get().Fields("exportFormats,importFormats").Do()
			return shouldRetry(err)
		})
		if err == nil {
			_exportFormats = fixMimeTypeMap(about.ExportFormats)
			_importFormats = fixMimeTypeMap(about.ImportFormats)
			return
		}
		fs.Errorf(f, "Failed to get Drive exportFormats and importFormats: %v", err)
		_exportFormats = map[string][]string{}
		_importFormats = map[string][]string{}
	})
}
// exportFormats returns the export formats from drive, fetching them // exportFormats returns the export formats from drive, fetching them
// if necessary. // if necessary.
// //
// if the fetch fails then it will not export any drive formats // if the fetch fails then it will not export any drive formats
func (f *Fs) exportFormats() map[string][]string { func (f *Fs) exportFormats() map[string][]string {
exportFormatsOnce.Do(func() { f.fetchFormats()
var about *drive.About
var err error
err = f.pacer.Call(func() (bool, error) {
about, err = f.svc.About.Get().
Fields("exportFormats").
Do()
return shouldRetry(err)
})
if err != nil {
fs.Errorf(f, "Failed to get Drive exportFormats: %v", err)
_exportFormats = map[string][]string{}
return
}
_exportFormats = fixMimeTypeMap(about.ExportFormats)
})
return _exportFormats return _exportFormats
} }
// findExportFormat works out the optimum extension and MIME type // importFormats returns the import formats from drive, fetching them
// for this item. // if necessary.
// //
// Look through the extensions and find the first format that can be // if the fetch fails then it will not import any drive formats
// converted. If none found then return "", "" func (f *Fs) importFormats() map[string][]string {
func (f *Fs) findExportFormat(item *drive.File) (extension, filename, mimeType string, isDocument bool) { f.fetchFormats()
exportMimeTypes, isDocument := f.exportFormats()[item.MimeType] return _importFormats
}
// findExportFormatByMimeType works out the optimum export settings
// for the given MIME type.
//
// Look through the exportExtensions and find the first format that can be
// converted. If none found then return ("", "", false)
func (f *Fs) findExportFormatByMimeType(itemMimeType string) (
extension, mimeType string, isDocument bool) {
exportMimeTypes, isDocument := f.exportFormats()[itemMimeType]
if isDocument { if isDocument {
for _, _extension := range f.extensions { for _, _extension := range f.exportExtensions {
_mimeType := mime.TypeByExtension(_extension) _mimeType := mime.TypeByExtension(_extension)
for _, emt := range exportMimeTypes { for _, emt := range exportMimeTypes {
if emt == _mimeType { if emt == _mimeType {
return _extension, item.Name + _extension, _mimeType, true return _extension, _mimeType, true
} }
} }
} }
} }
// else return empty // else return empty
return "", "", "", isDocument return "", "", isDocument
}
// findExportFormat works out the optimum export settings
// for the given drive.File.
//
// The extension, MIME type and isDocument flag come from
// findExportFormatByMimeType called with item.MimeType. When an
// extension is found, filename is item.Name with that extension
// appended. When no extension is found, extension and filename are ""
// and mimeType and isDocument are passed through as reported by
// findExportFormatByMimeType.
func (f *Fs) findExportFormat(item *drive.File) (extension, filename, mimeType string, isDocument bool) {
	extension, mimeType, isDocument = f.findExportFormatByMimeType(item.MimeType)
	if extension == "" {
		return "", "", mimeType, isDocument
	}
	return extension, item.Name + extension, mimeType, isDocument
}
// findImportFormat finds the matching upload MIME type for a file
// If the given MIME type is in importMimeTypes, the matching upload
// MIME type is returned
//
// When no match is found "" is returned.
func (f *Fs) findImportFormat(mimeType string) string {
mimeType = fixMimeType(mimeType)
ifs := f.importFormats()
for _, mt := range f.importMimeTypes {
if mt == mimeType {
importMimeTypes := ifs[mimeType]
if l := len(importMimeTypes); l > 0 {
if l > 1 {
fs.Infof(f, "found %d import formats for %q: %q", l, mimeType, importMimeTypes)
}
return importMimeTypes[0]
}
}
}
return ""
} }
// List the objects and directories in dir into entries. The // List the objects and directories in dir into entries. The
@ -1170,11 +1247,10 @@ func (f *Fs) itemToDirEntry(remote string, item *drive.File) (fs.DirEntry, error
fs.Debugf(remote, "No export formats found for %q", item.MimeType) fs.Debugf(remote, "No export formats found for %q", item.MimeType)
break break
} }
o, err := f.newObjectWithInfo(remote+extension, item) o, err := f.newDocumentObjectWithInfo(remote, extension, exportMimeType, item)
if err != nil { if err != nil {
return nil, err return nil, err
} }
o.(*Object).setGdocsMetaData(item, extension, exportMimeType)
return o, nil return o, nil
} }
return nil, nil return nil, nil
@ -1239,11 +1315,35 @@ func (f *Fs) PutUnchecked(in io.Reader, src fs.ObjectInfo, options ...fs.OpenOpt
remote := src.Remote() remote := src.Remote()
size := src.Size() size := src.Size()
modTime := src.ModTime() modTime := src.ModTime()
srcMimeType := fs.MimeTypeFromName(remote)
srcExt := path.Ext(remote)
exportExt := ""
importMimeType := ""
exportMimeType := ""
if f.importMimeTypes != nil && !f.opt.SkipGdocs {
importMimeType = f.findImportFormat(srcMimeType)
if isInternalMimeType(importMimeType) {
remote = remote[:len(remote)-len(srcExt)]
exportExt, exportMimeType, _ = f.findExportFormatByMimeType(importMimeType)
if exportExt == "" {
return nil, errors.Errorf("No export format found for %q", importMimeType)
}
if exportExt != srcExt && !f.opt.AllowImportNameChange {
return nil, errors.Errorf("Can't convert %q to a document with a different export filetype (%q)", srcExt, exportExt)
}
}
}
o, createInfo, err := f.createFileInfo(remote, modTime, size) o, createInfo, err := f.createFileInfo(remote, modTime, size)
if err != nil { if err != nil {
return nil, err return nil, err
} }
if importMimeType != "" {
createInfo.MimeType = importMimeType
}
var info *drive.File var info *drive.File
if size == 0 || size < int64(f.opt.UploadCutoff) { if size == 0 || size < int64(f.opt.UploadCutoff) {
@ -1251,7 +1351,7 @@ func (f *Fs) PutUnchecked(in io.Reader, src fs.ObjectInfo, options ...fs.OpenOpt
// Don't retry, return a retry error instead // Don't retry, return a retry error instead
err = f.pacer.CallNoRetry(func() (bool, error) { err = f.pacer.CallNoRetry(func() (bool, error) {
info, err = f.svc.Files.Create(createInfo). info, err = f.svc.Files.Create(createInfo).
Media(in, googleapi.ContentType("")). Media(in, googleapi.ContentType(srcMimeType)).
Fields(googleapi.Field(partialFields)). Fields(googleapi.Field(partialFields)).
SupportsTeamDrives(f.isTeamDrive). SupportsTeamDrives(f.isTeamDrive).
KeepRevisionForever(f.opt.KeepRevisionForever). KeepRevisionForever(f.opt.KeepRevisionForever).
@ -1263,11 +1363,14 @@ func (f *Fs) PutUnchecked(in io.Reader, src fs.ObjectInfo, options ...fs.OpenOpt
} }
} else { } else {
// Upload the file in chunks // Upload the file in chunks
info, err = f.Upload(in, size, createInfo.MimeType, "", createInfo, remote) info, err = f.Upload(in, size, srcMimeType, "", createInfo, remote)
if err != nil { if err != nil {
return o, err return o, err
} }
} }
if isInternalMimeType(importMimeType) {
return f.newDocumentObjectWithInfo(remote, exportExt, exportMimeType, info)
}
o.setMetaData(info) o.setMetaData(info)
return o, nil return o, nil
} }
@ -1412,7 +1515,7 @@ func (f *Fs) Copy(src fs.Object, remote string) (fs.Object, error) {
fs.Debugf(src, "Can't copy - not same remote type") fs.Debugf(src, "Can't copy - not same remote type")
return nil, fs.ErrorCantCopy return nil, fs.ErrorCantCopy
} }
if srcObj.isDocument { if srcObj.documentMimeType != "" {
return nil, errors.New("can't copy a Google document") return nil, errors.New("can't copy a Google document")
} }
@ -1531,7 +1634,7 @@ func (f *Fs) Move(src fs.Object, remote string) (fs.Object, error) {
fs.Debugf(src, "Can't move - not same remote type") fs.Debugf(src, "Can't move - not same remote type")
return nil, fs.ErrorCantMove return nil, fs.ErrorCantMove
} }
if srcObj.isDocument { if srcObj.documentMimeType != "" {
return nil, errors.New("can't move a Google document") return nil, errors.New("can't move a Google document")
} }
_, srcParentID, err := srcObj.fs.dirCache.FindPath(src.Remote(), false) _, srcParentID, err := srcObj.fs.dirCache.FindPath(src.Remote(), false)
@ -1926,7 +2029,7 @@ func (o *Object) setGdocsMetaData(info *drive.File, extension, exportMimeType st
o.url = fmt.Sprintf("https://docs.google.com/presentation/d/%s/export/%s", info.Id, extension[1:]) o.url = fmt.Sprintf("https://docs.google.com/presentation/d/%s/export/%s", info.Id, extension[1:])
} }
} }
o.isDocument = true o.documentMimeType = o.mimeType
o.mimeType = exportMimeType o.mimeType = exportMimeType
o.bytes = -1 o.bytes = -1
} }
@ -2026,7 +2129,7 @@ func (o *Object) httpResponse(method string, options []fs.OpenOption) (req *http
if o.url == "" { if o.url == "" {
return nil, nil, errors.New("forbidden to download - check sharing permission") return nil, nil, errors.New("forbidden to download - check sharing permission")
} }
if o.isDocument { if o.documentMimeType != "" {
for _, o := range options { for _, o := range options {
// https://developers.google.com/drive/v3/web/manage-downloads#partial_download // https://developers.google.com/drive/v3/web/manage-downloads#partial_download
if _, ok := o.(*fs.RangeOption); ok { if _, ok := o.(*fs.RangeOption); ok {
@ -2144,7 +2247,7 @@ func (o *Object) Open(options ...fs.OpenOption) (in io.ReadCloser, err error) {
// reading as it can change from the HEAD in the listing to // reading as it can change from the HEAD in the listing to
// this GET. This stops rclone marking the transfer as // this GET. This stops rclone marking the transfer as
// corrupted. // corrupted.
if o.isDocument { if o.documentMimeType != "" {
return &openFile{o: o, in: res.Body}, nil return &openFile{o: o, in: res.Body}, nil
} }
return res.Body, nil return res.Body, nil
@ -2158,14 +2261,24 @@ func (o *Object) Open(options ...fs.OpenOption) (in io.ReadCloser, err error) {
func (o *Object) Update(in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error { func (o *Object) Update(in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
size := src.Size() size := src.Size()
modTime := src.ModTime() modTime := src.ModTime()
if o.isDocument { srcMimeType := fs.MimeType(src)
return errors.New("can't update a google document") importMimeType := ""
}
updateInfo := &drive.File{ updateInfo := &drive.File{
MimeType: fs.MimeType(src), MimeType: srcMimeType,
ModifiedTime: modTime.Format(timeFormatOut), ModifiedTime: modTime.Format(timeFormatOut),
} }
if o.fs.importMimeTypes != nil && !o.fs.opt.SkipGdocs {
importMimeType = o.fs.findImportFormat(updateInfo.MimeType)
if importMimeType != "" {
// FIXME: check importMimeType against original object MIME type
// if importMimeType != o.mimeType {
// return errors.Errorf("can't change google document type (o: %q, src: %q, import: %q)", o.mimeType, srcMimeType, importMimeType)
// }
updateInfo.MimeType = importMimeType
}
}
// Make the API request to upload metadata and file data. // Make the API request to upload metadata and file data.
var err error var err error
var info *drive.File var info *drive.File
@ -2173,7 +2286,7 @@ func (o *Object) Update(in io.Reader, src fs.ObjectInfo, options ...fs.OpenOptio
// Don't retry, return a retry error instead // Don't retry, return a retry error instead
err = o.fs.pacer.CallNoRetry(func() (bool, error) { err = o.fs.pacer.CallNoRetry(func() (bool, error) {
info, err = o.fs.svc.Files.Update(o.id, updateInfo). info, err = o.fs.svc.Files.Update(o.id, updateInfo).
Media(in, googleapi.ContentType("")). Media(in, googleapi.ContentType(srcMimeType)).
Fields(googleapi.Field(partialFields)). Fields(googleapi.Field(partialFields)).
SupportsTeamDrives(o.fs.isTeamDrive). SupportsTeamDrives(o.fs.isTeamDrive).
KeepRevisionForever(o.fs.opt.KeepRevisionForever). KeepRevisionForever(o.fs.opt.KeepRevisionForever).
@ -2185,20 +2298,22 @@ func (o *Object) Update(in io.Reader, src fs.ObjectInfo, options ...fs.OpenOptio
} }
} else { } else {
// Upload the file in chunks // Upload the file in chunks
info, err = o.fs.Upload(in, size, updateInfo.MimeType, o.id, updateInfo, o.remote) info, err = o.fs.Upload(in, size, srcMimeType, o.id, updateInfo, o.remote)
if err != nil { if err != nil {
return err return err
} }
} }
o.setMetaData(info) o.setMetaData(info)
if importMimeType != "" {
extension, exportMimeType, _ := o.fs.findExportFormatByMimeType(importMimeType)
o.setGdocsMetaData(info, extension, exportMimeType)
}
return nil return nil
} }
// Remove an object // Remove an object
func (o *Object) Remove() error { func (o *Object) Remove() error {
if o.isDocument {
return errors.New("can't delete a google document")
}
var err error var err error
err = o.fs.pacer.Call(func() (bool, error) { err = o.fs.pacer.Call(func() (bool, error) {
if o.fs.opt.UseTrash { if o.fs.opt.UseTrash {

View file

@ -1,64 +1,54 @@
package drive package drive
import ( import (
"bytes"
"encoding/json" "encoding/json"
"io"
"io/ioutil"
"mime" "mime"
"path/filepath"
"testing" "testing"
"google.golang.org/api/drive/v3" _ "github.com/ncw/rclone/backend/local"
"github.com/ncw/rclone/fs"
"github.com/ncw/rclone/fs/operations"
"github.com/ncw/rclone/fstest/fstests"
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/api/drive/v3"
) )
const exampleExportFormats = `{ /*
"application/vnd.google-apps.document": [ var additionalMimeTypes = map[string]string{
"application/rtf", "application/vnd.ms-excel.sheet.macroenabled.12": ".xlsm",
"application/vnd.oasis.opendocument.text", "application/vnd.ms-excel.template.macroenabled.12": ".xltm",
"text/html", "application/vnd.ms-powerpoint.presentation.macroenabled.12": ".pptm",
"application/pdf", "application/vnd.ms-powerpoint.slideshow.macroenabled.12": ".ppsm",
"application/epub+zip", "application/vnd.ms-powerpoint.template.macroenabled.12": ".potm",
"application/zip", "application/vnd.ms-powerpoint": ".ppt",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document", "application/vnd.ms-word.document.macroenabled.12": ".docm",
"text/plain" "application/vnd.ms-word.template.macroenabled.12": ".dotm",
], "application/vnd.openxmlformats-officedocument.presentationml.template": ".potx",
"application/vnd.google-apps.spreadsheet": [ "application/vnd.openxmlformats-officedocument.spreadsheetml.template": ".xltx",
"application/x-vnd.oasis.opendocument.spreadsheet", "application/vnd.openxmlformats-officedocument.wordprocessingml.template": ".dotx",
"text/tab-separated-values", "application/vnd.sun.xml.writer": ".sxw",
"application/pdf", "text/richtext": ".rtf",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", }
"text/csv", */
"application/zip",
"application/vnd.oasis.opendocument.spreadsheet"
],
"application/vnd.google-apps.jam": [
"application/pdf"
],
"application/vnd.google-apps.script": [
"application/vnd.google-apps.script+json"
],
"application/vnd.google-apps.presentation": [
"application/vnd.oasis.opendocument.presentation",
"application/pdf",
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
"text/plain"
],
"application/vnd.google-apps.form": [
"application/zip"
],
"application/vnd.google-apps.drawing": [
"image/svg+xml",
"image/png",
"application/pdf",
"image/jpeg"
]
}`
// Load the example export formats into exportFormats for testing // Load the example export formats into exportFormats for testing
func TestInternalLoadExampleExportFormats(t *testing.T) { func TestInternalLoadExampleFormats(t *testing.T) {
exportFormatsOnce.Do(func() {}) fetchFormatsOnce.Do(func() {})
assert.NoError(t, json.Unmarshal([]byte(exampleExportFormats), &_exportFormats)) buf, err := ioutil.ReadFile(filepath.FromSlash("test/about.json"))
_exportFormats = fixMimeTypeMap(_exportFormats) var about struct {
ExportFormats map[string][]string `json:"exportFormats,omitempty"`
ImportFormats map[string][]string `json:"importFormats,omitempty"`
}
require.NoError(t, err)
require.NoError(t, json.Unmarshal(buf, &about))
_exportFormats = fixMimeTypeMap(about.ExportFormats)
_importFormats = fixMimeTypeMap(about.ImportFormats)
} }
func TestInternalParseExtensions(t *testing.T) { func TestInternalParseExtensions(t *testing.T) {
@ -72,7 +62,7 @@ func TestInternalParseExtensions(t *testing.T) {
{"docx,svg,Docx", []string{".docx", ".svg"}, nil}, {"docx,svg,Docx", []string{".docx", ".svg"}, nil},
{"docx,potato,docx", []string{".docx"}, errors.New(`couldn't find MIME type for extension ".potato"`)}, {"docx,potato,docx", []string{".docx"}, errors.New(`couldn't find MIME type for extension ".potato"`)},
} { } {
extensions, gotErr := parseExtensions(test.in) extensions, _, gotErr := parseExtensions(test.in)
if test.wantErr == nil { if test.wantErr == nil {
assert.NoError(t, gotErr) assert.NoError(t, gotErr)
} else { } else {
@ -82,7 +72,7 @@ func TestInternalParseExtensions(t *testing.T) {
} }
// Test it is appending // Test it is appending
extensions, gotErr := parseExtensions("docx,svg", "docx,svg,xlsx") extensions, _, gotErr := parseExtensions("docx,svg", "docx,svg,xlsx")
assert.NoError(t, gotErr) assert.NoError(t, gotErr)
assert.Equal(t, []string{".docx", ".svg", ".xlsx"}, extensions) assert.Equal(t, []string{".docx", ".svg", ".xlsx"}, extensions)
} }
@ -104,11 +94,11 @@ func TestInternalFindExportFormat(t *testing.T) {
{[]string{".xls", ".csv", ".svg"}, "", ""}, {[]string{".xls", ".csv", ".svg"}, "", ""},
} { } {
f := new(Fs) f := new(Fs)
f.extensions = test.extensions f.exportExtensions = test.extensions
gotExtension, gotFilename, gotMimeType, gotIsDocument := f.findExportFormat(item) gotExtension, gotFilename, gotMimeType, gotIsDocument := f.findExportFormat(item)
assert.Equal(t, test.wantExtension, gotExtension) assert.Equal(t, test.wantExtension, gotExtension)
if test.wantExtension != "" { if test.wantExtension != "" {
assert.Equal(t, item.Name+"."+gotExtension, gotFilename) assert.Equal(t, item.Name+gotExtension, gotFilename)
} else { } else {
assert.Equal(t, "", gotFilename) assert.Equal(t, "", gotFilename)
} }
@ -148,3 +138,85 @@ func TestExtensionsForExportFormats(t *testing.T) {
} }
} }
} }
// TestExtensionsForImportFormats checks that every non-internal source
// MIME type in _importFormats has at least one known file extension.
//
// The test is currently skipped unconditionally by the t.Skip() call
// on its first line.
func TestExtensionsForImportFormats(t *testing.T) {
	t.Skip()
	if _importFormats == nil {
		t.Error("_importFormats == nil")
	}
	for fromMT := range _importFormats {
		if isInternalMimeType(fromMT) {
			continue
		}
		extensions, err := mime.ExtensionsByType(fromMT)
		assert.NoError(t, err, "invalid MIME type %q", fromMT)
		assert.NotEmpty(t, extensions, "No extension found for %q", fromMT)
	}
}
// InternalTestDocumentImport runs operations.CopyFile for
// "example2.doc" between a Fs rooted at test/files and f, with the
// import extensions set to "odt,ods,doc".
//
// AllowImportNameChange is forced to true for the duration of the test
// and restored afterwards. f.importMimeTypes is overwritten and not
// restored.
func (f *Fs) InternalTestDocumentImport(t *testing.T) {
	savedAllow := f.opt.AllowImportNameChange
	f.opt.AllowImportNameChange = true
	defer func() { f.opt.AllowImportNameChange = savedAllow }()

	relPath := filepath.FromSlash("test/files")
	testFilesPath, err := filepath.Abs(relPath)
	require.NoError(t, err)

	testFilesFs, err := fs.NewFs(testFilesPath)
	require.NoError(t, err)

	_, f.importMimeTypes, err = parseExtensions("odt,ods,doc")
	require.NoError(t, err)

	copyErr := operations.CopyFile(f, testFilesFs, "example2.doc", "example2.doc")
	require.NoError(t, copyErr)
}
// InternalTestDocumentUpdate runs operations.CopyFile between a Fs
// rooted at test/files and f using the file names "example2.xlsx" and
// "example1.ods", with the import extensions set to "odt,ods,doc".
//
// f.importMimeTypes is overwritten and not restored.
func (f *Fs) InternalTestDocumentUpdate(t *testing.T) {
	relPath := filepath.FromSlash("test/files")
	testFilesPath, err := filepath.Abs(relPath)
	require.NoError(t, err)

	testFilesFs, err := fs.NewFs(testFilesPath)
	require.NoError(t, err)

	_, f.importMimeTypes, err = parseExtensions("odt,ods,doc")
	require.NoError(t, err)

	copyErr := operations.CopyFile(f, testFilesFs, "example2.xlsx", "example1.ods")
	require.NoError(t, copyErr)
}
// InternalTestDocumentExport opens the object "example2.txt" on f with
// the export extensions set to "txt", reads it fully and checks that
// the content contains the expected text excerpts.
//
// f.exportExtensions is overwritten and not restored.
func (f *Fs) InternalTestDocumentExport(t *testing.T) {
	var (
		buf bytes.Buffer
		err error
	)

	f.exportExtensions, _, err = parseExtensions("txt")
	require.NoError(t, err)

	obj, err := f.NewObject("example2.txt")
	require.NoError(t, err)

	rc, err := obj.Open()
	require.NoError(t, err)
	defer func() { require.NoError(t, rc.Close()) }()

	_, err = io.Copy(&buf, rc)
	require.NoError(t, err)

	text := buf.String()
	excerpts := []string{
		"Lorem ipsum dolor sit amet, consectetur",
		"porta at ultrices in, consectetur at augue.",
	}
	for _, excerpt := range excerpts {
		require.Contains(t, text, excerpt)
	}
}
// InternalTest runs the drive specific internal tests as subtests, in
// the order import, update, export.
func (f *Fs) InternalTest(t *testing.T) {
	subtests := []struct {
		name string
		run  func(*testing.T)
	}{
		{"DocumentImport", f.InternalTestDocumentImport},
		{"DocumentUpdate", f.InternalTestDocumentUpdate},
		{"DocumentExport", f.InternalTestDocumentExport},
	}
	for _, sub := range subtests {
		t.Run(sub.name, sub.run)
	}
}
var _ fstests.InternalTester = (*Fs)(nil)

View file

@ -0,0 +1,178 @@
{
"importFormats": {
"text/tab-separated-values": [
"application/vnd.google-apps.spreadsheet"
],
"application/x-vnd.oasis.opendocument.presentation": [
"application/vnd.google-apps.presentation"
],
"image/jpeg": [
"application/vnd.google-apps.document"
],
"image/bmp": [
"application/vnd.google-apps.document"
],
"image/gif": [
"application/vnd.google-apps.document"
],
"application/vnd.ms-excel.sheet.macroenabled.12": [
"application/vnd.google-apps.spreadsheet"
],
"application/vnd.openxmlformats-officedocument.wordprocessingml.template": [
"application/vnd.google-apps.document"
],
"application/vnd.ms-powerpoint.presentation.macroenabled.12": [
"application/vnd.google-apps.presentation"
],
"application/vnd.ms-word.template.macroenabled.12": [
"application/vnd.google-apps.document"
],
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": [
"application/vnd.google-apps.document"
],
"image/pjpeg": [
"application/vnd.google-apps.document"
],
"application/vnd.google-apps.script+text/plain": [
"application/vnd.google-apps.script"
],
"application/vnd.ms-excel": [
"application/vnd.google-apps.spreadsheet"
],
"application/vnd.sun.xml.writer": [
"application/vnd.google-apps.document"
],
"application/vnd.ms-word.document.macroenabled.12": [
"application/vnd.google-apps.document"
],
"application/vnd.ms-powerpoint.slideshow.macroenabled.12": [
"application/vnd.google-apps.presentation"
],
"text/rtf": [
"application/vnd.google-apps.document"
],
"text/plain": [
"application/vnd.google-apps.document"
],
"application/vnd.oasis.opendocument.spreadsheet": [
"application/vnd.google-apps.spreadsheet"
],
"application/x-vnd.oasis.opendocument.spreadsheet": [
"application/vnd.google-apps.spreadsheet"
],
"image/png": [
"application/vnd.google-apps.document"
],
"application/x-vnd.oasis.opendocument.text": [
"application/vnd.google-apps.document"
],
"application/msword": [
"application/vnd.google-apps.document"
],
"application/pdf": [
"application/vnd.google-apps.document"
],
"application/json": [
"application/vnd.google-apps.script"
],
"application/x-msmetafile": [
"application/vnd.google-apps.drawing"
],
"application/vnd.openxmlformats-officedocument.spreadsheetml.template": [
"application/vnd.google-apps.spreadsheet"
],
"application/vnd.ms-powerpoint": [
"application/vnd.google-apps.presentation"
],
"application/vnd.ms-excel.template.macroenabled.12": [
"application/vnd.google-apps.spreadsheet"
],
"image/x-bmp": [
"application/vnd.google-apps.document"
],
"application/rtf": [
"application/vnd.google-apps.document"
],
"application/vnd.openxmlformats-officedocument.presentationml.template": [
"application/vnd.google-apps.presentation"
],
"image/x-png": [
"application/vnd.google-apps.document"
],
"text/html": [
"application/vnd.google-apps.document"
],
"application/vnd.oasis.opendocument.text": [
"application/vnd.google-apps.document"
],
"application/vnd.openxmlformats-officedocument.presentationml.presentation": [
"application/vnd.google-apps.presentation"
],
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": [
"application/vnd.google-apps.spreadsheet"
],
"application/vnd.google-apps.script+json": [
"application/vnd.google-apps.script"
],
"application/vnd.openxmlformats-officedocument.presentationml.slideshow": [
"application/vnd.google-apps.presentation"
],
"application/vnd.ms-powerpoint.template.macroenabled.12": [
"application/vnd.google-apps.presentation"
],
"text/csv": [
"application/vnd.google-apps.spreadsheet"
],
"application/vnd.oasis.opendocument.presentation": [
"application/vnd.google-apps.presentation"
],
"image/jpg": [
"application/vnd.google-apps.document"
],
"text/richtext": [
"application/vnd.google-apps.document"
]
},
"exportFormats": {
"application/vnd.google-apps.document": [
"application/rtf",
"application/vnd.oasis.opendocument.text",
"text/html",
"application/pdf",
"application/epub+zip",
"application/zip",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"text/plain"
],
"application/vnd.google-apps.spreadsheet": [
"application/x-vnd.oasis.opendocument.spreadsheet",
"text/tab-separated-values",
"application/pdf",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"text/csv",
"application/zip",
"application/vnd.oasis.opendocument.spreadsheet"
],
"application/vnd.google-apps.jam": [
"application/pdf"
],
"application/vnd.google-apps.script": [
"application/vnd.google-apps.script+json"
],
"application/vnd.google-apps.presentation": [
"application/vnd.oasis.opendocument.presentation",
"application/pdf",
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
"text/plain"
],
"application/vnd.google-apps.form": [
"application/zip"
],
"application/vnd.google-apps.drawing": [
"image/svg+xml",
"image/png",
"application/pdf",
"image/jpeg"
]
}
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -414,34 +414,69 @@ is buffered in memory one per transfer.
Reducing this will reduce memory usage but decrease performance. Reducing this will reduce memory usage but decrease performance.
#### --drive-formats #### #### --drive-export-formats / --drive-import-formats ####
Google documents can only be exported from Google drive. When rclone Google documents can be exported from and uploaded to Google Drive.
downloads a Google doc it chooses a format to download depending upon
this setting.
By default the formats are `docx,xlsx,pptx,svg` which are a sensible When rclone downloads a Google doc it chooses a format to download
default for an editable document. depending upon the `--drive-export-formats` setting.
By default the export formats are `docx,xlsx,pptx,svg` which are a
sensible default for an editable document.
When choosing a format, rclone runs down the list provided in order When choosing a format, rclone runs down the list provided in order
and chooses the first file format the doc can be exported as from the and chooses the first file format the doc can be exported as from the
list. If the file can't be exported to a format on the formats list, list. If the file can't be exported to a format on the formats list,
then rclone will choose a format from the default list. then rclone will choose a format from the default list.
If you prefer an archive copy then you might use `--drive-formats If you prefer an archive copy then you might use `--drive-export-formats
pdf`, or if you prefer openoffice/libreoffice formats you might use pdf`, or if you prefer openoffice/libreoffice formats you might use
`--drive-formats ods,odt,odp`. `--drive-export-formats ods,odt,odp`.
Note that rclone adds the extension to the google doc, so if it is Note that rclone adds the extension to the google doc, so if it is
called `My Spreadsheet` on google docs, it will be exported as `My called `My Spreadsheet` on google docs, it will be exported as `My
Spreadsheet.xlsx` or `My Spreadsheet.pdf` etc. Spreadsheet.xlsx` or `My Spreadsheet.pdf` etc.
Here are the possible extensions with their corresponding mime types. When importing files into Google Drive, rclone will convert all
files with an extension in `--drive-import-formats` to their
associated document type.
rclone will not convert any files by default, since the conversion
is a lossy process.
The conversion must result in a file with the same extension when
the `--drive-export-formats` rules are applied to the uploaded document.
Here are some examples for allowed and prohibited conversions.
| export-formats | import-formats | Upload Ext | Document Ext | Allowed |
| -------------- | -------------- | ---------- | ------------ | ------- |
| odt | odt | odt | odt | Yes |
| odt | docx,odt | odt | odt | Yes |
| | docx | docx | docx | Yes |
| | odt | odt | docx | No |
| odt,docx | docx,odt | docx | odt | No |
| docx,odt | docx,odt | docx | docx | Yes |
| docx,odt | docx,odt | odt | docx | No |
This limitation can be disabled by specifying `--drive-allow-import-name-change`.
When using this flag, rclone can convert multiple file types resulting
in the same document type at once, eg with `--drive-import-formats docx,odt,txt`,
all files having these extensions would result in a document represented as a docx file.
This brings the additional risk of overwriting a document, if multiple files
have the same stem. Many rclone operations will not handle this name change
in any way. They assume an equal name when copying files and might copy the
file again or delete them when the name changes.
Here are the possible export extensions with their corresponding mime types.
Most of these can also be used for importing, but there are more that are not
listed here. Some of these additional ones might only be available when
the operating system provides the correct MIME type entries.
This list can be changed by Google Drive at any time and might not
represent the currently available conversions.
| Extension | Mime Type | Description | | Extension | Mime Type | Description |
| --------- |-----------| ------------| | --------- |-----------| ------------|
| csv | text/csv | Standard CSV format for Spreadsheets | | csv | text/csv | Standard CSV format for Spreadsheets |
| doc | application/msword | Microsoft Office Document |
| docx | application/vnd.openxmlformats-officedocument.wordprocessingml.document | Microsoft Office Document | | docx | application/vnd.openxmlformats-officedocument.wordprocessingml.document | Microsoft Office Document |
| epub | application/epub+zip | E-book format | | epub | application/epub+zip | E-book format |
| html | text/html | An HTML Document | | html | text/html | An HTML Document |
@ -457,7 +492,6 @@ Here are the possible extensions with their corresponding mime types.
| svg | image/svg+xml | Scalable Vector Graphics Format | | svg | image/svg+xml | Scalable Vector Graphics Format |
| tsv | text/tab-separated-values | Standard TSV format for spreadsheets | | tsv | text/tab-separated-values | Standard TSV format for spreadsheets |
| txt | text/plain | Plain Text | | txt | text/plain | Plain Text |
| xls | application/vnd.ms-excel | Microsoft Office Spreadsheet |
| xlsx | application/vnd.openxmlformats-officedocument.spreadsheetml.sheet | Microsoft Office Spreadsheet | | xlsx | application/vnd.openxmlformats-officedocument.spreadsheetml.sheet | Microsoft Office Spreadsheet |
| zip | application/zip | A ZIP file of HTML, Images CSS | | zip | application/zip | A ZIP file of HTML, Images CSS |