rclone/fs/encodings/encodings.go

454 lines
14 KiB
Go
Raw Normal View History

// +build !noencode
package encodings
import (
"runtime"
"strings"
"github.com/rclone/rclone/lib/encoder"
)
// Base only encodes the zero byte and slash
const Base = encoder.MultiEncoder(
encoder.EncodeZero |
encoder.EncodeSlash |
encoder.EncodeDot)
// Display is the internal encoding for logging and output
const Display = encoder.Standard
// LocalUnix is the encoding used by the local backend for non windows platforms
const LocalUnix = Base
2019-07-27 18:01:52 +00:00
// LocalMacOS is the encoding used by the local backend for macOS
//
// macOS can't store invalid UTF-8, it converts them into %XX encoding
const LocalMacOS = encoder.MultiEncoder(
uint(Base) |
encoder.EncodeInvalidUtf8)
// LocalWindows is the encoding used by the local backend for windows platforms
//
// List of replaced characters:
// < (less than) -> '<' // FULLWIDTH LESS-THAN SIGN
// > (greater than) -> '>' // FULLWIDTH GREATER-THAN SIGN
// : (colon) -> ':' // FULLWIDTH COLON
// " (double quote) -> '"' // FULLWIDTH QUOTATION MARK
// \ (backslash) -> '\' // FULLWIDTH REVERSE SOLIDUS
// | (vertical line) -> '|' // FULLWIDTH VERTICAL LINE
// ? (question mark) -> '?' // FULLWIDTH QUESTION MARK
// * (asterisk) -> '*' // FULLWIDTH ASTERISK
//
// Additionally names can't end with a period (.) or space ( ).
// List of replaced characters:
// . (period) -> '.' // FULLWIDTH FULL STOP
// (space) -> '␠' // SYMBOL FOR SPACE
//
// Also encode invalid UTF-8 bytes as Go can't convert them to UTF-16.
//
// https://docs.microsoft.com/de-de/windows/desktop/FileIO/naming-a-file#naming-conventions
const LocalWindows = encoder.MultiEncoder(
uint(Base) |
encoder.EncodeWin |
encoder.EncodeBackSlash |
encoder.EncodeCtl |
encoder.EncodeRightSpace |
encoder.EncodeRightPeriod |
encoder.EncodeInvalidUtf8)
// AmazonCloudDrive is the encoding used by the amazonclouddrive backend
//
// Encode invalid UTF-8 bytes as json doesn't handle them properly.
const AmazonCloudDrive = encoder.MultiEncoder(
uint(Base) |
encoder.EncodeInvalidUtf8)
// B2 is the encoding used by the b2 backend
//
// See: https://www.backblaze.com/b2/docs/files.html
// Encode invalid UTF-8 bytes as json doesn't handle them properly.
// FIXME: allow /, but not leading, trailing or double
const B2 = encoder.MultiEncoder(
uint(Display) |
encoder.EncodeBackSlash |
encoder.EncodeInvalidUtf8)
// Box is the encoding used by the box backend
//
// From https://developer.box.com/docs/error-codes#section-400-bad-request :
// > Box only supports file or folder names that are 255 characters or less.
// > File names containing non-printable ascii, "/" or "\", names with leading
// > or trailing spaces, and the special names “.” and “..” are also unsupported.
//
// Testing revealed names with leading spaces work fine.
// Also encode invalid UTF-8 bytes as json doesn't handle them properly.
const Box = encoder.MultiEncoder(
uint(Display) |
encoder.EncodeBackSlash |
encoder.EncodeRightSpace |
encoder.EncodeInvalidUtf8)
// Drive is the encoding used by the drive backend
//
// Encode invalid UTF-8 bytes as json doesn't handle them properly.
// Don't encode / as it's a valid name character in drive.
const Drive = encoder.MultiEncoder(
encoder.EncodeInvalidUtf8)
// Dropbox is the encoding used by the dropbox backend
//
// https://www.dropbox.com/help/syncing-uploads/files-not-syncing lists / and \
// as invalid characters.
// Testing revealed names with trailing spaces and the DEL character don't work.
// Also encode invalid UTF-8 bytes as json doesn't handle them properly.
const Dropbox = encoder.MultiEncoder(
uint(Base) |
encoder.EncodeBackSlash |
encoder.EncodeDel |
encoder.EncodeRightSpace |
encoder.EncodeInvalidUtf8)
// GoogleCloudStorage is the encoding used by the googlecloudstorage backend
const GoogleCloudStorage = encoder.MultiEncoder(
uint(Base) |
encoder.EncodeCrLf |
encoder.EncodeInvalidUtf8)
// JottaCloud is the encoding used by the jottacloud backend
//
// Encode invalid UTF-8 bytes as xml doesn't handle them properly.
2018-11-02 12:13:47 +00:00
//
// Also: '*', '/', ':', '<', '>', '?', '\"', '\x00', '|'
const JottaCloud = encoder.MultiEncoder(
uint(Display) |
2018-11-02 12:13:47 +00:00
encoder.EncodeWin | // :?"*<>|
encoder.EncodeInvalidUtf8)
// Koofr is the encoding used by the koofr backend
//
// Encode invalid UTF-8 bytes as json doesn't handle them properly.
const Koofr = encoder.MultiEncoder(
uint(Display) |
encoder.EncodeBackSlash |
encoder.EncodeInvalidUtf8)
2019-10-02 18:36:25 +00:00
// Mailru is the encoding used by the mailru backend
//
// Encode invalid UTF-8 bytes as json doesn't handle them properly.
const Mailru = encoder.MultiEncoder(
uint(Display) |
encoder.EncodeWin | // :?"*<>|
encoder.EncodeBackSlash |
encoder.EncodeInvalidUtf8)
// Mega is the encoding used by the mega backend
//
// Encode invalid UTF-8 bytes as json doesn't handle them properly.
const Mega = encoder.MultiEncoder(
uint(Base) |
encoder.EncodeInvalidUtf8)
// OneDrive is the encoding used by the onedrive backend
//
// List of replaced characters:
// < (less than) -> '<' // FULLWIDTH LESS-THAN SIGN
// > (greater than) -> '>' // FULLWIDTH GREATER-THAN SIGN
// : (colon) -> ':' // FULLWIDTH COLON
// " (double quote) -> '"' // FULLWIDTH QUOTATION MARK
// \ (backslash) -> '\' // FULLWIDTH REVERSE SOLIDUS
// | (vertical line) -> '|' // FULLWIDTH VERTICAL LINE
// ? (question mark) -> '?' // FULLWIDTH QUESTION MARK
// * (asterisk) -> '*' // FULLWIDTH ASTERISK
// # (number sign) -> '#' // FULLWIDTH NUMBER SIGN
// % (percent sign) -> '%' // FULLWIDTH PERCENT SIGN
//
// Folder names cannot begin with a tilde ('~')
// List of replaced characters:
// ~ (tilde) -> '~' // FULLWIDTH TILDE
//
// Additionally names can't begin with a space ( ) or end with a period (.) or space ( ).
// List of replaced characters:
// . (period) -> '.' // FULLWIDTH FULL STOP
// (space) -> '␠' // SYMBOL FOR SPACE
//
// Also encode invalid UTF-8 bytes as json doesn't handle them.
//
// The OneDrive API documentation lists the set of reserved characters, but
// testing showed this list is incomplete. This are the differences:
// - " (double quote) is rejected, but missing in the documentation
// - space at the end of file and folder names is rejected, but missing in the documentation
// - period at the end of file names is rejected, but missing in the documentation
//
// Adding these restrictions to the OneDrive API documentation yields exactly
// the same rules as the Windows naming conventions.
//
// https://docs.microsoft.com/en-us/onedrive/developer/rest-api/concepts/addressing-driveitems?view=odsp-graph-online#path-encoding
const OneDrive = encoder.MultiEncoder(
uint(Display) |
encoder.EncodeBackSlash |
encoder.EncodeHashPercent |
encoder.EncodeLeftSpace |
encoder.EncodeLeftTilde |
encoder.EncodeRightPeriod |
encoder.EncodeRightSpace |
encoder.EncodeWin |
encoder.EncodeInvalidUtf8)
// OpenDrive is the encoding used by the opendrive backend
//
// List of replaced characters:
// < (less than) -> '<' // FULLWIDTH LESS-THAN SIGN
// > (greater than) -> '>' // FULLWIDTH GREATER-THAN SIGN
// : (colon) -> ':' // FULLWIDTH COLON
// " (double quote) -> '"' // FULLWIDTH QUOTATION MARK
// \ (backslash) -> '\' // FULLWIDTH REVERSE SOLIDUS
// | (vertical line) -> '|' // FULLWIDTH VERTICAL LINE
// ? (question mark) -> '?' // FULLWIDTH QUESTION MARK
// * (asterisk) -> '*' // FULLWIDTH ASTERISK
//
// Additionally names can't begin or end with a ASCII whitespace.
// List of replaced characters:
// (space) -> '␠' // SYMBOL FOR SPACE
// (horizontal tab) -> '␉' // SYMBOL FOR HORIZONTAL TABULATION
// (line feed) -> '␊' // SYMBOL FOR LINE FEED
// (vertical tab) -> '␋' // SYMBOL FOR VERTICAL TABULATION
// (carriage return) -> '␍' // SYMBOL FOR CARRIAGE RETURN
//
// Also encode invalid UTF-8 bytes as json doesn't handle them properly.
//
// https://www.opendrive.com/wp-content/uploads/guides/OpenDrive_API_guide.pdf
const OpenDrive = encoder.MultiEncoder(
uint(Base) |
encoder.EncodeWin |
encoder.EncodeLeftCrLfHtVt |
encoder.EncodeRightCrLfHtVt |
encoder.EncodeBackSlash |
encoder.EncodeLeftSpace |
encoder.EncodeRightSpace |
encoder.EncodeInvalidUtf8)
2019-10-02 08:54:36 +00:00
// PremiumizeMe is the encoding used by the premiumizeme backend
//
// Encode invalid UTF-8 bytes as json doesn't handle them properly.
const PremiumizeMe = encoder.MultiEncoder(
uint(Display) |
encoder.EncodeBackSlash |
encoder.EncodeDoubleQuote |
encoder.EncodeInvalidUtf8)
// Pcloud is the encoding used by the pcloud backend
//
// Encode invalid UTF-8 bytes as json doesn't handle them properly.
//
// TODO: Investigate Unicode simplification (\ gets converted to \ server-side)
const Pcloud = encoder.MultiEncoder(
uint(Display) |
encoder.EncodeBackSlash |
encoder.EncodeInvalidUtf8)
// Putio is the encoding used by the putio backend
//
// Note that \ is renamed to -
//
// Encode invalid UTF-8 bytes as json doesn't handle them properly.
const Putio = encoder.MultiEncoder(
uint(Display) |
encoder.EncodeBackSlash |
encoder.EncodeInvalidUtf8)
2019-09-23 11:55:49 +00:00
// Fichier is the encoding used by the fichier backend
//
// Characters that need escaping
//
// '\\': '\', // FULLWIDTH REVERSE SOLIDUS
// '<': '<', // FULLWIDTH LESS-THAN SIGN
// '>': '>', // FULLWIDTH GREATER-THAN SIGN
// '"': '"', // FULLWIDTH QUOTATION MARK - not on the list but seems to be reserved
// '\'': ''', // FULLWIDTH APOSTROPHE
// '$': '$', // FULLWIDTH DOLLAR SIGN
// '`': '`', // FULLWIDTH GRAVE ACCENT
//
// Leading space and trailing space
const Fichier = encoder.MultiEncoder(
uint(Display) |
encoder.EncodeBackSlash |
encoder.EncodeSingleQuote |
encoder.EncodeBackQuote |
encoder.EncodeDoubleQuote |
encoder.EncodeLtGt |
encoder.EncodeDollar |
encoder.EncodeLeftSpace |
encoder.EncodeRightSpace |
encoder.EncodeInvalidUtf8)
2019-07-27 15:58:33 +00:00
// FTP is the encoding used by the ftp backend
//
// The FTP protocal can't handle trailing spaces (for instance
// pureftpd turns them into _)
//
// proftpd can't handle '*' in file names
// pureftpd can't handle '[', ']' or '*'
const FTP = encoder.MultiEncoder(
uint(Display) |
encoder.EncodeRightSpace)
// S3 is the encoding used by the s3 backend
//
// Any UTF-8 character is valid in a key, however it can't handle
// invalid UTF-8 and / have a special meaning.
//
// The SDK can't seem to handle uploading files called '.'
//
// FIXME would be nice to add
// - initial / encoding
// - doubled / encoding
// - trailing / encoding
// so that AWS keys are always valid file names
const S3 = encoder.MultiEncoder(
encoder.EncodeInvalidUtf8 |
encoder.EncodeSlash |
encoder.EncodeDot)
2019-09-21 10:47:59 +00:00
// Swift is the encoding used by the swift backend
const Swift = encoder.MultiEncoder(
encoder.EncodeInvalidUtf8 |
encoder.EncodeSlash)
2019-09-21 11:36:16 +00:00
// AzureBlob is the encoding used by the azureblob backend
const AzureBlob = encoder.MultiEncoder(
encoder.EncodeInvalidUtf8 |
encoder.EncodeSlash |
encoder.EncodeCtl |
encoder.EncodeDel |
encoder.EncodeBackSlash |
encoder.EncodeRightPeriod)
2019-09-21 10:47:59 +00:00
2019-09-21 12:24:30 +00:00
// QingStor is the encoding used by the qingstor backend
const QingStor = encoder.MultiEncoder(
encoder.EncodeInvalidUtf8 |
encoder.EncodeCtl |
encoder.EncodeSlash)
2019-09-30 15:41:18 +00:00
// Sharefile is the encoding used by the sharefile backend
const Sharefile = encoder.MultiEncoder(
uint(Base) |
encoder.EncodeWin | // :?"*<>|
encoder.EncodeBackSlash | // \
encoder.EncodeCtl |
encoder.EncodeRightSpace |
encoder.EncodeRightPeriod |
encoder.EncodeLeftSpace |
encoder.EncodeLeftPeriod |
encoder.EncodeInvalidUtf8)
2019-10-05 09:22:43 +00:00
// Yandex is the encoding used by the yandex backend
//
// Of the control characters \t \n \r are allowed
// it doesn't seem worth making an exception for this
const Yandex = encoder.MultiEncoder(
uint(Display) |
encoder.EncodeInvalidUtf8)
// ByName returns the encoder for a give backend name or nil
func ByName(name string) encoder.Encoder {
switch strings.ToLower(name) {
case "base":
return Base
case "display":
return Display
case "amazonclouddrive":
return AmazonCloudDrive
2019-09-21 11:36:16 +00:00
case "azureblob":
return AzureBlob
case "b2":
return B2
case "box":
return Box
//case "cache":
case "drive":
return Drive
case "dropbox":
return Dropbox
//case "ftp":
2019-09-23 11:55:49 +00:00
case "ficher":
return Fichier
case "googlecloudstorage":
return GoogleCloudStorage
//case "http":
//case "hubic":
case "jottacloud":
return JottaCloud
case "koofr":
return Koofr
case "local":
return Local()
case "local-windows", "windows":
return LocalWindows
case "local-unix", "unix":
return LocalUnix
2019-07-27 18:01:52 +00:00
case "local-macos", "macos":
return LocalMacOS
2019-10-02 18:36:25 +00:00
case "mailru":
return Mailru
case "mega":
return Mega
case "onedrive":
return OneDrive
case "opendrive":
return OpenDrive
2019-10-02 08:54:36 +00:00
case "premiumizeme":
return PremiumizeMe
case "pcloud":
return Pcloud
2019-09-21 12:24:30 +00:00
case "qingstor":
return QingStor
2019-09-21 10:47:59 +00:00
case "s3":
return S3
2019-09-30 15:41:18 +00:00
case "sharefile":
return Sharefile
//case "sftp":
2019-09-21 10:47:59 +00:00
case "swift":
return Swift
//case "webdav":
2019-10-05 09:22:43 +00:00
case "yandex":
return Yandex
default:
return nil
}
}
// Local returns the local encoding for the current platform
func Local() encoder.MultiEncoder {
if runtime.GOOS == "windows" {
return LocalWindows
}
return LocalUnix
}
// Names returns the list of known encodings as accepted by ByName
func Names() []string {
return []string{
"base",
"display",
"amazonclouddrive",
"b2",
"box",
"drive",
"dropbox",
"googlecloudstorage",
"jottacloud",
"koofr",
"local-unix",
"local-windows",
2019-07-27 18:01:52 +00:00
"local-macos",
"local",
"mega",
"onedrive",
"opendrive",
2019-10-02 08:54:36 +00:00
"premiumizeme",
"pcloud",
2019-09-30 15:41:18 +00:00
"sharefile",
}
}