fspath: Implement a connection string parser #4996
This is implemented as a state machine parser so it can emit sensible error messages. It does not use the connection strings elsewhere in rclone yet - see subsequent commits. An optional fuzzer is implemented for the Parse function.
This commit is contained in:
parent
3b21857097
commit
8a46dd1b57
8 changed files with 613 additions and 70 deletions
|
@ -8,28 +8,37 @@ import (
|
|||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/rclone/rclone/fs/config/configmap"
|
||||
"github.com/rclone/rclone/fs/driveletter"
|
||||
)
|
||||
|
||||
const (
|
||||
configNameRe = `[\w_ -]+`
|
||||
remoteNameRe = `^(:?` + configNameRe + `):`
|
||||
remoteNameRe = `^(:?` + configNameRe + `)`
|
||||
)
|
||||
|
||||
var (
|
||||
errInvalidCharacters = errors.New("config name contains invalid characters - may only contain 0-9, A-Z ,a-z ,_ , - and space")
|
||||
errInvalidCharacters = errors.New("config name contains invalid characters - may only contain `0-9`, `A-Z`, `a-z`, `_`, `-` and space")
|
||||
errCantBeEmpty = errors.New("can't use empty string as a path")
|
||||
errCantStartWithDash = errors.New("config name starts with -")
|
||||
|
||||
// urlMatcher is a pattern to match an rclone URL
|
||||
// note that this matches invalid remoteNames
|
||||
urlMatcher = regexp.MustCompile(`^(:?[^\\/:]*):(.*)$`)
|
||||
errCantStartWithDash = errors.New("config name starts with `-`")
|
||||
errBadConfigParam = errors.New("config parameters may only contain `0-9`, `A-Z`, `a-z` and `_`")
|
||||
errEmptyConfigParam = errors.New("config parameters can't be empty")
|
||||
errConfigNameEmpty = errors.New("config name can't be empty")
|
||||
errConfigName = errors.New("config name needs a trailing `:`")
|
||||
errParam = errors.New("config parameter must end with `,` or `:`")
|
||||
errValue = errors.New("unquoted config value must end with `,` or `:`")
|
||||
errQuotedValue = errors.New("unterminated quoted config value")
|
||||
errAfterQuote = errors.New("expecting `:` or `,` or another quote after a quote")
|
||||
errSyntax = errors.New("syntax error in config string")
|
||||
|
||||
// configNameMatcher is a pattern to match an rclone config name
|
||||
configNameMatcher = regexp.MustCompile(`^` + configNameRe + `$`)
|
||||
|
||||
// remoteNameMatcher is a pattern to match an rclone remote name
|
||||
remoteNameMatcher = regexp.MustCompile(remoteNameRe + `$`)
|
||||
// remoteNameMatcher is a pattern to match an rclone remote name at the start of a config
|
||||
remoteNameMatcher = regexp.MustCompile(`^` + remoteNameRe + `(:$|,)`)
|
||||
|
||||
// Function to check if string is a drive letter to be overriden in the tests
|
||||
isDriveLetter = driveletter.IsDriveLetter
|
||||
)
|
||||
|
||||
// CheckConfigName returns an error if configName is invalid
|
||||
|
@ -44,41 +53,210 @@ func CheckConfigName(configName string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// CheckRemoteName returns an error if remoteName is invalid
|
||||
func CheckRemoteName(remoteName string) error {
|
||||
// checkRemoteName returns an error if remoteName is invalid
|
||||
func checkRemoteName(remoteName string) error {
|
||||
if remoteName == ":" || remoteName == "::" {
|
||||
return errConfigNameEmpty
|
||||
}
|
||||
if !remoteNameMatcher.MatchString(remoteName) {
|
||||
return errInvalidCharacters
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Parse deconstructs a remote path into configName and fsPath
|
||||
// Return true if c is a valid character for a config parameter
|
||||
func isConfigParam(c rune) bool {
|
||||
return ((c >= 'a' && c <= 'z') ||
|
||||
(c >= 'A' && c <= 'Z') ||
|
||||
(c >= '0' && c <= '9') ||
|
||||
c == '_')
|
||||
}
|
||||
|
||||
// Parsed is returned from Parse with the results of the connection string decomposition
|
||||
//
|
||||
// If the path is a local path then configName will be returned as "".
|
||||
// If Name is "" then it is a local path in Path
|
||||
//
|
||||
// So "remote:path/to/dir" will return "remote", "path/to/dir"
|
||||
// and "/path/to/local" will return ("", "/path/to/local")
|
||||
// Note that ConfigString + ":" + Path is equal to the input of Parse except that Path may have had
|
||||
// \ converted to /
|
||||
type Parsed struct {
|
||||
Name string // Just the name of the config: "remote" or ":backend"
|
||||
ConfigString string // The whole config string: "remote:" or ":backend,value=6:"
|
||||
Path string // The file system path, may be empty
|
||||
Config configmap.Simple // key/value config parsed out of ConfigString may be nil
|
||||
}
|
||||
|
||||
// Parse deconstructs a path into a Parsed structure
|
||||
//
|
||||
// If the path is a local path then parsed.Name will be returned as "".
|
||||
//
|
||||
// So "remote:path/to/dir" will return Parsed{Name:"remote", Path:"path/to/dir"},
|
||||
// and "/path/to/local" will return Parsed{Name:"", Path:"/path/to/local"}
|
||||
//
|
||||
// Note that this will turn \ into / in the fsPath on Windows
|
||||
//
|
||||
// An error may be returned if the remote name has invalid characters
|
||||
// in it or if the path is empty.
|
||||
func Parse(path string) (configName, fsPath string, err error) {
|
||||
// An error may be returned if the remote name has invalid characters or the
|
||||
// parameters are invalid or the path is empty.
|
||||
func Parse(path string) (parsed Parsed, err error) {
|
||||
parsed.Path = path
|
||||
if path == "" {
|
||||
return "", "", errCantBeEmpty
|
||||
return parsed, errCantBeEmpty
|
||||
}
|
||||
parts := urlMatcher.FindStringSubmatch(path)
|
||||
configName, fsPath = "", path
|
||||
if parts != nil && !driveletter.IsDriveLetter(parts[1]) {
|
||||
configName, fsPath = parts[1], parts[2]
|
||||
err = CheckRemoteName(configName + ":")
|
||||
if err != nil {
|
||||
return configName, fsPath, errInvalidCharacters
|
||||
// If path has no `:` in, it must be a local path
|
||||
if strings.IndexRune(path, ':') < 0 {
|
||||
return parsed, nil
|
||||
}
|
||||
// States for parser
|
||||
const (
|
||||
stateConfigName = uint8(iota)
|
||||
stateParam
|
||||
stateValue
|
||||
stateQuotedValue
|
||||
stateAfterQuote
|
||||
stateDone
|
||||
)
|
||||
var (
|
||||
state = stateConfigName // current state of parser
|
||||
i int // position in path
|
||||
prev int // previous position in path
|
||||
c rune // current rune under consideration
|
||||
quote rune // kind of quote to end this quoted string
|
||||
param string // current parameter value
|
||||
doubled bool // set if had doubled quotes
|
||||
)
|
||||
loop:
|
||||
for i, c = range path {
|
||||
// Example Parse
|
||||
// remote,param=value,param2="qvalue":/path/to/file
|
||||
switch state {
|
||||
// Parses "remote,"
|
||||
case stateConfigName:
|
||||
if i == 0 && c == ':' {
|
||||
continue
|
||||
} else if c == '/' || c == '\\' {
|
||||
// `:` or `,` not before a path separator must be a local path,
|
||||
// except if the path started with `:` in which case it was intended
|
||||
// to be an on the fly remote so return an error.
|
||||
if path[0] == ':' {
|
||||
return parsed, errInvalidCharacters
|
||||
}
|
||||
return parsed, nil
|
||||
} else if c == ':' || c == ',' {
|
||||
parsed.Name = path[:i]
|
||||
err := checkRemoteName(parsed.Name + ":")
|
||||
if err != nil {
|
||||
return parsed, err
|
||||
}
|
||||
prev = i + 1
|
||||
if c == ':' {
|
||||
// If we parsed a drive letter, must be a local path
|
||||
if isDriveLetter(parsed.Name) {
|
||||
parsed.Name = ""
|
||||
return parsed, nil
|
||||
}
|
||||
state = stateDone
|
||||
break loop
|
||||
}
|
||||
state = stateParam
|
||||
parsed.Config = make(configmap.Simple)
|
||||
}
|
||||
// Parses param= and param2=
|
||||
case stateParam:
|
||||
if c == ':' || c == ',' || c == '=' {
|
||||
param = path[prev:i]
|
||||
if len(param) == 0 {
|
||||
return parsed, errEmptyConfigParam
|
||||
}
|
||||
prev = i + 1
|
||||
if c == '=' {
|
||||
state = stateValue
|
||||
break
|
||||
}
|
||||
parsed.Config[param] = "true"
|
||||
if c == ':' {
|
||||
state = stateDone
|
||||
break loop
|
||||
}
|
||||
state = stateParam
|
||||
} else if !isConfigParam(c) {
|
||||
return parsed, errBadConfigParam
|
||||
}
|
||||
// Parses value
|
||||
case stateValue:
|
||||
if c == '\'' || c == '"' {
|
||||
if i == prev {
|
||||
quote = c
|
||||
state = stateQuotedValue
|
||||
prev = i + 1
|
||||
doubled = false
|
||||
break
|
||||
}
|
||||
} else if c == ':' || c == ',' {
|
||||
value := path[prev:i]
|
||||
prev = i + 1
|
||||
parsed.Config[param] = value
|
||||
if c == ':' {
|
||||
state = stateDone
|
||||
break loop
|
||||
}
|
||||
state = stateParam
|
||||
}
|
||||
// Parses "qvalue"
|
||||
case stateQuotedValue:
|
||||
if c == quote {
|
||||
state = stateAfterQuote
|
||||
}
|
||||
// Parses : or , or quote after "qvalue"
|
||||
case stateAfterQuote:
|
||||
if c == ':' || c == ',' {
|
||||
value := path[prev : i-1]
|
||||
// replace any doubled quotes if there were any
|
||||
if doubled {
|
||||
value = strings.Replace(value, string(quote)+string(quote), string(quote), -1)
|
||||
}
|
||||
prev = i + 1
|
||||
parsed.Config[param] = value
|
||||
if c == ':' {
|
||||
state = stateDone
|
||||
break loop
|
||||
} else {
|
||||
state = stateParam
|
||||
}
|
||||
} else if c == quote {
|
||||
// Here is a doubled quote to indicate a literal quote
|
||||
state = stateQuotedValue
|
||||
doubled = true
|
||||
} else {
|
||||
return parsed, errAfterQuote
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Depending on which state we were in when we fell off the
|
||||
// end of the state machine we can return a sensible error.
|
||||
switch state {
|
||||
default:
|
||||
return parsed, errSyntax
|
||||
case stateConfigName:
|
||||
return parsed, errConfigName
|
||||
case stateParam:
|
||||
return parsed, errParam
|
||||
case stateValue:
|
||||
return parsed, errValue
|
||||
case stateQuotedValue:
|
||||
return parsed, errQuotedValue
|
||||
case stateAfterQuote:
|
||||
return parsed, errAfterQuote
|
||||
case stateDone:
|
||||
break
|
||||
}
|
||||
|
||||
parsed.ConfigString = path[:i]
|
||||
parsed.Path = path[i+1:]
|
||||
|
||||
// change native directory separators to / if there are any
|
||||
fsPath = filepath.ToSlash(fsPath)
|
||||
return configName, fsPath, nil
|
||||
parsed.Path = filepath.ToSlash(parsed.Path)
|
||||
return parsed, nil
|
||||
}
|
||||
|
||||
// Split splits a remote into a parent and a leaf
|
||||
|
@ -90,10 +268,11 @@ func Parse(path string) (configName, fsPath string, err error) {
|
|||
// The returned values have the property that parent + leaf == remote
|
||||
// (except under Windows where \ will be translated into /)
|
||||
func Split(remote string) (parent string, leaf string, err error) {
|
||||
remoteName, remotePath, err := Parse(remote)
|
||||
parsed, err := Parse(remote)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
remoteName, remotePath := parsed.ConfigString, parsed.Path
|
||||
if remoteName != "" {
|
||||
remoteName += ":"
|
||||
}
|
||||
|
@ -130,7 +309,8 @@ func JoinRootPath(remote, filePath string) string {
|
|||
if strings.HasPrefix(remote, "//") {
|
||||
return "/" + path.Join(remote, filePath)
|
||||
}
|
||||
remoteName, remotePath, err := Parse(remote)
|
||||
parsed, err := Parse(remote)
|
||||
remoteName, remotePath := parsed.ConfigString, parsed.Path
|
||||
if err != nil {
|
||||
// Couldn't parse so assume it is a path
|
||||
remoteName = ""
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue