rclone/fs/filter/glob.go

257 lines
5.9 KiB
Go

// rsync style glob parser
package filter
import (
"bytes"
"fmt"
"regexp"
"strings"
"github.com/rclone/rclone/fs"
)
// GlobPathToRegexp converts an rsync style glob path to a regexp
func GlobPathToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
return globToRegexp(glob, true, true, ignoreCase)
}
// GlobStringToRegexp converts an rsync style glob string to a regexp
//
// Without adding of anchors but with ignoring of case, i.e. called
// `GlobStringToRegexp(glob, false, true)`, it takes a lenient approach
// where the glob "sum" would match "CheckSum", more similar to text
// search functions than strict glob filtering.
//
// With adding of anchors and not ignoring case, i.e. called
// `GlobStringToRegexp(glob, true, false)`, it uses a strict glob
// interpretation where the previous example would have to be changed to
// "*Sum" to match "CheckSum".
func GlobStringToRegexp(glob string, addAnchors bool, ignoreCase bool) (*regexp.Regexp, error) {
return globToRegexp(glob, false, addAnchors, ignoreCase)
}
// globToRegexp converts an rsync style glob to a regexp
//
// Set pathMode true for matching of path/file names, e.g.
// special treatment of path separator `/` and double asterisk `**`,
// see filtering.md for details.
//
// Set addAnchors true to add start of string `^` and end of string `$` anchors.
func globToRegexp(glob string, pathMode bool, addAnchors bool, ignoreCase bool) (*regexp.Regexp, error) {
var re bytes.Buffer
if ignoreCase {
_, _ = re.WriteString("(?i)")
}
if addAnchors {
if pathMode {
if strings.HasPrefix(glob, "/") {
glob = glob[1:]
_ = re.WriteByte('^')
} else {
_, _ = re.WriteString("(^|/)")
}
} else {
_, _ = re.WriteString("^")
}
}
consecutiveStars := 0
insertStars := func() error {
if consecutiveStars > 0 {
if pathMode {
switch consecutiveStars {
case 1:
_, _ = re.WriteString(`[^/]*`)
case 2:
_, _ = re.WriteString(`.*`)
default:
return fmt.Errorf("too many stars in %q", glob)
}
} else {
switch consecutiveStars {
case 1:
_, _ = re.WriteString(`.*`)
default:
return fmt.Errorf("too many stars in %q", glob)
}
}
}
consecutiveStars = 0
return nil
}
overwriteLastChar := func(c byte) {
buf := re.Bytes()
buf[len(buf)-1] = c
}
inBraces := false
inBrackets := 0
slashed := false
inRegexp := false // inside {{ ... }}
inRegexpEnd := false // have received }} waiting for more
var next, last rune
for _, c := range glob {
next, last = c, next
if slashed {
_, _ = re.WriteRune(c)
slashed = false
continue
}
if inRegexpEnd {
if c == '}' {
// Regexp is ending with }} choose longest segment
// Replace final ) with }
overwriteLastChar('}')
_ = re.WriteByte(')')
continue
} else {
inRegexpEnd = false
}
}
if inRegexp {
if c == '}' && last == '}' {
inRegexp = false
inRegexpEnd = true
// Replace final } with )
overwriteLastChar(')')
} else {
_, _ = re.WriteRune(c)
}
continue
}
if c != '*' {
err := insertStars()
if err != nil {
return nil, err
}
}
if inBrackets > 0 {
_, _ = re.WriteRune(c)
if c == '[' {
inBrackets++
}
if c == ']' {
inBrackets--
}
continue
}
switch c {
case '\\':
_, _ = re.WriteRune(c)
slashed = true
case '*':
consecutiveStars++
case '?':
if pathMode {
_, _ = re.WriteString(`[^/]`)
} else {
_, _ = re.WriteString(`.`)
}
case '[':
_, _ = re.WriteRune(c)
inBrackets++
case ']':
return nil, fmt.Errorf("mismatched ']' in glob %q", glob)
case '{':
if inBraces {
if last == '{' {
inRegexp = true
inBraces = false
} else {
return nil, fmt.Errorf("can't nest '{' '}' in glob %q", glob)
}
} else {
inBraces = true
_ = re.WriteByte('(')
}
case '}':
if !inBraces {
return nil, fmt.Errorf("mismatched '{' and '}' in glob %q", glob)
}
_ = re.WriteByte(')')
inBraces = false
case ',':
if inBraces {
_ = re.WriteByte('|')
} else {
_, _ = re.WriteRune(c)
}
case '.', '+', '(', ')', '|', '^', '$': // regexp meta characters not dealt with above
_ = re.WriteByte('\\')
_, _ = re.WriteRune(c)
default:
_, _ = re.WriteRune(c)
}
}
err := insertStars()
if err != nil {
return nil, err
}
if inBrackets > 0 {
return nil, fmt.Errorf("mismatched '[' and ']' in glob %q", glob)
}
if inBraces {
return nil, fmt.Errorf("mismatched '{' and '}' in glob %q", glob)
}
if inRegexp {
return nil, fmt.Errorf("mismatched '{{' and '}}' in glob %q", glob)
}
if addAnchors {
_ = re.WriteByte('$')
}
result, err := regexp.Compile(re.String())
if err != nil {
return nil, fmt.Errorf("bad glob pattern %q (regexp %q): %w", glob, re.String(), err)
}
return result, nil
}
var (
// Can't deal with
// / or ** in {}
// {{ regexp }}
tooHardRe = regexp.MustCompile(`({[^{}]*(\*\*|/)[^{}]*})|\{\{|\}\}`)
// Squash all /
squashSlash = regexp.MustCompile(`/{2,}`)
)
// globToDirGlobs takes a file glob and turns it into a series of
// directory globs. When matched with a directory (with a trailing /)
// this should answer the question as to whether this glob could be in
// this directory.
func globToDirGlobs(glob string) (out []string) {
if tooHardRe.MatchString(glob) {
// Can't figure this one out so return any directory might match
fs.Infof(nil, "Can't figure out directory filters from %q: looking in all directories", glob)
out = append(out, "/**")
return out
}
// Get rid of multiple /s
glob = squashSlash.ReplaceAllString(glob, "/")
// Split on / or **
// (** can contain /)
for {
i := strings.LastIndex(glob, "/")
j := strings.LastIndex(glob, "**")
what := ""
if j > i {
i = j
what = "**"
}
if i < 0 {
if len(out) == 0 {
out = append(out, "/**")
}
break
}
glob = glob[:i]
newGlob := glob + what + "/"
if len(out) == 0 || out[len(out)-1] != newGlob {
out = append(out, newGlob)
}
}
return out
}