filter: add options for glob to regexp without anchors and special path rules

This commit is contained in:
albertony 2023-11-03 20:45:37 +01:00 committed by Nick Craig-Wood
parent 16d642825d
commit 731947f3ca
5 changed files with 191 additions and 34 deletions

View file

@ -1304,7 +1304,7 @@ func touchFiles(ctx context.Context, dateStr string, f fs.Fs, dir, glob string)
return files, fmt.Errorf("invalid date %q: %w", dateStr, err)
}
matcher, firstErr := filter.GlobToRegexp(glob, false)
matcher, firstErr := filter.GlobPathToRegexp(glob, false)
if firstErr != nil {
return files, fmt.Errorf("invalid glob %q", glob)
}

View file

@ -258,7 +258,7 @@ func (f *Filter) addDirGlobs(Include bool, glob string) error {
if dirGlob == "/" {
continue
}
dirRe, err := GlobToRegexp(dirGlob, f.Opt.IgnoreCase)
dirRe, err := GlobPathToRegexp(dirGlob, f.Opt.IgnoreCase)
if err != nil {
return err
}
@ -278,7 +278,7 @@ func (f *Filter) Add(Include bool, glob string) error {
if strings.Contains(glob, "**") {
isDirRule, isFileRule = true, true
}
re, err := GlobToRegexp(glob, f.Opt.IgnoreCase)
re, err := GlobPathToRegexp(glob, f.Opt.IgnoreCase)
if err != nil {
return err
}

View file

@ -11,23 +11,44 @@ import (
"github.com/rclone/rclone/fs"
)
// GlobToRegexp converts an rsync style glob to a regexp
// GlobPathToRegexp compiles an rsync style glob, interpreted as a path,
// into a regexp.
//
// It delegates to globToRegexp with path mode and anchoring both enabled;
// ignoreCase is passed through unchanged.
func GlobPathToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
	const (
		pathMode   = true
		addAnchors = true
	)
	return globToRegexp(glob, pathMode, addAnchors, ignoreCase)
}
// GlobStringToRegexp compiles an rsync style glob, interpreted as a plain
// string rather than a path, into a regexp.
//
// It delegates to globToRegexp with path mode disabled and anchoring
// enabled; ignoreCase is passed through unchanged.
func GlobStringToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
	const (
		pathMode   = false
		addAnchors = true
	)
	return globToRegexp(glob, pathMode, addAnchors, ignoreCase)
}
// globToRegexp converts an rsync style glob to a regexp
//
// documented in filtering.md
func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
// Set pathMode true for matching of path/file names, e.g.
// special treatment of path separator `/` and double asterisk `**`,
// see filtering.md for details.
//
// Set addAnchors true to add start of string `^` and end of string `$` anchors.
func globToRegexp(glob string, pathMode bool, addAnchors bool, ignoreCase bool) (*regexp.Regexp, error) {
var re bytes.Buffer
if ignoreCase {
_, _ = re.WriteString("(?i)")
}
if addAnchors {
if pathMode {
if strings.HasPrefix(glob, "/") {
glob = glob[1:]
_ = re.WriteByte('^')
} else {
_, _ = re.WriteString("(^|/)")
}
} else {
_, _ = re.WriteString("^")
}
}
consecutiveStars := 0
insertStars := func() error {
if consecutiveStars > 0 {
if pathMode {
switch consecutiveStars {
case 1:
_, _ = re.WriteString(`[^/]*`)
@ -36,6 +57,14 @@ func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
default:
return fmt.Errorf("too many stars in %q", glob)
}
} else {
switch consecutiveStars {
case 1:
_, _ = re.WriteString(`.*`)
default:
return fmt.Errorf("too many stars in %q", glob)
}
}
}
consecutiveStars = 0
return nil
@ -102,7 +131,11 @@ func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
case '*':
consecutiveStars++
case '?':
if pathMode {
_, _ = re.WriteString(`[^/]`)
} else {
_, _ = re.WriteString(`.`)
}
case '[':
_, _ = re.WriteRune(c)
inBrackets++
@ -152,7 +185,9 @@ func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
if inRegexp {
return nil, fmt.Errorf("mismatched '{{' and '}}' in glob %q", glob)
}
if addAnchors {
_ = re.WriteByte('$')
}
result, err := regexp.Compile(re.String())
if err != nil {
return nil, fmt.Errorf("bad glob pattern %q (regexp %q): %w", glob, re.String(), err)

View file

@ -7,7 +7,129 @@ import (
"github.com/stretchr/testify/require"
)
func TestGlobToRegexp(t *testing.T) {
// TestGlobStringToRegexp runs every glob in the table through
// GlobStringToRegexp, once with ignoreCase false and once with it true.
//
// When no error is expected, the source text of the returned regexp must
// equal want (prefixed with "(?i)" when ignoreCase is set). Otherwise the
// call must fail with a message containing the expected fragment and must
// return a nil regexp.
func TestGlobStringToRegexp(t *testing.T) {
	type globCase struct {
		in    string
		want  string
		error string
	}
	cases := []globCase{
		{``, `^$`, ``},
		{`potato`, `^potato$`, ``},
		{`potato,sausage`, `^potato,sausage$`, ``},
		{`/potato`, `^/potato$`, ``},
		{`potato?sausage`, `^potato.sausage$`, ``},
		{`potat[oa]`, `^potat[oa]$`, ``},
		{`potat[a-z]or`, `^potat[a-z]or$`, ``},
		{`potat[[:alpha:]]or`, `^potat[[:alpha:]]or$`, ``},
		{`'.' '+' '(' ')' '|' '^' '$'`, `^'\.' '\+' '\(' '\)' '\|' '\^' '\$'$`, ``},
		{`*.jpg`, `^.*\.jpg$`, ``},
		{`a{b,c,d}e`, `^a(b|c|d)e$`, ``},
		{`potato**`, ``, `too many stars`},
		{`potato**sausage`, ``, `too many stars`},
		{`*.p[lm]`, `^.*\.p[lm]$`, ``},
		{`[\[\]]`, `^[\[\]]$`, ``},
		{`***potato`, ``, `too many stars`},
		{`***`, ``, `too many stars`},
		{`ab]c`, ``, `mismatched ']'`},
		{`ab[c`, ``, `mismatched '[' and ']'`},
		{`ab{x{cd`, ``, `can't nest`},
		{`ab{}}cd`, ``, `mismatched '{' and '}'`},
		{`ab}c`, ``, `mismatched '{' and '}'`},
		{`ab{c`, ``, `mismatched '{' and '}'`},
		{`*.{jpg,png,gif}`, `^.*\.(jpg|png|gif)$`, ``},
		{`[a--b]`, ``, `bad glob pattern`},
		{`a\*b`, `^a\*b$`, ``},
		{`a\\b`, `^a\\b$`, ``},
		{`a{{.*}}b`, `^a(.*)b$`, ``},
		{`a{{.*}`, ``, `mismatched '{{' and '}}'`},
		{`{{regexp}}`, `^(regexp)$`, ``},
		{`\{{{regexp}}`, `^\{(regexp)$`, ``},
		{`/{{regexp}}`, `^/(regexp)$`, ``},
		{`/{{\d{8}}}`, `^/(\d{8})$`, ``},
		{`/{{\}}}`, `^/(\})$`, ``},
		{`{{(?i)regexp}}`, `^((?i)regexp)$`, ``},
	}
	for _, tc := range cases {
		for _, ignoreCase := range []bool{false, true} {
			re, err := GlobStringToRegexp(tc.in, ignoreCase)
			if tc.error != "" {
				// Failure path: the message must mention the expected
				// fragment and no regexp may be returned.
				require.Error(t, err, tc.in)
				assert.Contains(t, err.Error(), tc.error, tc.in)
				assert.Nil(t, re)
				continue
			}
			require.NoError(t, err, tc.in)
			expected := tc.want
			if ignoreCase {
				// The case-insensitive flag is emitted ahead of the pattern.
				expected = "(?i)" + expected
			}
			assert.Equal(t, expected, re.String(), tc.in)
		}
	}
}
// TestGlobStringToRegexpWithoutAnchors calls globToRegexp directly with
// pathMode and addAnchors both false, once per table entry for each value
// of ignoreCase.
//
// When no error is expected, the source text of the returned regexp must
// equal want (prefixed with "(?i)" when ignoreCase is set). Otherwise the
// call must fail with a message containing the expected fragment and must
// return a nil regexp.
func TestGlobStringToRegexpWithoutAnchors(t *testing.T) {
	type globCase struct {
		in    string
		want  string
		error string
	}
	cases := []globCase{
		{``, ``, ``},
		{`potato`, `potato`, ``},
		{`potato,sausage`, `potato,sausage`, ``},
		{`/potato`, `/potato`, ``},
		{`potato?sausage`, `potato.sausage`, ``},
		{`potat[oa]`, `potat[oa]`, ``},
		{`potat[a-z]or`, `potat[a-z]or`, ``},
		{`potat[[:alpha:]]or`, `potat[[:alpha:]]or`, ``},
		{`'.' '+' '(' ')' '|' '^' '$'`, `'\.' '\+' '\(' '\)' '\|' '\^' '\$'`, ``},
		{`*.jpg`, `.*\.jpg`, ``},
		{`a{b,c,d}e`, `a(b|c|d)e`, ``},
		{`potato**`, ``, `too many stars`},
		{`potato**sausage`, ``, `too many stars`},
		{`*.p[lm]`, `.*\.p[lm]`, ``},
		{`[\[\]]`, `[\[\]]`, ``},
		{`***potato`, ``, `too many stars`},
		{`***`, ``, `too many stars`},
		{`ab]c`, ``, `mismatched ']'`},
		{`ab[c`, ``, `mismatched '[' and ']'`},
		{`ab{x{cd`, ``, `can't nest`},
		{`ab{}}cd`, ``, `mismatched '{' and '}'`},
		{`ab}c`, ``, `mismatched '{' and '}'`},
		{`ab{c`, ``, `mismatched '{' and '}'`},
		{`*.{jpg,png,gif}`, `.*\.(jpg|png|gif)`, ``},
		{`[a--b]`, ``, `bad glob pattern`},
		{`a\*b`, `a\*b`, ``},
		{`a\\b`, `a\\b`, ``},
		{`a{{.*}}b`, `a(.*)b`, ``},
		{`a{{.*}`, ``, `mismatched '{{' and '}}'`},
		{`{{regexp}}`, `(regexp)`, ``},
		{`\{{{regexp}}`, `\{(regexp)`, ``},
		{`/{{regexp}}`, `/(regexp)`, ``},
		{`/{{\d{8}}}`, `/(\d{8})`, ``},
		{`/{{\}}}`, `/(\})`, ``},
		{`{{(?i)regexp}}`, `((?i)regexp)`, ``},
	}
	for _, tc := range cases {
		for _, ignoreCase := range []bool{false, true} {
			re, err := globToRegexp(tc.in, false, false, ignoreCase)
			if tc.error != "" {
				// Failure path: the message must mention the expected
				// fragment and no regexp may be returned.
				require.Error(t, err, tc.in)
				assert.Contains(t, err.Error(), tc.error, tc.in)
				assert.Nil(t, re)
				continue
			}
			require.NoError(t, err, tc.in)
			expected := tc.want
			if ignoreCase {
				// The case-insensitive flag is emitted ahead of the pattern.
				expected = "(?i)" + expected
			}
			assert.Equal(t, expected, re.String(), tc.in)
		}
	}
}
func TestGlobPathToRegexp(t *testing.T) {
for _, test := range []struct {
in string
want string
@ -28,20 +150,20 @@ func TestGlobToRegexp(t *testing.T) {
{`potato**sausage`, `(^|/)potato.*sausage$`, ``},
{`*.p[lm]`, `(^|/)[^/]*\.p[lm]$`, ``},
{`[\[\]]`, `(^|/)[\[\]]$`, ``},
{`***potato`, `(^|/)`, `too many stars`},
{`***`, `(^|/)`, `too many stars`},
{`ab]c`, `(^|/)`, `mismatched ']'`},
{`ab[c`, `(^|/)`, `mismatched '[' and ']'`},
{`ab{x{cd`, `(^|/)`, `can't nest`},
{`ab{}}cd`, `(^|/)`, `mismatched '{' and '}'`},
{`ab}c`, `(^|/)`, `mismatched '{' and '}'`},
{`ab{c`, `(^|/)`, `mismatched '{' and '}'`},
{`***potato`, ``, `too many stars`},
{`***`, ``, `too many stars`},
{`ab]c`, ``, `mismatched ']'`},
{`ab[c`, ``, `mismatched '[' and ']'`},
{`ab{x{cd`, ``, `can't nest`},
{`ab{}}cd`, ``, `mismatched '{' and '}'`},
{`ab}c`, ``, `mismatched '{' and '}'`},
{`ab{c`, ``, `mismatched '{' and '}'`},
{`*.{jpg,png,gif}`, `(^|/)[^/]*\.(jpg|png|gif)$`, ``},
{`[a--b]`, `(^|/)`, `bad glob pattern`},
{`[a--b]`, ``, `bad glob pattern`},
{`a\*b`, `(^|/)a\*b$`, ``},
{`a\\b`, `(^|/)a\\b$`, ``},
{`a{{.*}}b`, `(^|/)a(.*)b$`, ``},
{`a{{.*}`, `(^|/)a(.*)b$`, `mismatched '{{' and '}}'`},
{`a{{.*}`, ``, `mismatched '{{' and '}}'`},
{`{{regexp}}`, `(^|/)(regexp)$`, ``},
{`\{{{regexp}}`, `(^|/)\{(regexp)$`, ``},
{`/{{regexp}}`, `^(regexp)$`, ``},
@ -50,7 +172,7 @@ func TestGlobToRegexp(t *testing.T) {
{`{{(?i)regexp}}`, `(^|/)((?i)regexp)$`, ``},
} {
for _, ignoreCase := range []bool{false, true} {
gotRe, err := GlobToRegexp(test.in, ignoreCase)
gotRe, err := GlobPathToRegexp(test.in, ignoreCase)
if test.error == "" {
require.NoError(t, err, test.in)
prefix := ""
@ -111,7 +233,7 @@ func TestGlobToDirGlobs(t *testing.T) {
{"/sausage3**", []string{`/sausage3**/`, "/"}},
{"/a/*.jpg", []string{`/a/`, "/"}},
} {
_, err := GlobToRegexp(test.in, false)
_, err := GlobPathToRegexp(test.in, false)
assert.NoError(t, err)
got := globToDirGlobs(test.in)
assert.Equal(t, test.want, got, test.in)

View file

@ -67,7 +67,7 @@ func (rs *rules) add(Include bool, re *regexp.Regexp) {
// Add adds a filter rule with include or exclude status indicated
func (rs *rules) Add(Include bool, glob string) error {
re, err := GlobToRegexp(glob, false /* f.Opt.IgnoreCase */)
re, err := GlobPathToRegexp(glob, false /* f.Opt.IgnoreCase */)
if err != nil {
return err
}