From 731947f3cabe9c08a02ab8188d1fc83ffaa74a88 Mon Sep 17 00:00:00 2001 From: albertony <12441419+albertony@users.noreply.github.com> Date: Fri, 3 Nov 2023 20:45:37 +0100 Subject: [PATCH] filter: add options for glob to regexp without anchors and special path rules --- cmd/bisync/bisync_test.go | 2 +- fs/filter/filter.go | 4 +- fs/filter/glob.go | 69 +++++++++++++----- fs/filter/glob_test.go | 148 ++++++++++++++++++++++++++++++++++---- fs/filter/rules.go | 2 +- 5 files changed, 191 insertions(+), 34 deletions(-) diff --git a/cmd/bisync/bisync_test.go b/cmd/bisync/bisync_test.go index dd9f91ee3..5321eab9a 100644 --- a/cmd/bisync/bisync_test.go +++ b/cmd/bisync/bisync_test.go @@ -1304,7 +1304,7 @@ func touchFiles(ctx context.Context, dateStr string, f fs.Fs, dir, glob string) return files, fmt.Errorf("invalid date %q: %w", dateStr, err) } - matcher, firstErr := filter.GlobToRegexp(glob, false) + matcher, firstErr := filter.GlobPathToRegexp(glob, false) if firstErr != nil { return files, fmt.Errorf("invalid glob %q", glob) } diff --git a/fs/filter/filter.go b/fs/filter/filter.go index 408165080..61c1a2556 100644 --- a/fs/filter/filter.go +++ b/fs/filter/filter.go @@ -258,7 +258,7 @@ func (f *Filter) addDirGlobs(Include bool, glob string) error { if dirGlob == "/" { continue } - dirRe, err := GlobToRegexp(dirGlob, f.Opt.IgnoreCase) + dirRe, err := GlobPathToRegexp(dirGlob, f.Opt.IgnoreCase) if err != nil { return err } @@ -278,7 +278,7 @@ func (f *Filter) Add(Include bool, glob string) error { if strings.Contains(glob, "**") { isDirRule, isFileRule = true, true } - re, err := GlobToRegexp(glob, f.Opt.IgnoreCase) + re, err := GlobPathToRegexp(glob, f.Opt.IgnoreCase) if err != nil { return err } diff --git a/fs/filter/glob.go b/fs/filter/glob.go index 1ad53ccdb..de48da6d7 100644 --- a/fs/filter/glob.go +++ b/fs/filter/glob.go @@ -11,30 +11,59 @@ import ( "github.com/rclone/rclone/fs" ) -// GlobToRegexp converts an rsync style glob to a regexp +// GlobPathToRegexp converts an rsync style glob path to a regexp +func GlobPathToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) { + return globToRegexp(glob, true, true, ignoreCase) +} + +// GlobStringToRegexp converts an rsync style glob string to a regexp +func GlobStringToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) { + return globToRegexp(glob, false, true, ignoreCase) +} + +// globToRegexp converts an rsync style glob to a regexp // -// documented in filtering.md -func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) { +// Set pathMode true for matching of path/file names, e.g. +// special treatment of path separator `/` and double asterisk `**`, +// see filtering.md for details. +// +// Set addAnchors true to add start of string `^` and end of string `$` anchors. +func globToRegexp(glob string, pathMode bool, addAnchors bool, ignoreCase bool) (*regexp.Regexp, error) { var re bytes.Buffer if ignoreCase { _, _ = re.WriteString("(?i)") } - if strings.HasPrefix(glob, "/") { - glob = glob[1:] - _ = re.WriteByte('^') - } else { - _, _ = re.WriteString("(^|/)") + if addAnchors { + if pathMode { + if strings.HasPrefix(glob, "/") { + glob = glob[1:] + _ = re.WriteByte('^') + } else { + _, _ = re.WriteString("(^|/)") + } + } else { + _, _ = re.WriteString("^") + } } consecutiveStars := 0 insertStars := func() error { if consecutiveStars > 0 { - switch consecutiveStars { - case 1: - _, _ = re.WriteString(`[^/]*`) - case 2: - _, _ = re.WriteString(`.*`) - default: - return fmt.Errorf("too many stars in %q", glob) + if pathMode { + switch consecutiveStars { + case 1: + _, _ = re.WriteString(`[^/]*`) + case 2: + _, _ = re.WriteString(`.*`) + default: + return fmt.Errorf("too many stars in %q", glob) + } + } else { + switch consecutiveStars { + case 1: + _, _ = re.WriteString(`.*`) + default: + return fmt.Errorf("too many stars in %q", glob) + } } } consecutiveStars = 0 @@ -102,7 +131,11 @@ func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) { case '*': consecutiveStars++ case '?': - _, _ = re.WriteString(`[^/]`) + if pathMode { + _, _ = re.WriteString(`[^/]`) + } else { + _, _ = re.WriteString(`.`) + } case '[': _, _ = re.WriteRune(c) inBrackets++ @@ -152,7 +185,9 @@ func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) { if inRegexp { return nil, fmt.Errorf("mismatched '{{' and '}}' in glob %q", glob) } - _ = re.WriteByte('$') + if addAnchors { + _ = re.WriteByte('$') + } result, err := regexp.Compile(re.String()) if err != nil { return nil, fmt.Errorf("bad glob pattern %q (regexp %q): %w", glob, re.String(), err) diff --git a/fs/filter/glob_test.go b/fs/filter/glob_test.go index 5946a5a0a..04a07581d 100644 --- a/fs/filter/glob_test.go +++ b/fs/filter/glob_test.go @@ -7,7 +7,129 @@ import ( "github.com/stretchr/testify/require" ) -func TestGlobToRegexp(t *testing.T) { +func TestGlobStringToRegexp(t *testing.T) { + for _, test := range []struct { + in string + want string + error string + }{ + {``, `^$`, ``}, + {`potato`, `^potato$`, ``}, + {`potato,sausage`, `^potato,sausage$`, ``}, + {`/potato`, `^/potato$`, ``}, + {`potato?sausage`, `^potato.sausage$`, ``}, + {`potat[oa]`, `^potat[oa]$`, ``}, + {`potat[a-z]or`, `^potat[a-z]or$`, ``}, + {`potat[[:alpha:]]or`, `^potat[[:alpha:]]or$`, ``}, + {`'.' '+' '(' ')' '|' '^' '$'`, `^'\.' '\+' '\(' '\)' '\|' '\^' '\$'$`, ``}, + {`*.jpg`, `^.*\.jpg$`, ``}, + {`a{b,c,d}e`, `^a(b|c|d)e$`, ``}, + {`potato**`, ``, `too many stars`}, + {`potato**sausage`, ``, `too many stars`}, + {`*.p[lm]`, `^.*\.p[lm]$`, ``}, + {`[\[\]]`, `^[\[\]]$`, ``}, + {`***potato`, ``, `too many stars`}, + {`***`, ``, `too many stars`}, + {`ab]c`, ``, `mismatched ']'`}, + {`ab[c`, ``, `mismatched '[' and ']'`}, + {`ab{x{cd`, ``, `can't nest`}, + {`ab{}}cd`, ``, `mismatched '{' and '}'`}, + {`ab}c`, ``, `mismatched '{' and '}'`}, + {`ab{c`, ``, `mismatched '{' and '}'`}, + {`*.{jpg,png,gif}`, `^.*\.(jpg|png|gif)$`, ``}, + {`[a--b]`, ``, `bad glob pattern`}, + {`a\*b`, `^a\*b$`, ``}, + {`a\\b`, `^a\\b$`, ``}, + {`a{{.*}}b`, `^a(.*)b$`, ``}, + {`a{{.*}`, ``, `mismatched '{{' and '}}'`}, + {`{{regexp}}`, `^(regexp)$`, ``}, + {`\{{{regexp}}`, `^\{(regexp)$`, ``}, + {`/{{regexp}}`, `^/(regexp)$`, ``}, + {`/{{\d{8}}}`, `^/(\d{8})$`, ``}, + {`/{{\}}}`, `^/(\})$`, ``}, + {`{{(?i)regexp}}`, `^((?i)regexp)$`, ``}, + } { + for _, ignoreCase := range []bool{false, true} { + gotRe, err := GlobStringToRegexp(test.in, ignoreCase) + if test.error == "" { + require.NoError(t, err, test.in) + prefix := "" + if ignoreCase { + prefix = "(?i)" + } + got := gotRe.String() + assert.Equal(t, prefix+test.want, got, test.in) + } else { + require.Error(t, err, test.in) + assert.Contains(t, err.Error(), test.error, test.in) + assert.Nil(t, gotRe) + } + } + } +} + +func TestGlobStringToRegexpWithoutAnchors(t *testing.T) { + for _, test := range []struct { + in string + want string + error string + }{ + {``, ``, ``}, + {`potato`, `potato`, ``}, + {`potato,sausage`, `potato,sausage`, ``}, + {`/potato`, `/potato`, ``}, + {`potato?sausage`, `potato.sausage`, ``}, + {`potat[oa]`, `potat[oa]`, ``}, + {`potat[a-z]or`, `potat[a-z]or`, ``}, + {`potat[[:alpha:]]or`, `potat[[:alpha:]]or`, ``}, + {`'.' '+' '(' ')' '|' '^' '$'`, `'\.' '\+' '\(' '\)' '\|' '\^' '\$'`, ``}, + {`*.jpg`, `.*\.jpg`, ``}, + {`a{b,c,d}e`, `a(b|c|d)e`, ``}, + {`potato**`, ``, `too many stars`}, + {`potato**sausage`, ``, `too many stars`}, + {`*.p[lm]`, `.*\.p[lm]`, ``}, + {`[\[\]]`, `[\[\]]`, ``}, + {`***potato`, ``, `too many stars`}, + {`***`, ``, `too many stars`}, + {`ab]c`, ``, `mismatched ']'`}, + {`ab[c`, ``, `mismatched '[' and ']'`}, + {`ab{x{cd`, ``, `can't nest`}, + {`ab{}}cd`, ``, `mismatched '{' and '}'`}, + {`ab}c`, ``, `mismatched '{' and '}'`}, + {`ab{c`, ``, `mismatched '{' and '}'`}, + {`*.{jpg,png,gif}`, `.*\.(jpg|png|gif)`, ``}, + {`[a--b]`, ``, `bad glob pattern`}, + {`a\*b`, `a\*b`, ``}, + {`a\\b`, `a\\b`, ``}, + {`a{{.*}}b`, `a(.*)b`, ``}, + {`a{{.*}`, ``, `mismatched '{{' and '}}'`}, + {`{{regexp}}`, `(regexp)`, ``}, + {`\{{{regexp}}`, `\{(regexp)`, ``}, + {`/{{regexp}}`, `/(regexp)`, ``}, + {`/{{\d{8}}}`, `/(\d{8})`, ``}, + {`/{{\}}}`, `/(\})`, ``}, + {`{{(?i)regexp}}`, `((?i)regexp)`, ``}, + } { + for _, ignoreCase := range []bool{false, true} { + gotRe, err := globToRegexp(test.in, false, false, ignoreCase) + if test.error == "" { + require.NoError(t, err, test.in) + prefix := "" + if ignoreCase { + prefix = "(?i)" + } + got := gotRe.String() + assert.Equal(t, prefix+test.want, got, test.in) + } else { + require.Error(t, err, test.in) + assert.Contains(t, err.Error(), test.error, test.in) + assert.Nil(t, gotRe) + } + } + } +} + +func TestGlobPathToRegexp(t *testing.T) { for _, test := range []struct { in string want string @@ -28,20 +150,20 @@ func TestGlobToRegexp(t *testing.T) { {`potato**sausage`, `(^|/)potato.*sausage$`, ``}, {`*.p[lm]`, `(^|/)[^/]*\.p[lm]$`, ``}, {`[\[\]]`, `(^|/)[\[\]]$`, ``}, - {`***potato`, `(^|/)`, `too many stars`}, - {`***`, `(^|/)`, `too many stars`}, - {`ab]c`, `(^|/)`, `mismatched ']'`}, - {`ab[c`, `(^|/)`, `mismatched '[' and ']'`}, - {`ab{x{cd`, `(^|/)`, `can't nest`}, - {`ab{}}cd`, `(^|/)`, `mismatched '{' and '}'`}, - {`ab}c`, `(^|/)`, `mismatched '{' and '}'`}, - {`ab{c`, `(^|/)`, `mismatched '{' and '}'`}, + {`***potato`, ``, `too many stars`}, + {`***`, ``, `too many stars`}, + {`ab]c`, ``, `mismatched ']'`}, + {`ab[c`, ``, `mismatched '[' and ']'`}, + {`ab{x{cd`, ``, `can't nest`}, + {`ab{}}cd`, ``, `mismatched '{' and '}'`}, + {`ab}c`, ``, `mismatched '{' and '}'`}, + {`ab{c`, ``, `mismatched '{' and '}'`}, {`*.{jpg,png,gif}`, `(^|/)[^/]*\.(jpg|png|gif)$`, ``}, - {`[a--b]`, `(^|/)`, `bad glob pattern`}, + {`[a--b]`, ``, `bad glob pattern`}, {`a\*b`, `(^|/)a\*b$`, ``}, {`a\\b`, `(^|/)a\\b$`, ``}, {`a{{.*}}b`, `(^|/)a(.*)b$`, ``}, - {`a{{.*}`, `(^|/)a(.*)b$`, `mismatched '{{' and '}}'`}, + {`a{{.*}`, ``, `mismatched '{{' and '}}'`}, {`{{regexp}}`, `(^|/)(regexp)$`, ``}, {`\{{{regexp}}`, `(^|/)\{(regexp)$`, ``}, {`/{{regexp}}`, `^(regexp)$`, ``}, @@ -50,7 +172,7 @@ func TestGlobToRegexp(t *testing.T) { {`{{(?i)regexp}}`, `(^|/)((?i)regexp)$`, ``}, } { for _, ignoreCase := range []bool{false, true} { - gotRe, err := GlobToRegexp(test.in, ignoreCase) + gotRe, err := GlobPathToRegexp(test.in, ignoreCase) if test.error == "" { require.NoError(t, err, test.in) prefix := "" @@ -111,7 +233,7 @@ func TestGlobToDirGlobs(t *testing.T) { {"/sausage3**", []string{`/sausage3**/`, "/"}}, {"/a/*.jpg", []string{`/a/`, "/"}}, } { - _, err := GlobToRegexp(test.in, false) + _, err := GlobPathToRegexp(test.in, false) assert.NoError(t, err) got := globToDirGlobs(test.in) assert.Equal(t, test.want, got, test.in) diff --git a/fs/filter/rules.go b/fs/filter/rules.go index dce5851a6..cffca0d20 100644 --- a/fs/filter/rules.go +++ b/fs/filter/rules.go @@ -67,7 +67,7 @@ func (rs *rules) add(Include bool, re *regexp.Regexp) { // Add adds a filter rule with include or exclude status indicated func (rs *rules) Add(Include bool, glob string) error { - re, err := GlobToRegexp(glob, false /* f.Opt.IgnoreCase */) + re, err := GlobPathToRegexp(glob, false /* f.Opt.IgnoreCase */) if err != nil { return err }