diff --git a/docs/content/filtering.md b/docs/content/filtering.md index ccd099f0b..6c33d2c82 100644 --- a/docs/content/filtering.md +++ b/docs/content/filtering.md @@ -81,13 +81,32 @@ Special characters can be escaped with a `\` before them. \*.jpg - matches "*.jpg" \\.jpg - matches "\.jpg" \[one\].jpg - matches "[one].jpg" - + +### Directories ### + +Rclone keeps track of directories that could match any file patterns. + +E.g. if you add the include rule + + /a/*.jpg + +Rclone will synthesize the directory include rule + + /a/ + +If you put any rules which end in `/` then they will only match +directories. + +Directory matches are **only** used to optimise directory access +patterns - you must still match the files that you want to match. +Directory matches won't optimise anything on bucket based remotes (e.g. +s3, swift, google cloud storage, b2) which don't have a concept of +directory. + ### Differences between rsync and rclone patterns ### Rclone implements bash style `{a,b,c}` glob matching which rsync doesn't. -Rclone ignores `/` at the end of a pattern. - Rclone always does a wildcard match so `\` must always escape a `\`. ## How the rules are used ## @@ -120,6 +139,11 @@ This would exclude * `secret17.jpg` * non `*.jpg` and `*.png` +A similar process is done on directory entries before recursing into +them. This only works on remotes which have a concept of directory +(e.g. local, drive, onedrive, amazon cloud drive) and not on bucket +based remotes (e.g. s3, swift, google cloud storage, b2). + ## Adding filtering rules ## Filtering rules are added with the following command line flags. 
diff --git a/fs/filter.go b/fs/filter.go index c1ff1c496..79578caa7 100644 --- a/fs/filter.go +++ b/fs/filter.go @@ -59,6 +59,40 @@ func (r *rule) String() string { return fmt.Sprintf("%s %s", c, r.Regexp.String()) } +// rules is a slice of rules +type rules struct { + rules []rule + existing map[string]struct{} +} + +// add adds a rule if it doesn't exist already +func (rs *rules) add(Include bool, re *regexp.Regexp) { + if rs.existing == nil { + rs.existing = make(map[string]struct{}) + } + newRule := rule{ + Include: Include, + Regexp: re, + } + newRuleString := newRule.String() + if _, ok := rs.existing[newRuleString]; ok { + return // rule already exists + } + rs.rules = append(rs.rules, newRule) + rs.existing[newRuleString] = struct{}{} +} + +// clear clears all the rules +func (rs *rules) clear() { + rs.rules = nil + rs.existing = nil +} + +// len returns the number of rules +func (rs *rules) len() int { + return len(rs.rules) +} + // filesMap describes the map of files to transfer type filesMap map[string]struct{} @@ -69,7 +103,8 @@ type Filter struct { MaxSize int64 ModTimeFrom time.Time ModTimeTo time.Time - rules []rule + fileRules rules + dirRules rules files filesMap // files if filesFrom dirs filesMap // dirs from filesFrom } @@ -172,7 +207,7 @@ func NewFilter() (f *Filter, err error) { } } if addImplicitExclude { - err = f.Add(false, "*") + err = f.Add(false, "/**") if err != nil { return nil, err } @@ -204,17 +239,49 @@ func NewFilter() (f *Filter, err error) { return f, nil } +// addDirGlobs adds directory globs from the file glob passed in +func (f *Filter) addDirGlobs(Include bool, glob string) error { + for _, dirGlob := range globToDirGlobs(glob) { + // Don't add "/" as we always include the root + if dirGlob == "/" { + continue + } + dirRe, err := globToRegexp(dirGlob) + if err != nil { + return err + } + f.dirRules.add(Include, dirRe) + } + return nil +} + // Add adds a filter rule with include or exclude status indicated func (f *Filter) 
Add(Include bool, glob string) error { + isDirRule := strings.HasSuffix(glob, "/") + isFileRule := !isDirRule + if strings.HasSuffix(glob, "**") { + isDirRule, isFileRule = true, true + } re, err := globToRegexp(glob) if err != nil { return err } - rule := rule{ - Include: Include, - Regexp: re, + if isFileRule { + f.fileRules.add(Include, re) + // If include rule work out what directories are needed to scan + // if exclude rule, we can't rule anything out + // Unless it is `*` which matches everything + // NB ** and /** are DirRules + if Include || glob == "*" { + err = f.addDirGlobs(Include, glob) + if err != nil { + return err + } + } + } + if isDirRule { + f.dirRules.add(Include, re) } - f.rules = append(f.rules, rule) return nil } @@ -266,7 +333,8 @@ func (f *Filter) AddFile(file string) error { // Clear clears all the filter rules func (f *Filter) Clear() { - f.rules = nil + f.fileRules.clear() + f.dirRules.clear() } // InActive returns false if any filters are active @@ -276,12 +344,13 @@ func (f *Filter) InActive() bool { f.ModTimeTo.IsZero() && f.MinSize == 0 && f.MaxSize == 0 && - len(f.rules) == 0) + f.fileRules.len() == 0 && + f.dirRules.len() == 0) } // includeRemote returns whether this remote passes the filter rules. 
func (f *Filter) includeRemote(remote string) bool { - for _, rule := range f.rules { + for _, rule := range f.fileRules.rules { if rule.Match(remote) { return rule.Include } @@ -298,7 +367,13 @@ func (f *Filter) IncludeDirectory(remote string) bool { _, include := f.dirs[remote] return include } - return f.includeRemote(remote + "/") + remote += "/" + for _, rule := range f.dirRules.rules { + if rule.Match(remote) { + return rule.Include + } + } + return true } // Include returns whether this object should be included into the @@ -372,8 +447,13 @@ func (f *Filter) DumpFilters() string { if !f.ModTimeTo.IsZero() { rules = append(rules, fmt.Sprintf("Last-modified date must be equal or less than: %s", f.ModTimeTo.String())) } - for _, rule := range f.rules { + rules = append(rules, "--- File filter rules ---") + for _, rule := range f.fileRules.rules { rules = append(rules, rule.String()) } + rules = append(rules, "--- Directory filter rules ---") + for _, dirRule := range f.dirRules.rules { + rules = append(rules, dirRule.String()) + } return strings.Join(rules, "\n") } diff --git a/fs/filter_test.go b/fs/filter_test.go index b2f4743c1..9159e7fba 100644 --- a/fs/filter_test.go +++ b/fs/filter_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestAgeSuffix(t *testing.T) { @@ -46,27 +47,14 @@ func TestAgeSuffix(t *testing.T) { func TestNewFilterDefault(t *testing.T) { f, err := NewFilter() - if err != nil { - t.Fatal(err) - } - if f.DeleteExcluded != false { - t.Errorf("DeleteExcluded want false got %v", f.DeleteExcluded) - } - if f.MinSize != 0 { - t.Errorf("MinSize want 0 got %v", f.MinSize) - } - if f.MaxSize != 0 { - t.Errorf("MaxSize want 0 got %v", f.MaxSize) - } - if len(f.rules) != 0 { - t.Errorf("rules want non got %v", f.rules) - } - if f.files != nil { - t.Errorf("files want none got %v", f.files) - } - if !f.InActive() { - t.Errorf("want InActive") - } + require.NoError(t, err) + 
assert.False(t, f.DeleteExcluded) + assert.Equal(t, int64(0), f.MinSize) + assert.Equal(t, int64(0), f.MaxSize) + assert.Len(t, f.fileRules.rules, 0) + assert.Len(t, f.dirRules.rules, 0) + assert.Nil(t, f.files) + assert.True(t, f.InActive()) } // return a pointer to the string @@ -77,9 +65,7 @@ func stringP(s string) *string { // testFile creates a temp file with the contents func testFile(t *testing.T, contents string) *string { out, err := ioutil.TempFile("", "filter_test") - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) defer func() { err := out.Close() if err != nil { @@ -87,9 +73,7 @@ func testFile(t *testing.T, contents string) *string { } }() _, err = out.Write([]byte(contents)) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) s := out.Name() return &s } @@ -138,20 +122,13 @@ func TestNewFilterFull(t *testing.T) { }() f, err := NewFilter() - if err != nil { - t.Fatal(err) - } - if f.DeleteExcluded != true { - t.Errorf("DeleteExcluded want true got %v", f.DeleteExcluded) - } - if f.MinSize != mins { - t.Errorf("MinSize want %v got %v", mins, f.MinSize) - } - if f.MaxSize != maxs { - t.Errorf("MaxSize want %v got %v", maxs, f.MaxSize) - } + require.NoError(t, err) + assert.True(t, f.DeleteExcluded) + assert.Equal(t, f.MinSize, mins) + assert.Equal(t, f.MaxSize, maxs) got := f.DumpFilters() - want := `+ (^|/)include1$ + want := `--- File filter rules --- ++ (^|/)include1$ + (^|/)include2$ + (^|/)include3$ - (^|/)exclude1$ @@ -160,22 +137,19 @@ func TestNewFilterFull(t *testing.T) { - (^|/)filter1$ + (^|/)filter2$ - (^|/)filter3$ -- (^|/)[^/]*$` - if got != want { - t.Errorf("rules want %s got %s", want, got) - } - if len(f.files) != 2 { - t.Errorf("files want 2 got %v", f.files) - } +- ^.*$ +--- Directory filter rules --- ++ ^.*$ +- ^.*$` + assert.Equal(t, want, got) + assert.Len(t, f.files, 2) for _, name := range []string{"files1", "files2"} { _, ok := f.files[name] if !ok { t.Errorf("Didn't find file %q in f.files", name) } } - 
if f.InActive() { - t.Errorf("want !InActive") - } + assert.False(t, f.InActive()) } type includeTest struct { @@ -188,9 +162,7 @@ type includeTest struct { func testInclude(t *testing.T, f *Filter, tests []includeTest) { for _, test := range tests { got := f.Include(test.in, test.size, time.Unix(test.modTime, 0)) - if test.want != got { - t.Errorf("%q,%d,%d: want %v got %v", test.in, test.size, test.modTime, test.want, got) - } + assert.Equal(t, test.want, got, test.in, test.size, test.modTime) } } @@ -202,17 +174,13 @@ type includeDirTest struct { func testDirInclude(t *testing.T, f *Filter, tests []includeDirTest) { for _, test := range tests { got := f.IncludeDirectory(test.in) - if test.want != got { - t.Errorf("%q: want %v got %v", test.in, test.want, got) - } + assert.Equal(t, test.want, got, test.in) } } func TestNewFilterIncludeFiles(t *testing.T) { f, err := NewFilter() - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) err = f.AddFile("file1.jpg") if err != nil { t.Error(err) @@ -239,9 +207,7 @@ func TestNewFilterIncludeFiles(t *testing.T) { func TestNewFilterIncludeFilesDirs(t *testing.T) { f, err := NewFilter() - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) for _, path := range []string{ "path/to/dir/file1.png", "/path/to/dir/file2.png", @@ -275,9 +241,7 @@ func TestNewFilterIncludeFilesDirs(t *testing.T) { func TestNewFilterMinSize(t *testing.T) { f, err := NewFilter() - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) f.MinSize = 100 testInclude(t, f, []includeTest{ {"file1.jpg", 100, 0, true}, @@ -291,9 +255,7 @@ func TestNewFilterMinSize(t *testing.T) { func TestNewFilterMaxSize(t *testing.T) { f, err := NewFilter() - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) f.MaxSize = 100 testInclude(t, f, []includeTest{ {"file1.jpg", 100, 0, true}, @@ -307,9 +269,7 @@ func TestNewFilterMaxSize(t *testing.T) { func TestNewFilterMinAndMaxAge(t *testing.T) { f, err := NewFilter() - if err != nil { - 
t.Fatal(err) - } + require.NoError(t, err) f.ModTimeFrom = time.Unix(1440000002, 0) f.ModTimeTo = time.Unix(1440000003, 0) testInclude(t, f, []includeTest{ @@ -326,9 +286,7 @@ func TestNewFilterMinAndMaxAge(t *testing.T) { func TestNewFilterMinAge(t *testing.T) { f, err := NewFilter() - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) f.ModTimeTo = time.Unix(1440000002, 0) testInclude(t, f, []includeTest{ {"file1.jpg", 100, 1440000000, true}, @@ -344,9 +302,7 @@ func TestNewFilterMinAge(t *testing.T) { func TestNewFilterMaxAge(t *testing.T) { f, err := NewFilter() - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) f.ModTimeFrom = time.Unix(1440000002, 0) testInclude(t, f, []includeTest{ {"file1.jpg", 100, 1440000000, false}, @@ -362,25 +318,22 @@ func TestNewFilterMaxAge(t *testing.T) { func TestNewFilterMatches(t *testing.T) { f, err := NewFilter() - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) add := func(s string) { err := f.AddRule(s) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) } add("+ cleared") add("!") - add("- file1.jpg") - add("+ file2.png") - add("+ *.jpg") - add("- *.png") + add("- /file1.jpg") + add("+ /file2.png") + add("+ /*.jpg") + add("- /*.png") add("- /potato") add("+ /sausage1") add("+ /sausage2*") add("+ /sausage3**") + add("+ /a/*.jpg") add("- *") testInclude(t, f, []includeTest{ {"cleared", 100, 0, false}, @@ -395,8 +348,11 @@ func TestNewFilterMatches(t *testing.T) { {"sausage2potato", 101, 0, true}, {"sausage2/potato", 101, 0, false}, {"sausage3/potato", 101, 0, true}, + {"a/one.jpg", 101, 0, true}, + {"a/one.png", 101, 0, false}, {"unicorn", 99, 0, false}, }) + t.Log(f.DumpFilters()) testDirInclude(t, f, []includeDirTest{ {"sausage1", false}, {"sausage2", false}, @@ -406,6 +362,7 @@ func TestNewFilterMatches(t *testing.T) { {"sausage3/sub", true}, {"sausage3/sub/dir", true}, {"sausage4", false}, + {"a", true}, }) if f.InActive() { t.Errorf("want !InActive") @@ -480,17 +437,11 @@ 
func TestFilterMatchesFromDocs(t *testing.T) { {"\\[one\\].jpg", true, "[one].jpg"}, } { f, err := NewFilter() - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) err = f.Add(true, test.glob) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) err = f.Add(false, "*") - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) included := f.Include(test.file, 0, time.Unix(0, 0)) if included != test.included { t.Logf("%q match %q: want %v got %v", test.glob, test.file, test.included, included) diff --git a/fs/glob.go b/fs/glob.go index a4e08d0d5..e314aa99d 100644 --- a/fs/glob.go +++ b/fs/glob.go @@ -115,3 +115,51 @@ func globToRegexp(glob string) (*regexp.Regexp, error) { } return result, nil } + +var ( + // Can't deal with / or ** in {} + tooHardRe = regexp.MustCompile(`{[^{}]*(\*\*|/)[^{}]*}`) + + // Squash all / + squashSlash = regexp.MustCompile(`/{2,}`) +) + +// globToDirGlobs takes a file glob and turns it into a series of +// directory globs. When matched with a directory (with a trailing /) +// this should answer the question as to whether this glob could be in +// this directory. 
+func globToDirGlobs(glob string) (out []string) { + if tooHardRe.MatchString(glob) { + // Can't figure this one out so return any directory might match + out = append(out, "/**") + return out + } + + // Get rid of multiple /s + glob = squashSlash.ReplaceAllString(glob, "/") + + // Split on / or ** + // (** can contain /) + for { + i := strings.LastIndex(glob, "/") + j := strings.LastIndex(glob, "**") + what := "" + if j > i { + i = j + what = "**" + } + if i < 0 { + if len(out) == 0 { + out = append(out, "/**") + } + break + } + glob = glob[:i] + newGlob := glob + what + "/" + if len(out) == 0 || out[len(out)-1] != newGlob { + out = append(out, newGlob) + } + } + + return out +} diff --git a/fs/glob_test.go b/fs/glob_test.go index 74b005eeb..878c9665f 100644 --- a/fs/glob_test.go +++ b/fs/glob_test.go @@ -1,8 +1,10 @@ package fs import ( - "strings" "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestGlobToRegexp(t *testing.T) { @@ -41,24 +43,62 @@ func TestGlobToRegexp(t *testing.T) { } { gotRe, err := globToRegexp(test.in) if test.error == "" { - if err != nil { - t.Errorf("%q: not expecting error: %v", test.in, err) - } else { - got := gotRe.String() - if test.want != got { - t.Errorf("%q: want %q got %q", test.in, test.want, got) - } - } + got := gotRe.String() + require.NoError(t, err, test.in) + assert.Equal(t, test.want, got, test.in) } else { - if err == nil { - t.Errorf("%q: expecting error but didn't get one", test.in) - } else { - got := err.Error() - if !strings.Contains(got, test.error) { - t.Errorf("%q: want error %q got %q", test.in, test.error, got) - } - } + require.Error(t, err, test.in) + assert.Contains(t, err.Error(), test.error, test.in) + assert.Nil(t, gotRe) } } - +} + +func TestGlobToDirGlobs(t *testing.T) { + for _, test := range []struct { + in string + want []string + }{ + {`*`, []string{"/**"}}, + {`/*`, []string{"/"}}, + {`*.jpg`, []string{"/**"}}, + {`/*.jpg`, []string{"/"}}, + 
{`//*.jpg`, []string{"/"}}, + {`///*.jpg`, []string{"/"}}, + {`/a/*.jpg`, []string{"/a/", "/"}}, + {`/a//*.jpg`, []string{"/a/", "/"}}, + {`/a///*.jpg`, []string{"/a/", "/"}}, + {`/a/b/*.jpg`, []string{"/a/b/", "/a/", "/"}}, + {`a/*.jpg`, []string{"a/"}}, + {`a/b/*.jpg`, []string{"a/b/", "a/"}}, + {`*/*/*.jpg`, []string{"*/*/", "*/"}}, + {`a/b/`, []string{"a/b/", "a/"}}, + {`a/b`, []string{"a/"}}, + {`a/b/*.{jpg,png,gif}`, []string{"a/b/", "a/"}}, + {`/a/{jpg,png,gif}/*.{jpg,png,gif}`, []string{"/a/{jpg,png,gif}/", "/a/", "/"}}, + {`a/{a,a*b,a**c}/d/`, []string{"/**"}}, + {`/a/{a,a*b,a/c,d}/d/`, []string{"/**"}}, + {`**`, []string{"**/"}}, + {`a**`, []string{"a**/"}}, + {`a**b`, []string{"a**/"}}, + {`a**b**c**d`, []string{"a**b**c**/", "a**b**/", "a**/"}}, + {`a**b/c**d`, []string{"a**b/c**/", "a**b/", "a**/"}}, + {`/A/a**b/B/c**d/C/`, []string{"/A/a**b/B/c**d/C/", "/A/a**b/B/c**d/", "/A/a**b/B/c**/", "/A/a**b/B/", "/A/a**b/", "/A/a**/", "/A/", "/"}}, + {`/var/spool/**/ncw`, []string{"/var/spool/**/", "/var/spool/", "/var/", "/"}}, + {`var/spool/**/ncw/`, []string{"var/spool/**/ncw/", "var/spool/**/", "var/spool/", "var/"}}, + {"/file1.jpg", []string{`/`}}, + {"/file2.png", []string{`/`}}, + {"/*.jpg", []string{`/`}}, + {"/*.png", []string{`/`}}, + {"/potato", []string{`/`}}, + {"/sausage1", []string{`/`}}, + {"/sausage2*", []string{`/`}}, + {"/sausage3**", []string{`/sausage3**/`, "/"}}, + {"/a/*.jpg", []string{`/a/`, "/"}}, + } { + _, err := globToRegexp(test.in) + assert.NoError(t, err) + got := globToDirGlobs(test.in) + assert.Equal(t, test.want, got, test.in) + } }