Merge pull request #2311 from vincentbernat/feature/negative-pattern

filter: ability to use negative patterns
This commit is contained in:
Alexander Neumann 2022-03-20 14:02:30 +01:00 committed by GitHub
commit 3a285f91bc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 139 additions and 45 deletions

View file

@ -0,0 +1,31 @@
Enhancement: Add negative patterns for include/exclude
If a pattern starts with an exclamation mark and it matches a file that
was previously matched by a regular pattern, the match is cancelled.
Notably, this can be used with `--exclude-file` to cancel the
exclusion of some files.
It works similarly to `gitignore`, with the same limitation: once a
directory is excluded, it is not possible to include files inside the
directory.
Example of use (as an exclude pattern for backup):
$HOME/**/*
!$HOME/Documents
!$HOME/code
!$HOME/.emacs.d
!$HOME/games
# [...]
node_modules
*~
*.o
*.lo
*.pyc
# [...]
$HOME/code/linux/*
!$HOME/code/linux/.git
# [...]
https://github.com/restic/restic/issues/233
https://github.com/restic/restic/pull/2311

View file

@ -289,6 +289,28 @@ On most Unixy shells, you can either quote or use backslashes. For example:
* ``--exclude="foo bar star/foo.txt"`` * ``--exclude="foo bar star/foo.txt"``
* ``--exclude=foo\ bar\ star/foo.txt`` * ``--exclude=foo\ bar\ star/foo.txt``
If a pattern starts with an exclamation mark and matches a file that
was previously matched by a regular pattern, the match is cancelled.
It works similarly to ``gitignore``, with the same limitation: once a
directory is excluded, it is not possible to include files inside the
directory. Here is a complete example to back up a selection of
directories inside the home directory. It works by excluding any
directory, then selectively adding back some of them.
::
$HOME/**/*
!$HOME/Documents
!$HOME/code
!$HOME/.emacs.d
!$HOME/games
# [...]
node_modules
*~
*.o
*.lo
*.pyc
By specifying the option ``--one-file-system`` you can instruct restic By specifying the option ``--one-file-system`` you can instruct restic
to only backup files from the file systems the initially specified files to only backup files from the file systems the initially specified files
or directories reside on. In other words, it will prevent restic from crossing or directories reside on. In other words, it will prevent restic from crossing

View file

@ -17,7 +17,10 @@ type patternPart struct {
} }
// Pattern represents a preparsed filter pattern // Pattern represents a preparsed filter pattern
type Pattern []patternPart type Pattern struct {
parts []patternPart
isNegated bool
}
func prepareStr(str string) ([]string, error) { func prepareStr(str string) ([]string, error) {
if str == "" { if str == "" {
@ -26,20 +29,26 @@ func prepareStr(str string) ([]string, error) {
return splitPath(str), nil return splitPath(str), nil
} }
func preparePattern(pattern string) Pattern { func preparePattern(patternStr string) Pattern {
parts := splitPath(filepath.Clean(pattern)) var negate bool
patterns := make([]patternPart, len(parts)) if patternStr[0] == '!' {
for i, part := range parts { negate = true
patternStr = patternStr[1:]
}
pathParts := splitPath(filepath.Clean(patternStr))
parts := make([]patternPart, len(pathParts))
for i, part := range pathParts {
isSimple := !strings.ContainsAny(part, "\\[]*?") isSimple := !strings.ContainsAny(part, "\\[]*?")
// Replace "**" with the empty string to get faster comparisons // Replace "**" with the empty string to get faster comparisons
// (length-check only) in hasDoubleWildcard. // (length-check only) in hasDoubleWildcard.
if part == "**" { if part == "**" {
part = "" part = ""
} }
patterns[i] = patternPart{part, isSimple} parts[i] = patternPart{part, isSimple}
} }
return patterns return Pattern{parts, negate}
} }
// Split p into path components. Assuming p has been Cleaned, no component // Split p into path components. Assuming p has been Cleaned, no component
@ -62,19 +71,19 @@ func splitPath(p string) []string {
// In addition patterns suitable for filepath.Match, pattern accepts a // In addition patterns suitable for filepath.Match, pattern accepts a
// recursive wildcard '**', which greedily matches an arbitrary number of // recursive wildcard '**', which greedily matches an arbitrary number of
// intermediate directories. // intermediate directories.
func Match(pattern, str string) (matched bool, err error) { func Match(patternStr, str string) (matched bool, err error) {
if pattern == "" { if patternStr == "" {
return true, nil return true, nil
} }
patterns := preparePattern(pattern) pattern := preparePattern(patternStr)
strs, err := prepareStr(str) strs, err := prepareStr(str)
if err != nil { if err != nil {
return false, err return false, err
} }
return match(patterns, strs) return match(pattern, strs)
} }
// ChildMatch returns true if children of str can match the pattern. When the pattern is // ChildMatch returns true if children of str can match the pattern. When the pattern is
@ -87,28 +96,28 @@ func Match(pattern, str string) (matched bool, err error) {
// In addition patterns suitable for filepath.Match, pattern accepts a // In addition patterns suitable for filepath.Match, pattern accepts a
// recursive wildcard '**', which greedily matches an arbitrary number of // recursive wildcard '**', which greedily matches an arbitrary number of
// intermediate directories. // intermediate directories.
func ChildMatch(pattern, str string) (matched bool, err error) { func ChildMatch(patternStr, str string) (matched bool, err error) {
if pattern == "" { if patternStr == "" {
return true, nil return true, nil
} }
patterns := preparePattern(pattern) pattern := preparePattern(patternStr)
strs, err := prepareStr(str) strs, err := prepareStr(str)
if err != nil { if err != nil {
return false, err return false, err
} }
return childMatch(patterns, strs) return childMatch(pattern, strs)
} }
func childMatch(patterns Pattern, strs []string) (matched bool, err error) { func childMatch(pattern Pattern, strs []string) (matched bool, err error) {
if patterns[0].pattern != "/" { if pattern.parts[0].pattern != "/" {
// relative pattern can always be nested down // relative pattern can always be nested down
return true, nil return true, nil
} }
ok, pos := hasDoubleWildcard(patterns) ok, pos := hasDoubleWildcard(pattern)
if ok && len(strs) >= pos { if ok && len(strs) >= pos {
// cut off at the double wildcard // cut off at the double wildcard
strs = strs[:pos] strs = strs[:pos]
@ -116,16 +125,16 @@ func childMatch(patterns Pattern, strs []string) (matched bool, err error) {
// match path against absolute pattern prefix // match path against absolute pattern prefix
l := 0 l := 0
if len(strs) > len(patterns) { if len(strs) > len(pattern.parts) {
l = len(patterns) l = len(pattern.parts)
} else { } else {
l = len(strs) l = len(strs)
} }
return match(patterns[0:l], strs) return match(Pattern{pattern.parts[0:l], pattern.isNegated}, strs)
} }
func hasDoubleWildcard(list Pattern) (ok bool, pos int) { func hasDoubleWildcard(list Pattern) (ok bool, pos int) {
for i, item := range list { for i, item := range list.parts {
if item.pattern == "" { if item.pattern == "" {
return true, i return true, i
} }
@ -134,22 +143,22 @@ func hasDoubleWildcard(list Pattern) (ok bool, pos int) {
return false, 0 return false, 0
} }
func match(patterns Pattern, strs []string) (matched bool, err error) { func match(pattern Pattern, strs []string) (matched bool, err error) {
if ok, pos := hasDoubleWildcard(patterns); ok { if ok, pos := hasDoubleWildcard(pattern); ok {
// gradually expand '**' into separate wildcards // gradually expand '**' into separate wildcards
newPat := make(Pattern, len(strs)) newPat := make([]patternPart, len(strs))
// copy static prefix once // copy static prefix once
copy(newPat, patterns[:pos]) copy(newPat, pattern.parts[:pos])
for i := 0; i <= len(strs)-len(patterns)+1; i++ { for i := 0; i <= len(strs)-len(pattern.parts)+1; i++ {
// limit to static prefix and already appended '*' // limit to static prefix and already appended '*'
newPat := newPat[:pos+i] newPat := newPat[:pos+i]
// in the first iteration the wildcard expands to nothing // in the first iteration the wildcard expands to nothing
if i > 0 { if i > 0 {
newPat[pos+i-1] = patternPart{"*", false} newPat[pos+i-1] = patternPart{"*", false}
} }
newPat = append(newPat, patterns[pos+1:]...) newPat = append(newPat, pattern.parts[pos+1:]...)
matched, err := match(newPat, strs) matched, err := match(Pattern{newPat, pattern.isNegated}, strs)
if err != nil { if err != nil {
return false, err return false, err
} }
@ -162,20 +171,20 @@ func match(patterns Pattern, strs []string) (matched bool, err error) {
return false, nil return false, nil
} }
if len(patterns) == 0 && len(strs) == 0 { if len(pattern.parts) == 0 && len(strs) == 0 {
return true, nil return true, nil
} }
// an empty pattern never matches a non-empty path // an empty pattern never matches a non-empty path
if len(patterns) == 0 { if len(pattern.parts) == 0 {
return false, nil return false, nil
} }
if len(patterns) <= len(strs) { if len(pattern.parts) <= len(strs) {
minOffset := 0 minOffset := 0
maxOffset := len(strs) - len(patterns) maxOffset := len(strs) - len(pattern.parts)
// special case absolute patterns // special case absolute patterns
if patterns[0].pattern == "/" { if pattern.parts[0].pattern == "/" {
maxOffset = 0 maxOffset = 0
} else if strs[0] == "/" { } else if strs[0] == "/" {
// skip absolute path marker if pattern is not rooted // skip absolute path marker if pattern is not rooted
@ -184,12 +193,12 @@ func match(patterns Pattern, strs []string) (matched bool, err error) {
outer: outer:
for offset := maxOffset; offset >= minOffset; offset-- { for offset := maxOffset; offset >= minOffset; offset-- {
for i := len(patterns) - 1; i >= 0; i-- { for i := len(pattern.parts) - 1; i >= 0; i-- {
var ok bool var ok bool
if patterns[i].isSimple { if pattern.parts[i].isSimple {
ok = patterns[i].pattern == strs[offset+i] ok = pattern.parts[i].pattern == strs[offset+i]
} else { } else {
ok, err = filepath.Match(patterns[i].pattern, strs[offset+i]) ok, err = filepath.Match(pattern.parts[i].pattern, strs[offset+i])
if err != nil { if err != nil {
return false, errors.Wrap(err, "Match") return false, errors.Wrap(err, "Match")
} }
@ -208,9 +217,9 @@ func match(patterns Pattern, strs []string) (matched bool, err error) {
} }
// ParsePatterns prepares a list of patterns for use with List. // ParsePatterns prepares a list of patterns for use with List.
func ParsePatterns(patterns []string) []Pattern { func ParsePatterns(pattern []string) []Pattern {
patpat := make([]Pattern, 0) patpat := make([]Pattern, 0)
for _, pat := range patterns { for _, pat := range pattern {
if pat == "" { if pat == "" {
continue continue
} }
@ -232,7 +241,9 @@ func ListWithChild(patterns []Pattern, str string) (matched bool, childMayMatch
return list(patterns, true, str) return list(patterns, true, str)
} }
// List returns true if str matches one of the patterns. Empty patterns are ignored. // list returns true if str matches one of the patterns. Empty patterns are ignored.
// Patterns prefixed by "!" are negated: any matching file excluded by a previous pattern
// will become included again.
func list(patterns []Pattern, checkChildMatches bool, str string) (matched bool, childMayMatch bool, err error) { func list(patterns []Pattern, checkChildMatches bool, str string) (matched bool, childMayMatch bool, err error) {
if len(patterns) == 0 { if len(patterns) == 0 {
return false, false, nil return false, false, nil
@ -242,6 +253,12 @@ func list(patterns []Pattern, checkChildMatches bool, str string) (matched bool,
if err != nil { if err != nil {
return false, false, err return false, false, err
} }
hasNegatedPattern := false
for _, pat := range patterns {
hasNegatedPattern = hasNegatedPattern || pat.isNegated
}
for _, pat := range patterns { for _, pat := range patterns {
m, err := match(pat, strs) m, err := match(pat, strs)
if err != nil { if err != nil {
@ -258,11 +275,17 @@ func list(patterns []Pattern, checkChildMatches bool, str string) (matched bool,
c = true c = true
} }
matched = matched || m if pat.isNegated {
childMayMatch = childMayMatch || c matched = matched && !m
childMayMatch = childMayMatch && !m
} else {
matched = matched || m
childMayMatch = childMayMatch || c
if matched && childMayMatch { if matched && childMayMatch && !hasNegatedPattern {
return true, true, nil // without negative patterns the result cannot change any more
break
}
} }
} }

View file

@ -248,6 +248,7 @@ var filterListTests = []struct {
}{ }{
{[]string{}, "/foo/bar/test.go", false, false}, {[]string{}, "/foo/bar/test.go", false, false},
{[]string{"*.go"}, "/foo/bar/test.go", true, true}, {[]string{"*.go"}, "/foo/bar/test.go", true, true},
{[]string{"*.go"}, "/foo/bar", false, true},
{[]string{"*.c"}, "/foo/bar/test.go", false, true}, {[]string{"*.c"}, "/foo/bar/test.go", false, true},
{[]string{"*.go", "*.c"}, "/foo/bar/test.go", true, true}, {[]string{"*.go", "*.c"}, "/foo/bar/test.go", true, true},
{[]string{"*"}, "/foo/bar/test.go", true, true}, {[]string{"*"}, "/foo/bar/test.go", true, true},
@ -255,8 +256,25 @@ var filterListTests = []struct {
{[]string{"?"}, "/foo/bar/test.go", false, true}, {[]string{"?"}, "/foo/bar/test.go", false, true},
{[]string{"?", "x"}, "/foo/bar/x", true, true}, {[]string{"?", "x"}, "/foo/bar/x", true, true},
{[]string{"/*/*/bar/test.*"}, "/foo/bar/test.go", false, false}, {[]string{"/*/*/bar/test.*"}, "/foo/bar/test.go", false, false},
{[]string{"/*/*/bar/test.*"}, "/foo/bar/bar", false, true},
{[]string{"/*/*/bar/test.*", "*.go"}, "/foo/bar/test.go", true, true}, {[]string{"/*/*/bar/test.*", "*.go"}, "/foo/bar/test.go", true, true},
{[]string{"", "*.c"}, "/foo/bar/test.go", false, true}, {[]string{"", "*.c"}, "/foo/bar/test.go", false, true},
{[]string{"!**", "*.go"}, "/foo/bar/test.go", true, true},
{[]string{"!**", "*.c"}, "/foo/bar/test.go", false, true},
{[]string{"/foo/*/test.*", "!*.c"}, "/foo/bar/test.c", false, false},
{[]string{"/foo/*/test.*", "!*.c"}, "/foo/bar/test.go", true, true},
{[]string{"/foo/*/*", "!test.*", "*.c"}, "/foo/bar/test.go", false, true},
{[]string{"/foo/*/*", "!test.*", "*.c"}, "/foo/bar/test.c", true, true},
{[]string{"/foo/*/*", "!test.*", "*.c"}, "/foo/bar/file.go", true, true},
{[]string{"/**/*", "!/foo", "/foo/*", "!/foo/bar"}, "/foo/other/test.go", true, true},
{[]string{"/**/*", "!/foo", "/foo/*", "!/foo/bar"}, "/foo/bar", false, false},
{[]string{"/**/*", "!/foo", "/foo/*", "!/foo/bar"}, "/foo/bar/test.go", false, false},
{[]string{"/**/*", "!/foo", "/foo/*", "!/foo/bar"}, "/foo/bar/test.go/child", false, false},
{[]string{"/**/*", "!/foo", "/foo/*", "!/foo/bar", "/foo/bar/test*"}, "/foo/bar/test.go/child", true, true},
{[]string{"/foo/bar/*"}, "/foo", false, true},
{[]string{"/foo/bar/*", "!/foo/bar/[a-m]*"}, "/foo", false, true},
{[]string{"/foo/**/test.c"}, "/foo/bar/foo/bar/test.c", true, true},
{[]string{"/foo/*/test.c"}, "/foo/bar/foo/bar/test.c", false, false},
} }
func TestList(t *testing.T) { func TestList(t *testing.T) {