restic/internal/filter/filter.go
Michael Eischer bcc3bddcf4 filter: only check whether a child path could match when necessary
When checking excludes there is no need to test whether a child path
could also match the pattern, as it is by definition excluded.
Previously childMayMatch was calculated but then discarded. For simple
absolute paths this can account for half the time spent for checking
pattern matches.

name                          old time/op    new time/op    delta
FilterPatterns/Relative-4       23.3ms ± 9%    21.7ms ± 6%   -6.68%  (p=0.004 n=10+10)
FilterPatterns/Absolute-4       13.9ms ± 7%    10.0ms ± 5%  -27.61%  (p=0.000 n=10+10)
FilterPatterns/Wildcard-4       51.4ms ± 7%    47.0ms ± 7%   -8.51%  (p=0.001 n=9+9)
FilterPatterns/ManyNoMatch-4     551ms ± 9%     190ms ± 1%  -65.41%  (p=0.000 n=10+8)

name                          old alloc/op   new alloc/op   delta
FilterPatterns/Relative-4       3.57MB ± 0%    3.57MB ± 0%     ~     (p=0.665 n=10+9)
FilterPatterns/Absolute-4       3.57MB ± 0%    3.57MB ± 0%     ~     (p=0.480 n=9+10)
FilterPatterns/Wildcard-4       14.3MB ± 0%    14.3MB ± 0%     ~     (p=0.431 n=9+10)
FilterPatterns/ManyNoMatch-4    3.57MB ± 0%    3.57MB ± 0%     ~     (all equal)

name                          old allocs/op  new allocs/op  delta
FilterPatterns/Relative-4        22.2k ± 0%     22.2k ± 0%     ~     (all equal)
FilterPatterns/Absolute-4        22.2k ± 0%     22.2k ± 0%     ~     (all equal)
FilterPatterns/Wildcard-4        88.7k ± 0%     88.7k ± 0%     ~     (all equal)
FilterPatterns/ManyNoMatch-4     22.2k ± 0%     22.2k ± 0%     ~     (all equal)
2020-10-07 20:47:52 +02:00

242 lines
5.7 KiB
Go

package filter
import (
"path/filepath"
"strings"
"github.com/restic/restic/internal/errors"
)
// ErrBadString is returned when Match is called with the empty string as the
// second argument.
var ErrBadString = errors.New("filter.Match: string is empty")
// Pattern represents a preparsed filter pattern
type Pattern []string
func prepareStr(str string) ([]string, error) {
if str == "" {
return nil, ErrBadString
}
// convert file path separator to '/'
if filepath.Separator != '/' {
str = strings.Replace(str, string(filepath.Separator), "/", -1)
}
return strings.Split(str, "/"), nil
}
func preparePattern(pattern string) Pattern {
pattern = filepath.Clean(pattern)
// convert file path separator to '/'
if filepath.Separator != '/' {
pattern = strings.Replace(pattern, string(filepath.Separator), "/", -1)
}
return strings.Split(pattern, "/")
}
// Match returns true if str matches the pattern. When the pattern is
// malformed, filepath.ErrBadPattern is returned. The empty pattern matches
// everything, when str is the empty string ErrBadString is returned.
//
// Pattern can be a combination of patterns suitable for filepath.Match, joined
// by filepath.Separator.
//
// In addition patterns suitable for filepath.Match, pattern accepts a
// recursive wildcard '**', which greedily matches an arbitrary number of
// intermediate directories.
func Match(pattern, str string) (matched bool, err error) {
if pattern == "" {
return true, nil
}
patterns := preparePattern(pattern)
strs, err := prepareStr(str)
if err != nil {
return false, err
}
return match(patterns, strs)
}
// ChildMatch returns true if children of str can match the pattern. When the pattern is
// malformed, filepath.ErrBadPattern is returned. The empty pattern matches
// everything, when str is the empty string ErrBadString is returned.
//
// Pattern can be a combination of patterns suitable for filepath.Match, joined
// by filepath.Separator.
//
// In addition patterns suitable for filepath.Match, pattern accepts a
// recursive wildcard '**', which greedily matches an arbitrary number of
// intermediate directories.
func ChildMatch(pattern, str string) (matched bool, err error) {
if pattern == "" {
return true, nil
}
patterns := preparePattern(pattern)
strs, err := prepareStr(str)
if err != nil {
return false, err
}
return childMatch(patterns, strs)
}
func childMatch(patterns Pattern, strs []string) (matched bool, err error) {
if patterns[0] != "" {
// relative pattern can always be nested down
return true, nil
}
ok, pos := hasDoubleWildcard(patterns)
if ok && len(strs) >= pos {
// cut off at the double wildcard
strs = strs[:pos]
}
// match path against absolute pattern prefix
l := 0
if len(strs) > len(patterns) {
l = len(patterns)
} else {
l = len(strs)
}
return match(patterns[0:l], strs)
}
func hasDoubleWildcard(list Pattern) (ok bool, pos int) {
for i, item := range list {
if item == "**" {
return true, i
}
}
return false, 0
}
func match(patterns Pattern, strs []string) (matched bool, err error) {
if ok, pos := hasDoubleWildcard(patterns); ok {
// gradually expand '**' into separate wildcards
newPat := make(Pattern, len(strs))
// copy static prefix once
copy(newPat, patterns[:pos])
for i := 0; i <= len(strs)-len(patterns)+1; i++ {
// limit to static prefix and already appended '*'
newPat := newPat[:pos+i]
// in the first iteration the wildcard expands to nothing
if i > 0 {
newPat[pos+i-1] = "*"
}
newPat = append(newPat, patterns[pos+1:]...)
matched, err := match(newPat, strs)
if err != nil {
return false, err
}
if matched {
return true, nil
}
}
return false, nil
}
if len(patterns) == 0 && len(strs) == 0 {
return true, nil
}
if len(patterns) <= len(strs) {
maxOffset := len(strs) - len(patterns)
// special case absolute patterns
if patterns[0] == "" {
maxOffset = 0
}
outer:
for offset := maxOffset; offset >= 0; offset-- {
for i := len(patterns) - 1; i >= 0; i-- {
ok, err := filepath.Match(patterns[i], strs[offset+i])
if err != nil {
return false, errors.Wrap(err, "Match")
}
if !ok {
continue outer
}
}
return true, nil
}
}
return false, nil
}
// ParsePatterns prepares a list of patterns for use with List.
func ParsePatterns(patterns []string) []Pattern {
patpat := make([]Pattern, 0)
for _, pat := range patterns {
if pat == "" {
continue
}
pats := preparePattern(pat)
patpat = append(patpat, pats)
}
return patpat
}
// List returns true if str matches one of the patterns. Empty patterns are ignored.
func List(patterns []Pattern, str string) (matched bool, err error) {
matched, _, err = list(patterns, false, str)
return matched, err
}
// ListWithChild returns true if str matches one of the patterns. Empty patterns are ignored.
func ListWithChild(patterns []Pattern, str string) (matched bool, childMayMatch bool, err error) {
return list(patterns, true, str)
}
// List returns true if str matches one of the patterns. Empty patterns are ignored.
func list(patterns []Pattern, checkChildMatches bool, str string) (matched bool, childMayMatch bool, err error) {
if len(patterns) == 0 {
return false, false, nil
}
strs, err := prepareStr(str)
if err != nil {
return false, false, err
}
for _, pat := range patterns {
m, err := match(pat, strs)
if err != nil {
return false, false, err
}
var c bool
if checkChildMatches {
c, err = childMatch(pat, strs)
if err != nil {
return false, false, err
}
} else {
c = true
}
matched = matched || m
childMayMatch = childMayMatch || c
if matched && childMayMatch {
return true, true, nil
}
}
return matched, childMayMatch, nil
}