restic/internal/filter/filter.go
Michael Eischer b8eacd1364 filter: Reduce redundant path and pattern splitting
A single call to filter.List will split the path only once and also
split each search pattern only once and use it for both match and
childMatch.

name                          old time/op    new time/op    delta
FilterPatterns/Relative-4       62.1ms ±15%    30.3ms ±10%  -51.22%  (p=0.000 n=9+10)
FilterPatterns/Absolute-4        111ms ±10%      49ms ± 3%  -56.08%  (p=0.000 n=10+8)
FilterPatterns/Wildcard-4        393ms ±15%     345ms ± 9%  -12.30%  (p=0.000 n=10+10)
FilterPatterns/ManyNoMatch-4     10.0s ± 3%      3.9s ± 2%  -60.53%  (p=0.000 n=10+9)

name                          old alloc/op   new alloc/op   delta
FilterPatterns/Relative-4       16.4MB ± 0%     4.6MB ± 0%  -71.76%  (p=0.000 n=10+9)
FilterPatterns/Absolute-4       31.4MB ± 0%     8.5MB ± 0%  -72.77%  (p=0.000 n=9+10)
FilterPatterns/Wildcard-4        168MB ± 0%     146MB ± 0%  -13.19%  (p=0.000 n=10+9)
FilterPatterns/ManyNoMatch-4    3.23GB ± 0%    0.91GB ± 0%  -71.96%  (p=0.000 n=10+9)

name                          old allocs/op  new allocs/op  delta
FilterPatterns/Relative-4         178k ± 0%       67k ± 0%  -62.50%  (p=0.000 n=10+10)
FilterPatterns/Absolute-4         266k ± 0%       89k ± 0%  -66.67%  (p=0.000 n=10+10)
FilterPatterns/Wildcard-4        1.87M ± 0%     1.70M ± 0%   -9.47%  (p=0.000 n=10+10)
FilterPatterns/ManyNoMatch-4     17.7M ± 0%      4.5M ± 0%  -74.87%  (p=0.000 n=9+10)
2020-10-07 18:13:19 +02:00

208 lines
4.6 KiB
Go

package filter
import (
"path/filepath"
"strings"
"github.com/restic/restic/internal/errors"
)
// ErrBadString is returned when Match is called with the empty string as the
// second argument.
var ErrBadString = errors.New("filter.Match: string is empty")
type filterPattern []string
func prepareStr(str string) ([]string, error) {
if str == "" {
return nil, ErrBadString
}
// convert file path separator to '/'
if filepath.Separator != '/' {
str = strings.Replace(str, string(filepath.Separator), "/", -1)
}
return strings.Split(str, "/"), nil
}
func preparePattern(pattern string) filterPattern {
pattern = filepath.Clean(pattern)
// convert file path separator to '/'
if filepath.Separator != '/' {
pattern = strings.Replace(pattern, string(filepath.Separator), "/", -1)
}
return strings.Split(pattern, "/")
}
// Match returns true if str matches the pattern. When the pattern is
// malformed, filepath.ErrBadPattern is returned. The empty pattern matches
// everything, when str is the empty string ErrBadString is returned.
//
// Pattern can be a combination of patterns suitable for filepath.Match, joined
// by filepath.Separator.
//
// In addition patterns suitable for filepath.Match, pattern accepts a
// recursive wildcard '**', which greedily matches an arbitrary number of
// intermediate directories.
func Match(pattern, str string) (matched bool, err error) {
if pattern == "" {
return true, nil
}
patterns := preparePattern(pattern)
strs, err := prepareStr(str)
if err != nil {
return false, err
}
return match(patterns, strs)
}
// ChildMatch returns true if children of str can match the pattern. When the pattern is
// malformed, filepath.ErrBadPattern is returned. The empty pattern matches
// everything, when str is the empty string ErrBadString is returned.
//
// Pattern can be a combination of patterns suitable for filepath.Match, joined
// by filepath.Separator.
//
// In addition patterns suitable for filepath.Match, pattern accepts a
// recursive wildcard '**', which greedily matches an arbitrary number of
// intermediate directories.
func ChildMatch(pattern, str string) (matched bool, err error) {
if pattern == "" {
return true, nil
}
patterns := preparePattern(pattern)
strs, err := prepareStr(str)
if err != nil {
return false, err
}
return childMatch(patterns, strs)
}
func childMatch(patterns, strs []string) (matched bool, err error) {
if patterns[0] != "" {
// relative pattern can always be nested down
return true, nil
}
ok, pos := hasDoubleWildcard(patterns)
if ok && len(strs) >= pos {
// cut off at the double wildcard
strs = strs[:pos]
}
// match path against absolute pattern prefix
l := 0
if len(strs) > len(patterns) {
l = len(patterns)
} else {
l = len(strs)
}
return match(patterns[0:l], strs)
}
func hasDoubleWildcard(list []string) (ok bool, pos int) {
for i, item := range list {
if item == "**" {
return true, i
}
}
return false, 0
}
func match(patterns, strs []string) (matched bool, err error) {
if ok, pos := hasDoubleWildcard(patterns); ok {
// gradually expand '**' into separate wildcards
for i := 0; i <= len(strs)-len(patterns)+1; i++ {
newPat := make([]string, pos)
copy(newPat, patterns[:pos])
for k := 0; k < i; k++ {
newPat = append(newPat, "*")
}
newPat = append(newPat, patterns[pos+1:]...)
matched, err := match(newPat, strs)
if err != nil {
return false, err
}
if matched {
return true, nil
}
}
return false, nil
}
if len(patterns) == 0 && len(strs) == 0 {
return true, nil
}
if len(patterns) <= len(strs) {
outer:
for offset := len(strs) - len(patterns); offset >= 0; offset-- {
for i := len(patterns) - 1; i >= 0; i-- {
ok, err := filepath.Match(patterns[i], strs[offset+i])
if err != nil {
return false, errors.Wrap(err, "Match")
}
if !ok {
continue outer
}
}
return true, nil
}
}
return false, nil
}
// List returns true if str matches one of the patterns. Empty patterns are
// ignored.
func List(patterns []string, str string) (matched bool, childMayMatch bool, err error) {
if len(patterns) == 0 {
return false, false, nil
}
strs, err := prepareStr(str)
if err != nil {
return false, false, err
}
for _, pat := range patterns {
if pat == "" {
continue
}
pats := preparePattern(pat)
m, err := match(pats, strs)
if err != nil {
return false, false, err
}
c, err := childMatch(pats, strs)
if err != nil {
return false, false, err
}
matched = matched || m
childMayMatch = childMayMatch || c
if matched && childMayMatch {
return true, true, nil
}
}
return matched, childMayMatch, nil
}