Smarter filter when children won't match

This improves restore performance by several orders of magnitude by not
going through the whole tree recursively when we can anticipate that no
match will ever occur.
This commit is contained in:
Loic Nageleisen 2017-06-16 16:46:16 +02:00
parent d87b2f189d
commit 4a36993c19
5 changed files with 145 additions and 21 deletions

View file

@ -417,7 +417,7 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, args []string) error {
}
selectFilter := func(item string, fi os.FileInfo) bool {
matched, err := filter.List(opts.Excludes, item)
matched, _, err := filter.List(opts.Excludes, item)
if err != nil {
Warnf("error for exclude pattern: %v", err)
}

View file

@ -113,22 +113,22 @@ func runRestore(opts RestoreOptions, gopts GlobalOptions, args []string) error {
return nil
}
selectExcludeFilter := func(item string, dstpath string, node *restic.Node) bool {
matched, err := filter.List(opts.Exclude, item)
selectExcludeFilter := func(item string, dstpath string, node *restic.Node) (bool, bool) {
matched, childMayMatch, err := filter.List(opts.Exclude, item)
if err != nil {
Warnf("error for exclude pattern: %v", err)
}
return !matched
return !matched, childMayMatch
}
selectIncludeFilter := func(item string, dstpath string, node *restic.Node) bool {
matched, err := filter.List(opts.Include, item)
selectIncludeFilter := func(item string, dstpath string, node *restic.Node) (bool, bool) {
matched, childMayMatch, err := filter.List(opts.Include, item)
if err != nil {
Warnf("error for include pattern: %v", err)
}
return matched
return matched, childMayMatch
}
if len(opts.Exclude) > 0 {

View file

@ -40,6 +40,51 @@ func Match(pattern, str string) (matched bool, err error) {
return match(patterns, strs)
}
// ChildMatch reports whether entries below str may still match pattern, so
// that callers can decide whether descending into str is worthwhile.
//
// An empty pattern always yields true. Otherwise an empty str yields
// ErrBadString. Errors reported while matching the pattern are returned
// unchanged.
//
// Pattern can be a combination of patterns suitable for filepath.Match, joined
// by filepath.Separator.
func ChildMatch(pattern, str string) (matched bool, err error) {
	if pattern == "" {
		return true, nil
	}

	if str == "" {
		return false, ErrBadString
	}

	// Work on '/'-separated components regardless of the native separator.
	patternParts := strings.Split(filepath.ToSlash(filepath.Clean(pattern)), "/")
	pathParts := strings.Split(filepath.ToSlash(str), "/")

	return childMatch(patternParts, pathParts)
}
func childMatch(patterns, strs []string) (matched bool, err error) {
if patterns[0] != "" {
// relative pattern can always be nested down
return true, nil
}
// match path against absolute pattern prefix
l := 0
if len(strs) > len(patterns) {
l = len(patterns)
} else {
l = len(strs)
}
return match(patterns[0:l], strs)
}
func hasDoubleWildcard(list []string) (ok bool, pos int) {
for i, item := range list {
if item == "**" {
@ -102,21 +147,29 @@ func match(patterns, strs []string) (matched bool, err error) {
// List returns true if str matches one of the patterns. Empty patterns are
// ignored.
func List(patterns []string, str string) (matched bool, err error) {
func List(patterns []string, str string) (matched bool, childMayMatch bool, err error) {
for _, pat := range patterns {
if pat == "" {
continue
}
matched, err = Match(pat, str)
m, err := Match(pat, str)
if err != nil {
return false, err
return false, false, err
}
if matched {
return true, nil
c, err := ChildMatch(pat, str)
if err != nil {
return false, false, err
}
matched = matched || m
childMayMatch = childMayMatch || c
if matched && childMayMatch {
return true, true, nil
}
}
return false, nil
return matched, childMayMatch, nil
}

View file

@ -124,6 +124,77 @@ func TestMatch(t *testing.T) {
}
}
// childMatchTests lists the cases checked by TestChildMatch. Each
// (pattern, path) pair appears exactly once.
var childMatchTests = []struct {
	pattern string // pattern handed to filter.ChildMatch
	path    string // path whose children are being considered
	match   bool   // expected result of filter.ChildMatch
}{
	// empty pattern
	{"", "", true},
	{"", "/foo", true},
	{"", "/x/y/z/foo", true},

	// relative patterns
	{"foo/bar", "/foo", true},
	{"baz/bar", "/foo", true},
	{"foo", "/foo/bar", true},
	{"bar", "/foo", true},
	{"baz", "/foo/bar", true},
	{"*", "/foo", true},
	{"*", "/foo/bar", true},

	// absolute patterns
	{"/foo/bar", "/foo", true},
	{"/foo/bar/baz", "/foo", true},
	{"/foo/bar/baz", "/foo/bar", true},
	{"/foo/bar/baz", "/foo/baz", false},
	{"/foo/**/baz", "/foo/bar/baz", true},
	{"/foo/**/qux", "/foo/bar/baz/qux", true},
	{"/baz/bar", "/foo", false},
	{"/foo", "/foo/bar", true},
	{"/*", "/foo", true},
	{"/*", "/foo/bar", true},
	{"/**", "/foo", true},
	{"/*/**", "/foo", true},
	{"/*/**", "/foo/bar", true},
	{"/*/bar", "/foo", true},
	{"/bar/*", "/foo", false},
	{"/foo/*/baz", "/foo/bar", true},
	{"/foo/*/baz", "/foo/baz", true},
	{"/foo/*/baz", "/bar/baz", false},
	{"/**/*", "/foo", true},
	{"/**/bar", "/foo/bar", true},
}
// testchildpattern runs filter.ChildMatch for a single pattern/path pair and
// records a test error if the call returns an error or if the result differs
// from shouldMatch.
func testchildpattern(t *testing.T, pattern, path string, shouldMatch bool) {
	got, err := filter.ChildMatch(pattern, path)
	if err != nil {
		t.Errorf("test child pattern %q failed: expected no error for path %q, but error returned: %v",
			pattern, path, err)
	}

	if got == shouldMatch {
		return
	}

	t.Errorf("test: filter.ChildMatch(%q, %q): expected %v, got %v",
		pattern, path, shouldMatch, got)
}
// TestChildMatch checks every entry of childMatchTests. On platforms whose
// path separator is not '/', each entry is additionally checked with the
// pattern, the path, and both converted to the native separator.
func TestChildMatch(t *testing.T) {
	nativeSep := string(filepath.Separator)

	for _, tc := range childMatchTests {
		testchildpattern(t, tc.pattern, tc.path, tc.match)

		// The native-separator variants would only repeat the check above.
		if filepath.Separator == '/' {
			continue
		}

		nativePattern := strings.Replace(tc.pattern, "/", nativeSep, -1)
		nativePath := strings.Replace(tc.path, "/", nativeSep, -1)

		// pattern as native
		testchildpattern(t, nativePattern, tc.path, tc.match)
		// path as native
		testchildpattern(t, tc.pattern, nativePath, tc.match)
		// both pattern and path as native
		testchildpattern(t, nativePattern, nativePath, tc.match)
	}
}
func ExampleMatch() {
match, _ := filter.Match("*.go", "/home/user/file.go")
fmt.Printf("match: %v\n", match)
@ -157,7 +228,7 @@ var filterListTests = []struct {
func TestList(t *testing.T) {
for i, test := range filterListTests {
match, err := filter.List(test.patterns, test.path)
match, _, err := filter.List(test.patterns, test.path)
if err != nil {
t.Errorf("test %d failed: expected no error for patterns %q, but error returned: %v",
i, test.patterns, err)
@ -172,7 +243,7 @@ func TestList(t *testing.T) {
}
func ExampleList() {
match, _ := filter.List([]string{"*.c", "*.go"}, "/home/user/file.go")
match, _, _ := filter.List([]string{"*.c", "*.go"}, "/home/user/file.go")
fmt.Printf("match: %v\n", match)
// Output:
// match: true
@ -271,7 +342,7 @@ func BenchmarkFilterPatterns(b *testing.B) {
for i := 0; i < b.N; i++ {
c = 0
for _, line := range lines {
match, err := filter.List(patterns, line)
match, _, err := filter.List(patterns, line)
if err != nil {
b.Fatal(err)
}

View file

@ -17,7 +17,7 @@ type Restorer struct {
sn *Snapshot
Error func(dir string, node *Node, err error) error
SelectFilter func(item string, dstpath string, node *Node) bool
SelectFilter func(item string, dstpath string, node *Node) (bool, bool)
}
var restorerAbortOnAllErrors = func(str string, node *Node, err error) error { return err }
@ -26,7 +26,7 @@ var restorerAbortOnAllErrors = func(str string, node *Node, err error) error { r
func NewRestorer(repo Repository, id ID) (*Restorer, error) {
r := &Restorer{
repo: repo, Error: restorerAbortOnAllErrors,
SelectFilter: func(string, string, *Node) bool { return true },
SelectFilter: func(string, string, *Node) (bool, bool) { return true, true },
}
var err error
@ -46,9 +46,9 @@ func (res *Restorer) restoreTo(ctx context.Context, dst string, dir string, tree
}
for _, node := range tree.Nodes {
selectedForRestore := res.SelectFilter(filepath.Join(dir, node.Name),
selectedForRestore, childMayMatch := res.SelectFilter(filepath.Join(dir, node.Name),
filepath.Join(dst, dir, node.Name), node)
debug.Log("SelectForRestore returned %v", selectedForRestore)
debug.Log("SelectFilter returned %v %v", selectedForRestore, childMayMatch)
if selectedForRestore {
err := res.restoreNodeTo(ctx, node, dir, dst, idx)
@ -57,7 +57,7 @@ func (res *Restorer) restoreTo(ctx context.Context, dst string, dir string, tree
}
}
if node.Type == "dir" {
if node.Type == "dir" && childMayMatch {
if node.Subtree == nil {
return errors.Errorf("Dir without subtree in tree %v", treeID.Str())
}