Add filter implementation for files
This commit is contained in:
parent
7e0a9aa565
commit
bbdb2ebfa0
4 changed files with 481 additions and 0 deletions
filter
5
filter/doc.go
Normal file
5
filter/doc.go
Normal file
|
@ -0,0 +1,5 @@
|
|||
// Package filter implements filters for files similar to filepath.Glob, but
|
||||
// in contrast to filepath.Glob a pattern may specify directories.
|
||||
//
|
||||
// For a list of valid patterns please see the documentation on filepath.Glob.
|
||||
package filter
|
151
filter/filter.go
Normal file
151
filter/filter.go
Normal file
|
@ -0,0 +1,151 @@
|
|||
package filter
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ErrBadString is returned when Match is called with the empty string as the
|
||||
// second argument.
|
||||
var ErrBadString = errors.New("filter.Match: string is empty")
|
||||
|
||||
// Match returns true if str matches the pattern. When the pattern is
|
||||
// malformed, filepath.ErrBadPattern is returned. The empty pattern matches
|
||||
// everything, when str is the empty string ErrBadString is returned.
|
||||
//
|
||||
// Pattern can be a combination of patterns suitable for filepath.Match, joined
|
||||
// by filepath.Separator.
|
||||
func Match(pattern, str string) (matched bool, err error) {
|
||||
if pattern == "" {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if str == "" {
|
||||
return false, ErrBadString
|
||||
}
|
||||
|
||||
patterns := strings.Split(pattern, string(filepath.Separator))
|
||||
strs := strings.Split(str, string(filepath.Separator))
|
||||
|
||||
return match(patterns, strs)
|
||||
}
|
||||
|
||||
func match(patterns, strs []string) (matched bool, err error) {
|
||||
if len(patterns) == 0 && len(strs) == 0 {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if len(patterns) <= len(strs) {
|
||||
outer:
|
||||
for offset := len(strs) - len(patterns); offset >= 0; offset-- {
|
||||
|
||||
for i := len(patterns) - 1; i >= 0; i-- {
|
||||
ok, err := filepath.Match(patterns[i], strs[offset+i])
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if !ok {
|
||||
continue outer
|
||||
}
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// MatchList returns true if str matches one of the patterns.
|
||||
func MatchList(patterns []string, str string) (matched bool, err error) {
|
||||
for _, pat := range patterns {
|
||||
matched, err = Match(pat, str)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if matched {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// matchList returns true if str matches one of the patterns.
|
||||
func matchList(patterns [][]string, str []string) (matched bool, err error) {
|
||||
for _, pat := range patterns {
|
||||
matched, err = match(pat, str)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if matched {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Filter contains include and exclude patterns. If both lists of patterns are
|
||||
// empty, all files are accepted.
|
||||
type Filter struct {
|
||||
include, exclude [][]string
|
||||
}
|
||||
|
||||
// New returns a new filter with the given include/exclude lists of patterns.
|
||||
func New(include, exclude []string) *Filter {
|
||||
f := &Filter{}
|
||||
|
||||
for _, pat := range include {
|
||||
f.include = append(f.include, strings.Split(pat, string(filepath.Separator)))
|
||||
}
|
||||
|
||||
for _, pat := range exclude {
|
||||
f.exclude = append(f.exclude, strings.Split(pat, string(filepath.Separator)))
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
// Match tests a filename against the filter. If include and exclude patterns
|
||||
// are both empty, true is returned.
|
||||
//
|
||||
// If only include patterns and no exclude patterns are configured, true is
|
||||
// returned iff name matches one of the include patterns.
|
||||
//
|
||||
// If only exclude patterns and no include patterns are configured, true is
|
||||
// returned iff name does not match all of the exclude patterns.
|
||||
func (f Filter) Match(name string) (matched bool, err error) {
|
||||
if name == "" {
|
||||
return false, ErrBadString
|
||||
}
|
||||
|
||||
if len(f.include) == 0 && len(f.exclude) == 0 {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
names := strings.Split(name, string(filepath.Separator))
|
||||
if len(f.exclude) == 0 {
|
||||
return matchList(f.include, names)
|
||||
}
|
||||
|
||||
if len(f.include) == 0 {
|
||||
match, err := matchList(f.exclude, names)
|
||||
return !match, err
|
||||
}
|
||||
|
||||
excluded, err := matchList(f.exclude, names)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if !excluded {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return matchList(f.include, names)
|
||||
}
|
325
filter/filter_test.go
Normal file
325
filter/filter_test.go
Normal file
|
@ -0,0 +1,325 @@
|
|||
package filter_test
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"compress/bzip2"
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/restic/restic/filter"
|
||||
)
|
||||
|
||||
var matchTests = []struct {
|
||||
pattern string
|
||||
path string
|
||||
match bool
|
||||
}{
|
||||
{"", "", true},
|
||||
{"", "foo", true},
|
||||
{"", "/x/y/z/foo", true},
|
||||
{"*.go", "/foo/bar/test.go", true},
|
||||
{"*.c", "/foo/bar/test.go", false},
|
||||
{"*", "/foo/bar/test.go", true},
|
||||
{"foo*", "/foo/bar/test.go", true},
|
||||
{"bar*", "/foo/bar/test.go", true},
|
||||
{"/bar*", "/foo/bar/test.go", false},
|
||||
{"bar/*", "/foo/bar/test.go", true},
|
||||
{"baz/*", "/foo/bar/test.go", false},
|
||||
{"bar/test.go", "/foo/bar/test.go", true},
|
||||
{"bar/*.go", "/foo/bar/test.go", true},
|
||||
{"ba*/*.go", "/foo/bar/test.go", true},
|
||||
{"bb*/*.go", "/foo/bar/test.go", false},
|
||||
{"test.*", "/foo/bar/test.go", true},
|
||||
{"tesT.*", "/foo/bar/test.go", false},
|
||||
{"bar/*", "/foo/bar/baz", true},
|
||||
{"bar", "/foo/bar", true},
|
||||
{"bar", "/foo/bar/baz", true},
|
||||
{"bar", "/foo/bar/test.go", true},
|
||||
{"/foo/*test.*", "/foo/bar/test.go", false},
|
||||
{"/foo/*/test.*", "/foo/bar/test.go", true},
|
||||
{"/foo/*/bar/test.*", "/foo/bar/test.go", false},
|
||||
{"/*/*/bar/test.*", "/foo/bar/test.go", false},
|
||||
{"/*/*/bar/test.*", "/foo/bar/baz/test.go", false},
|
||||
{"/*/*/baz/test.*", "/foo/bar/baz/test.go", true},
|
||||
{"/*/foo/bar/test.*", "/foo/bar/baz/test.go", false},
|
||||
{"/*/foo/bar/test.*", "/foo/bar/baz/test.go", false},
|
||||
{"/foo/bar/test.*", "bar/baz/test.go", false},
|
||||
{"/x/y/bar/baz/test.*", "bar/baz/test.go", false},
|
||||
{"/x/y/bar/baz/test.c", "bar/baz/test.go", false},
|
||||
{"baz/test.*", "bar/baz/test.go", true},
|
||||
{"baz/tesT.*", "bar/baz/test.go", false},
|
||||
{"test.go", "bar/baz/test.go", true},
|
||||
{"*.go", "bar/baz/test.go", true},
|
||||
{"*.c", "bar/baz/test.go", false},
|
||||
{"sdk", "/foo/bar/sdk", true},
|
||||
{"sdk", "/foo/bar/sdk/test/sdk_foo.go", true},
|
||||
{"sdk/*/cpp/*/*vars*.html", "/usr/share/doc/libreoffice/sdk/docs/cpp/ref/a00517.html", false},
|
||||
}
|
||||
|
||||
func TestMatch(t *testing.T) {
|
||||
for i, test := range matchTests {
|
||||
match, err := filter.Match(test.pattern, test.path)
|
||||
if err != nil {
|
||||
t.Errorf("test %d failed: expected no error for pattern %q, but error returned: %v",
|
||||
i, test.pattern, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if match != test.match {
|
||||
t.Errorf("test %d: filter.Match(%q, %q): expected %v, got %v",
|
||||
i, test.pattern, test.path, test.match, match)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func ExampleMatch() {
|
||||
match, _ := filter.Match("*.go", "/home/user/file.go")
|
||||
fmt.Printf("match: %v\n", match)
|
||||
// Output:
|
||||
// match: true
|
||||
}
|
||||
|
||||
func ExampleMatch_wildcards() {
|
||||
match, _ := filter.Match("/home/[uU]ser/?.go", "/home/user/F.go")
|
||||
fmt.Printf("match: %v\n", match)
|
||||
// Output:
|
||||
// match: true
|
||||
}
|
||||
|
||||
var filterListTests = []struct {
|
||||
patterns []string
|
||||
path string
|
||||
match bool
|
||||
}{
|
||||
{[]string{"*.go"}, "/foo/bar/test.go", true},
|
||||
{[]string{"*.c"}, "/foo/bar/test.go", false},
|
||||
{[]string{"*.go", "*.c"}, "/foo/bar/test.go", true},
|
||||
{[]string{"*"}, "/foo/bar/test.go", true},
|
||||
{[]string{"x"}, "/foo/bar/test.go", false},
|
||||
{[]string{"?"}, "/foo/bar/test.go", false},
|
||||
{[]string{"?", "x"}, "/foo/bar/x", true},
|
||||
{[]string{"/*/*/bar/test.*"}, "/foo/bar/test.go", false},
|
||||
{[]string{"/*/*/bar/test.*", "*.go"}, "/foo/bar/test.go", true},
|
||||
}
|
||||
|
||||
func TestMatchList(t *testing.T) {
|
||||
for i, test := range filterListTests {
|
||||
match, err := filter.MatchList(test.patterns, test.path)
|
||||
if err != nil {
|
||||
t.Errorf("test %d failed: expected no error for patterns %q, but error returned: %v",
|
||||
i, test.patterns, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if match != test.match {
|
||||
t.Errorf("test %d: filter.MatchList(%q, %q): expected %v, got %v",
|
||||
i, test.patterns, test.path, test.match, match)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func ExampleMatchList() {
|
||||
match, _ := filter.MatchList([]string{"*.c", "*.go"}, "/home/user/file.go")
|
||||
fmt.Printf("match: %v\n", match)
|
||||
// Output:
|
||||
// match: true
|
||||
}
|
||||
|
||||
func extractTestLines(t testing.TB) (lines []string) {
|
||||
f, err := os.Open("testdata/libreoffice.txt.bz2")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if err := f.Close(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
sc := bufio.NewScanner(bzip2.NewReader(f))
|
||||
for sc.Scan() {
|
||||
lines = append(lines, sc.Text())
|
||||
}
|
||||
|
||||
return lines
|
||||
}
|
||||
|
||||
func TestFilterPatternsFile(t *testing.T) {
|
||||
lines := extractTestLines(t)
|
||||
|
||||
var testPatterns = []struct {
|
||||
pattern string
|
||||
hits uint
|
||||
}{
|
||||
{"*.html", 18249},
|
||||
{"sdk", 22186},
|
||||
{"sdk/*/cpp/*/*vars.html", 3},
|
||||
}
|
||||
|
||||
for _, test := range testPatterns {
|
||||
var c uint
|
||||
for _, line := range lines {
|
||||
match, err := filter.Match(test.pattern, line)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
continue
|
||||
}
|
||||
|
||||
if match {
|
||||
c++
|
||||
// fmt.Printf("pattern %q, line %q\n", test.pattern, line)
|
||||
}
|
||||
}
|
||||
|
||||
if c != test.hits {
|
||||
t.Errorf("wrong number of hits for pattern %q: want %d, got %d",
|
||||
test.pattern, test.hits, c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFilterLines(b *testing.B) {
|
||||
pattern := "sdk/*/cpp/*/*vars.html"
|
||||
lines := extractTestLines(b)
|
||||
var c uint
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
c = 0
|
||||
for _, line := range lines {
|
||||
match, err := filter.Match(pattern, line)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
if match {
|
||||
c++
|
||||
}
|
||||
}
|
||||
|
||||
if c != 3 {
|
||||
b.Fatalf("wrong number of matches: expected 3, got %d", c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFilterSingle(b *testing.B) {
|
||||
pattern := "sdk/*/cpp/*/*vars.html"
|
||||
line := "/usr/share/doc/libreoffice/sdk/docs/cpp/ref/a00517.html"
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
filter.Match(pattern, line)
|
||||
}
|
||||
}
|
||||
|
||||
type test struct {
|
||||
path string
|
||||
match bool
|
||||
}
|
||||
|
||||
var filterTests = []struct {
|
||||
include, exclude []string
|
||||
tests []test
|
||||
}{
|
||||
{
|
||||
[]string{"*.go", "/home/user"},
|
||||
[]string{},
|
||||
[]test{
|
||||
{"/home/user/foo/test.c", true},
|
||||
{"/home/user/foo/test.go", true},
|
||||
{"/home/foo/test.go", true},
|
||||
{"/home/foo/test.doc", false},
|
||||
{"/x", false},
|
||||
{"main.go", true},
|
||||
},
|
||||
},
|
||||
{
|
||||
nil,
|
||||
[]string{"*.docx", "*.xlsx"},
|
||||
[]test{
|
||||
{"/home/user/foo/test.c", true},
|
||||
{"/home/user/foo/test.docx", false},
|
||||
{"/home/foo/test.xlsx", false},
|
||||
{"/home/foo/test.doc", true},
|
||||
{"/x", true},
|
||||
{"main.go", true},
|
||||
},
|
||||
},
|
||||
{
|
||||
[]string{"accounting.*", "*Partner*"},
|
||||
[]string{"*.docx", "*.xlsx"},
|
||||
[]test{
|
||||
// {"/home/user/foo/test.c", true},
|
||||
{"/home/user/Partner/test.docx", true},
|
||||
{"/home/user/bar/test.docx", false},
|
||||
{"/home/user/test.xlsx", false},
|
||||
{"/home/foo/test.doc", true},
|
||||
{"/x", true},
|
||||
{"main.go", true},
|
||||
{"/users/A/accounting.xlsx", true},
|
||||
{"/users/A/Calculation Partner.xlsx", true},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
func TestFilter(t *testing.T) {
|
||||
for i, test := range filterTests {
|
||||
f := filter.New(test.include, test.exclude)
|
||||
|
||||
for _, testfile := range test.tests {
|
||||
matched, err := f.Match(testfile.path)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if matched != testfile.match {
|
||||
t.Errorf("test %d: filter.Match(%q): expected %v, got %v",
|
||||
i, testfile.path, testfile.match, matched)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFilter(b *testing.B) {
|
||||
lines := extractTestLines(b)
|
||||
f := filter.New([]string{"sdk", "*.html"}, []string{"*.png"})
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
for _, line := range lines {
|
||||
f.Match(line)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFilterInclude(b *testing.B) {
|
||||
lines := extractTestLines(b)
|
||||
f := filter.New([]string{"sdk", "*.html"}, nil)
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
for _, line := range lines {
|
||||
f.Match(line)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFilterExclude(b *testing.B) {
|
||||
lines := extractTestLines(b)
|
||||
f := filter.New(nil, []string{"*.png"})
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
for _, line := range lines {
|
||||
f.Match(line)
|
||||
}
|
||||
}
|
||||
}
|
BIN
filter/testdata/libreoffice.txt.bz2
vendored
Normal file
BIN
filter/testdata/libreoffice.txt.bz2
vendored
Normal file
Binary file not shown.
Loading…
Reference in a new issue