From 7fd52f9f5701151bffa7fc5a9503cc8024cac27c Mon Sep 17 00:00:00 2001
From: Alexander Neumann <alexander@bumpern.de>
Date: Mon, 20 Jul 2015 00:13:39 +0200
Subject: [PATCH] Add exclude filter to archiver and 'backup' command

---
 archiver.go                    | 19 +++++--
 cmd/restic/cmd_backup.go       | 24 ++++++---
 cmd/restic/cmd_ls.go           | 18 ++++---
 cmd/restic/integration_test.go | 97 +++++++++++++++++++++++++++++++++-
 filter/filter.go               |  4 +-
 filter/filter_test.go          |  6 +--
 pipe/pipe.go                   | 28 +++++++---
 pipe/pipe_test.go              | 12 +++--
 walk_test.go                   |  7 ++-
 9 files changed, 179 insertions(+), 36 deletions(-)

diff --git a/archiver.go b/archiver.go
index 69b1d0b4a..1182a85c5 100644
--- a/archiver.go
+++ b/archiver.go
@@ -34,8 +34,8 @@ type Archiver struct {
 
 	blobToken chan struct{}
 
-	Error  func(dir string, fi os.FileInfo, err error) error
-	Filter func(item string, fi os.FileInfo) bool
+	Error        func(dir string, fi os.FileInfo, err error) error
+	SelectFilter pipe.SelectFunc
 }
 
 // NewArchiver returns a new archiver.
@@ -50,7 +50,7 @@ func NewArchiver(repo *repository.Repository) *Archiver {
 	}
 
 	arch.Error = archiverAbortOnAllErrors
-	arch.Filter = archiverAllowAllFiles
+	arch.SelectFilter = archiverAllowAllFiles
 
 	return arch
 }
@@ -577,7 +577,7 @@ func (arch *Archiver) Snapshot(p *Progress, paths []string, parentID backend.ID)
 	pipeCh := make(chan pipe.Job)
 	resCh := make(chan pipe.Result, 1)
 	go func() {
-		err := pipe.Walk(paths, done, pipeCh, resCh)
+		err := pipe.Walk(paths, arch.SelectFilter, done, pipeCh, resCh)
 		if err != nil {
 			debug.Log("Archiver.Snapshot", "pipe.Walk returned error %v", err)
 			return
@@ -659,7 +659,7 @@ func isRegularFile(fi os.FileInfo) bool {
 
 // Scan traverses the dirs to collect Stat information while emitting progress
 // information with p.
-func Scan(dirs []string, p *Progress) (Stat, error) {
+func Scan(dirs []string, filter pipe.SelectFunc, p *Progress) (Stat, error) {
 	p.Start()
 	defer p.Done()
 
@@ -678,6 +678,15 @@ func Scan(dirs []string, p *Progress) (Stat, error) {
 				fmt.Fprintf(os.Stderr, "error for %v: FileInfo is nil\n", str)
 				return nil
 			}
+
+			if !filter(str, fi) {
+				debug.Log("Scan.Walk", "path %v excluded", str)
+				if fi.IsDir() {
+					return filepath.SkipDir
+				}
+				return nil
+			}
+
 			s := Stat{}
 			if fi.IsDir() {
 				s.Dirs++
diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go
index 99c91aef3..3e18c3402 100644
--- a/cmd/restic/cmd_backup.go
+++ b/cmd/restic/cmd_backup.go
@@ -10,13 +10,15 @@ import (
 
 	"github.com/restic/restic"
 	"github.com/restic/restic/backend"
+	"github.com/restic/restic/filter"
 	"github.com/restic/restic/repository"
 	"golang.org/x/crypto/ssh/terminal"
 )
 
 type CmdBackup struct {
-	Parent string `short:"p" long:"parent"    description:"use this parent snapshot (default: last snapshot in repo that has the same target)"`
-	Force  bool   `short:"f" long:"force" description:"Force re-reading the target. Overrides the \"parent\" flag"`
+	Parent  string   `short:"p" long:"parent"  description:"use this parent snapshot (default: last snapshot in repo that has the same target)"`
+	Force   bool     `short:"f" long:"force"   description:"Force re-reading the target. Overrides the \"parent\" flag"`
+	Exclude []string `short:"e" long:"exclude" description:"Exclude a pattern (can be specified multiple times)"`
 
 	global *GlobalOptions
 }
@@ -282,14 +284,22 @@ func (cmd CmdBackup) Execute(args []string) error {
 
 	cmd.global.Verbosef("scan %v\n", target)
 
-	stat, err := restic.Scan(target, cmd.newScanProgress())
+	selectFilter := func(item string, fi os.FileInfo) bool {
+		matched, err := filter.List(cmd.Exclude, item)
+		if err != nil {
+			cmd.global.Warnf("error for exclude pattern: %v", err)
+		}
 
-	// TODO: add filter
-	// arch.Filter = func(dir string, fi os.FileInfo) bool {
-	// 	return true
-	// }
+		return !matched
+	}
+
+	stat, err := restic.Scan(target, selectFilter, cmd.newScanProgress())
+	if err != nil {
+		return err
+	}
 
 	arch := restic.NewArchiver(repo)
+	arch.SelectFilter = selectFilter
 
 	arch.Error = func(dir string, fi os.FileInfo, err error) error {
 		// TODO: make ignoring errors configurable
diff --git a/cmd/restic/cmd_ls.go b/cmd/restic/cmd_ls.go
index 8ec904bbd..91c9507e3 100644
--- a/cmd/restic/cmd_ls.go
+++ b/cmd/restic/cmd_ls.go
@@ -11,6 +11,8 @@ import (
 )
 
 type CmdLs struct {
+	Long bool `short:"l" long:"long" description:"Use a long listing format showing size and mode"`
+
 	global *GlobalOptions
 }
 
@@ -24,7 +26,11 @@ func init() {
 	}
 }
 
-func printNode(prefix string, n *restic.Node) string {
+func (cmd CmdLs) printNode(prefix string, n *restic.Node) string {
+	if !cmd.Long {
+		return filepath.Join(prefix, n.Name)
+	}
+
 	switch n.Type {
 	case "file":
 		return fmt.Sprintf("%s %5d %5d %6d %s %s",
@@ -40,17 +46,17 @@ func printNode(prefix string, n *restic.Node) string {
 	}
 }
 
-func printTree(prefix string, repo *repository.Repository, id backend.ID) error {
+func (cmd CmdLs) printTree(prefix string, repo *repository.Repository, id backend.ID) error {
 	tree, err := restic.LoadTree(repo, id)
 	if err != nil {
 		return err
 	}
 
 	for _, entry := range tree.Nodes {
-		fmt.Println(printNode(prefix, entry))
+		cmd.global.Printf("%s\n", cmd.printNode(prefix, entry)) // constant format: node names may contain '%'
 
 		if entry.Type == "dir" && entry.Subtree != nil {
-			err = printTree(filepath.Join(prefix, entry.Name), repo, entry.Subtree)
+			err = cmd.printTree(filepath.Join(prefix, entry.Name), repo, entry.Subtree)
 			if err != nil {
 				return err
 			}
@@ -89,7 +95,7 @@ func (cmd CmdLs) Execute(args []string) error {
 		return err
 	}
 
-	fmt.Printf("snapshot of %v at %s:\n", sn.Paths, sn.Time)
+	cmd.global.Verbosef("snapshot of %v at %s:\n", sn.Paths, sn.Time)
 
-	return printTree("", repo, sn.Tree)
+	return cmd.printTree("", repo, sn.Tree)
 }
diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go
index b06b85745..21323e8cc 100644
--- a/cmd/restic/integration_test.go
+++ b/cmd/restic/integration_test.go
@@ -10,6 +10,7 @@ import (
 	"os"
 	"path/filepath"
 	"regexp"
+	"strings"
 	"syscall"
 	"testing"
 	"time"
@@ -44,7 +45,11 @@ func cmdInit(t testing.TB, global GlobalOptions) {
 }
 
 func cmdBackup(t testing.TB, global GlobalOptions, target []string, parentID backend.ID) {
-	cmd := &CmdBackup{global: &global}
+	cmdBackupExcludes(t, global, target, parentID, nil)
+}
+
+func cmdBackupExcludes(t testing.TB, global GlobalOptions, target []string, parentID backend.ID, excludes []string) {
+	cmd := &CmdBackup{global: &global, Exclude: excludes}
 	cmd.Parent = parentID.String()
 
 	t.Logf("backing up %v", target)
@@ -73,6 +78,16 @@ func cmdCheck(t testing.TB, global GlobalOptions) {
 	OK(t, cmd.Execute(nil))
 }
 
+func cmdLs(t testing.TB, global GlobalOptions, snapshotID string) []string {
+	// run "ls" for snapshotID with global.stdout redirected into buf, then return the output split at newlines
+	var buf bytes.Buffer
+	global.stdout = &buf
+	cmd := &CmdLs{global: &global}
+	OK(t, cmd.Execute([]string{snapshotID}))
+
+	return strings.Split(buf.String(), "\n")
+}
+
 func TestBackup(t *testing.T) {
 	withTestEnvironment(t, func(env *testEnvironment, global GlobalOptions) {
 		datafile := filepath.Join("testdata", "backup-data.tar.gz")
@@ -237,6 +252,86 @@ func TestBackupMissingFile2(t *testing.T) {
 	})
 }
 
+func includes(haystack []string, needle string) bool {
+	// linear scan over haystack that stops at the first element equal to needle
+	found := false
+	for i := 0; i < len(haystack) && !found; i++ {
+		found = haystack[i] == needle
+	}
+
+	return found
+}
+
+func loadSnapshotMap(t testing.TB, global GlobalOptions) map[string]struct{} {
+	snapshotIDs := cmdList(t, global, "snapshots")
+	// build a set keyed by the string form of every ID returned by the "snapshots" listing
+	m := make(map[string]struct{})
+	for _, id := range snapshotIDs {
+		m[id.String()] = struct{}{}
+	}
+
+	return m
+}
+
+func lastSnapshot(old, new map[string]struct{}) (map[string]struct{}, string) {
+	for id := range new {
+		if _, known := old[id]; known {
+			continue // already recorded in old, keep looking
+		}
+		old[id] = struct{}{} // record the ID in old before returning it
+		return old, id
+	}
+	return old, "" // every key of new is already present in old
+}
+
+var backupExcludeFilenames = []string{
+	"testfile1",
+	"foo.tar.gz",
+	"private/secret/passwords.txt",
+	"work/source/test.c",
+}
+
+func TestBackupExclude(t *testing.T) {
+	withTestEnvironment(t, func(env *testEnvironment, global GlobalOptions) {
+		cmdInit(t, global)
+
+		datadir := filepath.Join(env.base, "testdata")
+
+		for _, filename := range backupExcludeFilenames {
+			fp := filepath.Join(datadir, filename)
+			OK(t, os.MkdirAll(filepath.Dir(fp), 0755))
+
+			f, err := os.Create(fp)
+			OK(t, err)
+			_, err = fmt.Fprint(f, filename)
+			OK(t, err)
+			OK(t, f.Close())
+		}
+
+		snapshots := make(map[string]struct{})
+		// first snapshot: no exclude patterns, so the tarball must be listed
+		cmdBackup(t, global, []string{datadir}, nil)
+		snapshots, snapshotID := lastSnapshot(snapshots, loadSnapshotMap(t, global))
+		files := cmdLs(t, global, snapshotID)
+		Assert(t, includes(files, filepath.Join("testdata", "foo.tar.gz")),
+			"expected file %q in first snapshot, but it's not included", "foo.tar.gz")
+		// second snapshot: the tarball is excluded by a glob pattern
+		cmdBackupExcludes(t, global, []string{datadir}, nil, []string{"*.tar.gz"})
+		snapshots, snapshotID = lastSnapshot(snapshots, loadSnapshotMap(t, global))
+		files = cmdLs(t, global, snapshotID)
+		Assert(t, !includes(files, filepath.Join("testdata", "foo.tar.gz")),
+			"expected file %q not in second snapshot, but it's included", "foo.tar.gz")
+		// third snapshot: additionally exclude everything below private/secret
+		cmdBackupExcludes(t, global, []string{datadir}, nil, []string{"*.tar.gz", "private/secret"})
+		snapshots, snapshotID = lastSnapshot(snapshots, loadSnapshotMap(t, global))
+		files = cmdLs(t, global, snapshotID)
+		Assert(t, !includes(files, filepath.Join("testdata", "foo.tar.gz")),
+			"expected file %q not in third snapshot, but it's included", "foo.tar.gz")
+		Assert(t, !includes(files, filepath.Join("testdata", "private", "secret", "passwords.txt")),
+			"expected file %q not in third snapshot, but it's included", "passwords.txt")
+	})
+}
+
 const (
 	incrementalFirstWrite  = 20 * 1042 * 1024
 	incrementalSecondWrite = 12 * 1042 * 1024
diff --git a/filter/filter.go b/filter/filter.go
index 274d1f960..f8c335e34 100644
--- a/filter/filter.go
+++ b/filter/filter.go
@@ -91,8 +91,8 @@ func match(patterns, strs []string) (matched bool, err error) {
 	return false, nil
 }
 
-// MatchList returns true if str matches one of the patterns.
-func MatchList(patterns []string, str string) (matched bool, err error) {
+// List returns true if str matches one of the patterns.
+func List(patterns []string, str string) (matched bool, err error) {
 	for _, pat := range patterns {
 		matched, err = Match(pat, str)
 		if err != nil {
diff --git a/filter/filter_test.go b/filter/filter_test.go
index ce9474efc..78e731b68 100644
--- a/filter/filter_test.go
+++ b/filter/filter_test.go
@@ -121,7 +121,7 @@ var filterListTests = []struct {
 
 func TestMatchList(t *testing.T) {
 	for i, test := range filterListTests {
-		match, err := filter.MatchList(test.patterns, test.path)
+		match, err := filter.List(test.patterns, test.path)
 		if err != nil {
 			t.Errorf("test %d failed: expected no error for patterns %q, but error returned: %v",
 				i, test.patterns, err)
@@ -136,7 +136,7 @@ func TestMatchList(t *testing.T) {
 }
 
 func ExampleMatchList() {
-	match, _ := filter.MatchList([]string{"*.c", "*.go"}, "/home/user/file.go")
+	match, _ := filter.List([]string{"*.c", "*.go"}, "/home/user/file.go")
 	fmt.Printf("match: %v\n", match)
 	// Output:
 	// match: true
@@ -235,7 +235,7 @@ func BenchmarkFilterPatterns(b *testing.B) {
 	for i := 0; i < b.N; i++ {
 		c = 0
 		for _, line := range lines {
-			match, err := filter.MatchList(patterns, line)
+			match, err := filter.List(patterns, line)
 			if err != nil {
 				b.Fatal(err)
 			}
diff --git a/pipe/pipe.go b/pipe/pipe.go
index a419f082d..4e9908315 100644
--- a/pipe/pipe.go
+++ b/pipe/pipe.go
@@ -82,13 +82,22 @@ func isFile(fi os.FileInfo) bool {
 
 var errCancelled = errors.New("walk cancelled")
 
-func walk(basedir, dir string, done chan struct{}, jobs chan<- Job, res chan<- Result) error {
+// SelectFunc reports whether item (a file or dir) should be included. Excluded files are
+// ignored and excluded dirs are not walked. fi may be nil when walk's Lstat of item failed.
+type SelectFunc func(item string, fi os.FileInfo) bool
+
+func walk(basedir, dir string, selectFunc SelectFunc, done chan struct{}, jobs chan<- Job, res chan<- Result) error {
 	info, err := os.Lstat(dir)
 	if err != nil {
 		debug.Log("pipe.walk", "error for %v: %v", dir, err)
 		return err
 	}
 
+	if !selectFunc(dir, info) {
+		debug.Log("pipe.walk", "file %v excluded by filter", dir)
+		return nil
+	}
+
 	relpath, _ := filepath.Rel(basedir, dir)
 
 	if !info.IsDir() {
@@ -114,13 +123,18 @@ func walk(basedir, dir string, done chan struct{}, jobs chan<- Job, res chan<- R
 	for _, name := range names {
 		subpath := filepath.Join(dir, name)
 
+		fi, statErr := os.Lstat(subpath)
+		if !selectFunc(subpath, fi) {
+			debug.Log("pipe.walk", "file %v excluded by filter", subpath)
+			continue
+		}
+
 		ch := make(chan Result, 1)
 		entries = append(entries, ch)
 
-		fi, err := os.Lstat(subpath)
-		if err != nil {
+		if statErr != nil {
 			select {
-			case jobs <- Entry{info: fi, error: err, basedir: basedir, path: filepath.Join(relpath, name), result: ch}:
+			case jobs <- Entry{info: fi, error: statErr, basedir: basedir, path: filepath.Join(relpath, name), result: ch}:
 			case <-done:
 				return errCancelled
 			}
@@ -132,7 +146,7 @@ func walk(basedir, dir string, done chan struct{}, jobs chan<- Job, res chan<- R
 		debug.RunHook("pipe.walk2", filepath.Join(relpath, name))
 
 		if isDir(fi) {
-			err = walk(basedir, subpath, done, jobs, ch)
+			err = walk(basedir, subpath, selectFunc, done, jobs, ch)
 			if err != nil {
 				return err
 			}
@@ -156,7 +170,7 @@ func walk(basedir, dir string, done chan struct{}, jobs chan<- Job, res chan<- R
 
 // Walk sends a Job for each file and directory it finds below the paths. When
 // the channel done is closed, processing stops.
-func Walk(paths []string, done chan struct{}, jobs chan<- Job, res chan<- Result) error {
+func Walk(paths []string, selectFunc SelectFunc, done chan struct{}, jobs chan<- Job, res chan<- Result) error {
 	defer func() {
 		debug.Log("pipe.Walk", "output channel closed")
 		close(jobs)
@@ -166,7 +180,7 @@ func Walk(paths []string, done chan struct{}, jobs chan<- Job, res chan<- Result
 	for _, path := range paths {
 		debug.Log("pipe.Walk", "start walker for %v", path)
 		ch := make(chan Result, 1)
-		err := walk(filepath.Dir(path), path, done, jobs, ch)
+		err := walk(filepath.Dir(path), path, selectFunc, done, jobs, ch)
 		if err != nil {
 			debug.Log("pipe.Walk", "error for %v: %v", path, err)
 			continue
diff --git a/pipe/pipe_test.go b/pipe/pipe_test.go
index 42ff7c31d..001015938 100644
--- a/pipe/pipe_test.go
+++ b/pipe/pipe_test.go
@@ -19,6 +19,10 @@ type stats struct {
 	dirs, files int
 }
 
+func acceptAll(string, os.FileInfo) bool {
+	return true // select every item, so nothing is filtered out of the walk
+}
+
 func statPath(path string) (stats, error) {
 	var s stats
 
@@ -118,7 +122,7 @@ func TestPipelineWalkerWithSplit(t *testing.T) {
 	}()
 
 	resCh := make(chan pipe.Result, 1)
-	err = pipe.Walk([]string{TestWalkerPath}, done, jobs, resCh)
+	err = pipe.Walk([]string{TestWalkerPath}, acceptAll, done, jobs, resCh)
 	OK(t, err)
 
 	// wait for all workers to terminate
@@ -198,7 +202,7 @@ func TestPipelineWalker(t *testing.T) {
 	}
 
 	resCh := make(chan pipe.Result, 1)
-	err = pipe.Walk([]string{TestWalkerPath}, done, jobs, resCh)
+	err = pipe.Walk([]string{TestWalkerPath}, acceptAll, done, jobs, resCh)
 	OK(t, err)
 
 	// wait for all workers to terminate
@@ -298,7 +302,7 @@ func BenchmarkPipelineWalker(b *testing.B) {
 		}()
 
 		resCh := make(chan pipe.Result, 1)
-		err := pipe.Walk([]string{TestWalkerPath}, done, jobs, resCh)
+		err := pipe.Walk([]string{TestWalkerPath}, acceptAll, done, jobs, resCh)
 		OK(b, err)
 
 		// wait for all workers to terminate
@@ -375,7 +379,7 @@ func TestPipelineWalkerMultiple(t *testing.T) {
 	}
 
 	resCh := make(chan pipe.Result, 1)
-	err = pipe.Walk(paths, done, jobs, resCh)
+	err = pipe.Walk(paths, acceptAll, done, jobs, resCh)
 	OK(t, err)
 
 	// wait for all workers to terminate
diff --git a/walk_test.go b/walk_test.go
index 397655978..4e0f8b930 100644
--- a/walk_test.go
+++ b/walk_test.go
@@ -1,6 +1,7 @@
 package restic_test
 
 import (
+	"os"
 	"path/filepath"
 	"testing"
 
@@ -33,7 +34,11 @@ func TestWalkTree(t *testing.T) {
 	// start filesystem walker
 	fsJobs := make(chan pipe.Job)
 	resCh := make(chan pipe.Result, 1)
-	go pipe.Walk(dirs, done, fsJobs, resCh)
+
+	f := func(string, os.FileInfo) bool {
+		return true
+	}
+	go pipe.Walk(dirs, f, done, fsJobs, resCh)
 
 	for {
 		// receive fs job