From ac4b8c98ac30858814db86ce2ea9fac82a4585ba Mon Sep 17 00:00:00 2001
From: yoshiera
Date: Thu, 3 Sep 2020 09:18:58 +0800
Subject: [PATCH] Support excluding files by size

---
 changelog/unreleased/issue-2569 |   8 +++
 cmd/restic/cmd_backup.go        |  10 +++
 cmd/restic/exclude.go           |  62 ++++++++++++++++++
 cmd/restic/exclude_test.go      | 110 ++++++++++++++++++++++++++++++++
 doc/040_backup.rst              |  16 +++++
 5 files changed, 206 insertions(+)
 create mode 100644 changelog/unreleased/issue-2569

diff --git a/changelog/unreleased/issue-2569 b/changelog/unreleased/issue-2569
new file mode 100644
index 000000000..8a609039e
--- /dev/null
+++ b/changelog/unreleased/issue-2569
@@ -0,0 +1,8 @@
+Enhancement: Support excluding files by their size
+
+The `backup` command now supports the `--exclude-larger-than` option to exclude files which are
+larger than the specified maximum size. This can for example be useful to exclude unimportant
+files with a large file size.
+
+https://github.com/restic/restic/issues/2569
+https://github.com/restic/restic/pull/2914
diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go
index 7aa9a1d09..e42ba81d9 100644
--- a/cmd/restic/cmd_backup.go
+++ b/cmd/restic/cmd_backup.go
@@ -87,6 +87,7 @@ type BackupOptions struct {
 	ExcludeOtherFS          bool
 	ExcludeIfPresent        []string
 	ExcludeCaches           bool
+	ExcludeLargerThan       string
 	Stdin                   bool
 	StdinFilename           string
 	Tags                    []string
@@ -115,6 +116,7 @@ func init() {
 	f.BoolVarP(&backupOptions.ExcludeOtherFS, "one-file-system", "x", false, "exclude other file systems")
 	f.StringArrayVar(&backupOptions.ExcludeIfPresent, "exclude-if-present", nil, "takes `filename[:header]`, exclude contents of directories containing filename (except filename itself) if header of that file is as provided (can be specified multiple times)")
 	f.BoolVar(&backupOptions.ExcludeCaches, "exclude-caches", false, `excludes cache directories that are marked with a CACHEDIR.TAG file. See https://bford.info/cachedir/ for the Cache Directory Tagging Standard`)
+	f.StringVar(&backupOptions.ExcludeLargerThan, "exclude-larger-than", "", "max `size` of the files to be backed up (allowed suffixes: k/K, m/M, g/G, t/T)")
 	f.BoolVar(&backupOptions.Stdin, "stdin", false, "read backup from stdin")
 	f.StringVar(&backupOptions.StdinFilename, "stdin-filename", "stdin", "`filename` to use when reading from stdin")
 	f.StringArrayVar(&backupOptions.Tags, "tag", nil, "add a `tag` for the new snapshot (can be specified multiple times)")
@@ -285,6 +287,14 @@ func collectRejectFuncs(opts BackupOptions, repo *repository.Repository, targets
 		fs = append(fs, f)
 	}
 
+	if len(opts.ExcludeLargerThan) != 0 && !opts.Stdin {
+		f, err := rejectBySize(opts.ExcludeLargerThan)
+		if err != nil {
+			return nil, err
+		}
+		fs = append(fs, f)
+	}
+
 	return fs, nil
 }
 
diff --git a/cmd/restic/exclude.go b/cmd/restic/exclude.go
index 2e5349611..cb96d7b5c 100644
--- a/cmd/restic/exclude.go
+++ b/cmd/restic/exclude.go
@@ -6,6 +6,7 @@ import (
 	"io"
 	"os"
 	"path/filepath"
+	"strconv"
 	"strings"
 	"sync"
 
@@ -292,3 +293,64 @@ func rejectResticCache(repo *repository.Repository) (RejectByNameFunc, error) {
 		return false
 	}, nil
 }
+
+// rejectBySize returns a RejectFunc that rejects every non-directory item
+// whose size is larger than the limit described by maxSizeStr (see
+// parseSizeStr for the accepted syntax).
+func rejectBySize(maxSizeStr string) (RejectFunc, error) {
+	maxSize, err := parseSizeStr(maxSizeStr)
+	if err != nil {
+		return nil, err
+	}
+
+	return func(item string, fi os.FileInfo) bool {
+		// directory will be ignored
+		if fi.IsDir() {
+			return false
+		}
+
+		filesize := fi.Size()
+		if filesize > maxSize {
+			debug.Log("file %s is oversize: %d", item, filesize)
+			return true
+		}
+
+		return false
+	}, nil
+}
+
+// parseSizeStr converts a size such as "1024", "10k" or "2T" into a number of
+// bytes. A trailing b/B, k/K, m/M, g/G or t/T selects a power-of-1024
+// multiplier; without a suffix the number is taken as bytes. An error is
+// returned if the numeric part is empty or not a base-10 integer.
+func parseSizeStr(sizeStr string) (int64, error) {
+	// The suffix lookup below indexes the last byte, so handle "" up front:
+	// ParseInt always fails on it and supplies the error.
+	if sizeStr == "" {
+		return strconv.ParseInt(sizeStr, 10, 64)
+	}
+
+	numStr := sizeStr[:len(sizeStr)-1]
+	var unit int64 = 1
+
+	switch sizeStr[len(sizeStr)-1] {
+	case 'b', 'B':
+		// use initialized values, do nothing here
+	case 'k', 'K':
+		unit = 1024
+	case 'm', 'M':
+		unit = 1024 * 1024
+	case 'g', 'G':
+		unit = 1024 * 1024 * 1024
+	case 't', 'T':
+		unit = 1024 * 1024 * 1024 * 1024
+	default:
+		numStr = sizeStr
+	}
+	value, err := strconv.ParseInt(numStr, 10, 64)
+	if err != nil {
+		// report malformed input instead of silently treating it as 0
+		return 0, err
+	}
+	return value * unit, nil
+}
diff --git a/cmd/restic/exclude_test.go b/cmd/restic/exclude_test.go
index 6c8ce6e14..9c48e3e8f 100644
--- a/cmd/restic/exclude_test.go
+++ b/cmd/restic/exclude_test.go
@@ -189,3 +189,113 @@ func TestMultipleIsExcludedByFile(t *testing.T) {
 		}
 	}
 }
+
+func TestParseSizeStr(t *testing.T) {
+	sizeStrTests := []struct {
+		in       string
+		expected int64
+	}{
+		{"1024", 1024},
+		{"1024b", 1024},
+		{"1024B", 1024},
+		{"1k", 1024},
+		{"100k", 102400},
+		{"100K", 102400},
+		{"10M", 10485760},
+		{"100m", 104857600},
+		{"20G", 21474836480},
+		{"10g", 10737418240},
+		{"2T", 2199023255552},
+		{"2t", 2199023255552},
+	}
+
+	for _, tt := range sizeStrTests {
+		actual, err := parseSizeStr(tt.in)
+		test.OK(t, err)
+
+		if actual != tt.expected {
+			t.Errorf("parseSizeStr(%s) = %d; expected %d", tt.in, actual, tt.expected)
+		}
+	}
+}
+
+// TestIsExcludedByFileSize is for testing the instance of
+// --exclude-larger-than parameters
+func TestIsExcludedByFileSize(t *testing.T) {
+	tempDir, cleanup := test.TempDir(t)
+	defer cleanup()
+
+	// Max size of file is set to be 1k
+	maxSizeStr := "1k"
+
+	// Create some files in a temporary directory.
+	// Files in UPPERCASE are larger than the limit and must be excluded.
+	// We will test the inclusion later, so we add the expected value as
+	// a bool.
+	files := []struct {
+		path string
+		size int64
+		incl bool
+	}{
+		{"42", 100, true},
+
+		// everything in foodir except the oversized FOOLARGE file
+		// should be included.
+		{"foodir/FOOLARGE", 2048, false},
+		{"foodir/foo", 1002, true},
+		{"foodir/foosub/underfoo", 100, true},
+
+		// everything in bardir except the oversized BARLARGE file
+		// should be included.
+		{"bardir/BARLARGE", 1030, false},
+		{"bardir/bar", 1000, true},
+		{"bardir/barsub/underbar", 500, true},
+
+		// everything in bazdir should be included.
+		{"bazdir/baz", 100, true},
+		{"bazdir/bazsub/underbaz", 200, true},
+	}
+	var errs []error
+	for _, f := range files {
+		// create directories first, then the file
+		p := filepath.Join(tempDir, filepath.FromSlash(f.path))
+		errs = append(errs, os.MkdirAll(filepath.Dir(p), 0700))
+		file, err := os.OpenFile(p, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600)
+		errs = append(errs, err)
+		if err == nil {
+			// create a file with given size
+			errs = append(errs, file.Truncate(f.size))
+		}
+		errs = append(errs, file.Close())
+	}
+	test.OKs(t, errs) // see if anything went wrong during the creation
+
+	// create rejection function
+	sizeExclude, err := rejectBySize(maxSizeStr)
+	test.OK(t, err)
+	// To mock the archiver scanning walk, we create filepath.WalkFn
+	// that tests against the size rejection function and stores
+	// the result in a map against which we can test later.
+	m := make(map[string]bool)
+	walk := func(p string, fi os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+
+		excluded := sizeExclude(p, fi)
+		// the log message helps debugging in case the test fails
+		t.Logf("%q: dir:%t; size:%d; excluded:%v", p, fi.IsDir(), fi.Size(), excluded)
+		m[p] = !excluded
+		return nil
+	}
+	// walk through the temporary directory and check the error
+	test.OK(t, filepath.Walk(tempDir, walk))
+
+	// compare whether the walk gave the expected values for the test cases
+	for _, f := range files {
+		p := filepath.Join(tempDir, filepath.FromSlash(f.path))
+		if m[p] != f.incl {
+			t.Errorf("inclusion status of %s is wrong: want %v, got %v", f.path, f.incl, m[p])
+		}
+	}
+}
diff --git a/doc/040_backup.rst b/doc/040_backup.rst
index a3a354475..274949010 100644
--- a/doc/040_backup.rst
+++ b/doc/040_backup.rst
@@ -144,6 +144,7 @@ the exclude options are:
 -  ``--exclude-file`` Specified one or more times to exclude items listed in a given file
 -  ``--iexclude-file`` Same as ``exclude-file`` but ignores cases like in ``--iexclude``
 -  ``--exclude-if-present foo`` Specified one or more times to exclude a folder's content if it contains a file called ``foo`` (optionally having a given header, no wildcards for the file name supported)
+-  ``--exclude-larger-than size`` Specified once to exclude files larger than the given size
 
 Please see ``restic help backup`` for more specific information about each exclude option.
 
@@ -240,6 +241,21 @@ include other filesystems like ``/sys`` and ``/proc``.
 .. note:: ``--one-file-system`` is currently unsupported on Windows, and will
     cause the backup to immediately fail with an error.
 
+Files larger than a given size can be excluded using the ``--exclude-larger-than``
+option:
+
+.. code-block:: console
+
+    $ restic -r /srv/restic-repo backup ~/work --exclude-larger-than 1M
+
+This excludes files in ``~/work`` which are larger than 1 MB from the backup.
+
+The default unit for the size value is bytes, so e.g. ``--exclude-larger-than 2048``
+would exclude files larger than 2048 bytes (2 kilobytes). To specify other units,
+suffix the size value with one of ``k``/``K`` for kilobytes, ``m``/``M`` for megabytes,
+``g``/``G`` for gigabytes and ``t``/``T`` for terabytes (e.g. ``1k``, ``10K``, ``20m``,
+``20M``, ``30g``, ``30G``, ``2t`` or ``2T``).
+
 Including Files
 ***************
 