Support excluding files by size

This commit is contained in:
yoshiera 2020-09-03 09:18:58 +08:00
parent 4dcd6abf37
commit ac4b8c98ac
5 changed files with 192 additions and 0 deletions

View file

@ -0,0 +1,8 @@
Enhancement: Support excluding files by their size
The `backup` command now supports the `--exclude-larger-than` option to exclude files which are
larger than the specified maximum size. This can for example be useful to exclude unimportant
files with a large file size.
https://github.com/restic/restic/issues/2569
https://github.com/restic/restic/pull/2914

View file

@ -87,6 +87,7 @@ type BackupOptions struct {
ExcludeOtherFS bool
ExcludeIfPresent []string
ExcludeCaches bool
ExcludeLargerThan string
Stdin bool
StdinFilename string
Tags []string
@ -115,6 +116,7 @@ func init() {
f.BoolVarP(&backupOptions.ExcludeOtherFS, "one-file-system", "x", false, "exclude other file systems")
f.StringArrayVar(&backupOptions.ExcludeIfPresent, "exclude-if-present", nil, "takes `filename[:header]`, exclude contents of directories containing filename (except filename itself) if header of that file is as provided (can be specified multiple times)")
f.BoolVar(&backupOptions.ExcludeCaches, "exclude-caches", false, `excludes cache directories that are marked with a CACHEDIR.TAG file. See https://bford.info/cachedir/ for the Cache Directory Tagging Standard`)
f.StringVar(&backupOptions.ExcludeLargerThan, "exclude-larger-than", "", "max `size` of the files to be backed up (allowed suffixes: k/K, m/M, g/G, t/T)")
f.BoolVar(&backupOptions.Stdin, "stdin", false, "read backup from stdin")
f.StringVar(&backupOptions.StdinFilename, "stdin-filename", "stdin", "`filename` to use when reading from stdin")
f.StringArrayVar(&backupOptions.Tags, "tag", nil, "add a `tag` for the new snapshot (can be specified multiple times)")
@ -285,6 +287,14 @@ func collectRejectFuncs(opts BackupOptions, repo *repository.Repository, targets
fs = append(fs, f)
}
if len(opts.ExcludeLargerThan) != 0 && !opts.Stdin {
f, err := rejectBySize(opts.ExcludeLargerThan)
if err != nil {
return nil, err
}
fs = append(fs, f)
}
return fs, nil
}

View file

@ -6,6 +6,7 @@ import (
"io"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
@ -292,3 +293,50 @@ func rejectResticCache(repo *repository.Repository) (RejectByNameFunc, error) {
return false
}, nil
}
// rejectBySize builds a RejectFunc from the size limit given in maxSizeStr.
//
// maxSizeStr is converted to a byte count with parseSizeStr; a parse error is
// returned unchanged together with a nil function. The returned function
// reports true for any non-directory item whose size, as reported by its
// os.FileInfo, is strictly greater than that limit. Directories are never
// rejected by this function.
func rejectBySize(maxSizeStr string) (RejectFunc, error) {
	limit, err := parseSizeStr(maxSizeStr)
	if err != nil {
		return nil, err
	}

	reject := func(item string, fi os.FileInfo) bool {
		// The size limit only applies to non-directories.
		if fi.IsDir() {
			return false
		}

		size := fi.Size()
		if size <= limit {
			return false
		}

		debug.Log("file %s is oversize: %d", item, size)
		return true
	}

	return reject, nil
}
// parseSizeStr converts a human-readable size such as "2048", "100k" or "20G"
// into a number of bytes.
//
// The string is a base-10 integer optionally followed by a single unit
// suffix: b/B (bytes), k/K, m/M, g/G or t/T, where each step is a factor of
// 1024. Without a suffix the value is interpreted as bytes.
//
// An error (a *strconv.NumError) is returned if sizeStr is empty, if the
// numeric part is not a valid integer, or if the resulting size is negative
// or does not fit into an int64.
func parseSizeStr(sizeStr string) (int64, error) {
	const maxInt64 = 1<<63 - 1

	// Guard against indexing into an empty string below.
	if sizeStr == "" {
		return 0, &strconv.NumError{Func: "parseSizeStr", Num: sizeStr, Err: strconv.ErrSyntax}
	}

	last := len(sizeStr) - 1
	numStr := sizeStr[:last]
	var unit int64 = 1

	switch sizeStr[last] {
	case 'b', 'B':
		// use initialized values, do nothing here
	case 'k', 'K':
		unit = 1024
	case 'm', 'M':
		unit = 1024 * 1024
	case 'g', 'G':
		unit = 1024 * 1024 * 1024
	case 't', 'T':
		unit = 1024 * 1024 * 1024 * 1024
	default:
		// no recognized suffix: the whole string has to be the number
		numStr = sizeStr
	}

	value, err := strconv.ParseInt(numStr, 10, 64)
	if err != nil {
		// Report the failure instead of silently returning a limit of 0,
		// which would make a size-based filter reject every non-empty file.
		return 0, err
	}

	// Reject negative sizes and products that would overflow int64.
	if value < 0 || value > maxInt64/unit {
		return 0, &strconv.NumError{Func: "parseSizeStr", Num: sizeStr, Err: strconv.ErrRange}
	}

	return value * unit, nil
}

View file

@ -189,3 +189,113 @@ func TestMultipleIsExcludedByFile(t *testing.T) {
}
}
}
// TestParseSizeStr checks that parseSizeStr turns plain numbers and numbers
// with a b/k/m/g/t suffix (in either case) into the expected byte counts.
func TestParseSizeStr(t *testing.T) {
	type sizeCase struct {
		in       string
		expected int64
	}

	cases := []sizeCase{
		{in: "1024", expected: 1024},
		{in: "1024b", expected: 1024},
		{in: "1024B", expected: 1024},
		{in: "1k", expected: 1024},
		{in: "100k", expected: 102400},
		{in: "100K", expected: 102400},
		{in: "10M", expected: 10485760},
		{in: "100m", expected: 104857600},
		{in: "20G", expected: 21474836480},
		{in: "10g", expected: 10737418240},
		{in: "2T", expected: 2199023255552},
		{in: "2t", expected: 2199023255552},
	}

	for i := range cases {
		c := cases[i]

		got, err := parseSizeStr(c.in)
		test.OK(t, err)

		if got == c.expected {
			continue
		}
		t.Errorf("parseSizeStr(%s) = %d; expected %d", c.in, got, c.expected)
	}
}
// TestIsExcludedByFileSize tests the rejection function used for the
// --exclude-larger-than parameter: it creates files of known sizes in a
// temporary directory, walks the directory with a rejection function built
// from a limit of "1k", and checks that exactly the files larger than the
// limit are excluded.
func TestIsExcludedByFileSize(t *testing.T) {
	tempDir, cleanup := test.TempDir(t)
	defer cleanup()

	// Max size of file is set to be 1k (1024 bytes)
	maxSizeStr := "1k"

	// Create some files in a temporary directory.
	// Files with an UPPERCASE name are larger than the limit and are
	// expected to be excluded. We will test the inclusion later, so we add
	// the expected value as a bool.
	files := []struct {
		path string
		size int64
		incl bool
	}{
		{"42", 100, true},

		// everything in foodir except the oversized FOOLARGE file
		// should be included.
		{"foodir/FOOLARGE", 2048, false},
		{"foodir/foo", 1002, true},
		{"foodir/foosub/underfoo", 100, true},

		// everything in bardir except the oversized BARLARGE file
		// should be included.
		{"bardir/BARLARGE", 1030, false},
		{"bardir/bar", 1000, true},
		{"bardir/barsub/underbar", 500, true},

		// everything in bazdir should be included.
		{"bazdir/baz", 100, true},
		{"bazdir/bazsub/underbaz", 200, true},
	}

	var errs []error
	for _, f := range files {
		// create directories first, then the file
		p := filepath.Join(tempDir, filepath.FromSlash(f.path))
		errs = append(errs, os.MkdirAll(filepath.Dir(p), 0700))

		file, err := os.OpenFile(p, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600)
		if err != nil {
			// nothing was opened, so there is nothing to resize or close
			errs = append(errs, err)
			continue
		}

		// create a file with given size
		errs = append(errs, file.Truncate(f.size))
		errs = append(errs, file.Close())
	}
	test.OKs(t, errs) // see if anything went wrong during the creation

	// create rejection function; fail here rather than calling a nil
	// function during the walk if the size string cannot be parsed
	sizeExclude, err := rejectBySize(maxSizeStr)
	test.OK(t, err)

	// To mock the archiver scanning walk, we create filepath.WalkFn
	// that tests against the rejection function and stores
	// the result in a map against we can test later.
	m := make(map[string]bool)
	walk := func(p string, fi os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		excluded := sizeExclude(p, fi)
		// the log message helps debugging in case the test fails
		t.Logf("%q: dir:%t; size:%d; excluded:%v", p, fi.IsDir(), fi.Size(), excluded)
		m[p] = !excluded
		return nil
	}

	// walk through the temporary directory and check the error
	test.OK(t, filepath.Walk(tempDir, walk))

	// compare whether the walk gave the expected values for the test cases
	for _, f := range files {
		p := filepath.Join(tempDir, filepath.FromSlash(f.path))
		if m[p] != f.incl {
			t.Errorf("inclusion status of %s is wrong: want %v, got %v", f.path, f.incl, m[p])
		}
	}
}

View file

@ -144,6 +144,7 @@ the exclude options are:
- ``--exclude-file`` Specified one or more times to exclude items listed in a given file
- ``--iexclude-file`` Same as ``exclude-file`` but ignores cases like in ``--iexclude``
- ``--exclude-if-present foo`` Specified one or more times to exclude a folder's content if it contains a file called ``foo`` (optionally having a given header, no wildcards for the file name supported)
- ``--exclude-larger-than size`` Specified once to exclude files larger than the given size
Please see ``restic help backup`` for more specific information about each exclude option.
@ -240,6 +241,21 @@ include other filesystems like ``/sys`` and ``/proc``.
.. note:: ``--one-file-system`` is currently unsupported on Windows, and will
cause the backup to immediately fail with an error.
Files larger than a given size can be excluded using the ``--exclude-larger-than``
option:
.. code-block:: console
$ restic -r /srv/restic-repo backup ~/work --exclude-larger-than 1M
This excludes files in ``~/work`` which are larger than 1 MiB from the backup.
The default unit for the size value is bytes, so e.g. ``--exclude-larger-than 2048``
would exclude files larger than 2048 bytes (2 KiB). To specify other units,
suffix the size value with one of ``k``/``K`` for KiB (1024 bytes), ``m``/``M`` for MiB (1024^2 bytes),
``g``/``G`` for GiB (1024^3 bytes) and ``t``/``T`` for TiB (1024^4 bytes) (e.g. ``1k``, ``10K``, ``20m``,
``20M``, ``30g``, ``30G``, ``2t`` or ``2T``).
Including Files
***************