backup: add option for file read concurrency

This commit is contained in:
Kyle Brennan 2020-03-19 00:44:34 +00:00 committed by Michael Eischer
parent 9ec7eee803
commit 4a501d7118
4 changed files with 33 additions and 4 deletions

View file

@ -0,0 +1,5 @@
Enhancement: Make backup file read concurrency configurable
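The `backup` command now has a `--file-read-concurrency` option, which can also be set via the `RESTIC_FILE_READ_CONCURRENCY` environment variable, to tune how many file reads run concurrently. This helps in special situations such as backing up from spinning disks. The default remains 2.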
https://github.com/restic/restic/pull/2750

View file

@ -11,6 +11,7 @@ import (
"path"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"time"
@ -100,6 +101,7 @@ type BackupOptions struct {
IgnoreCtime bool
UseFsSnapshot bool
DryRun bool
FileReadConcurrency uint
}
var backupOptions BackupOptions
@ -108,6 +110,12 @@ var backupOptions BackupOptions
var ErrInvalidSourceData = errors.New("at least one source file could not be read")
func init() {
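// Default the file read concurrency to 2 unless RESTIC_FILE_READ_CONCURRENCY
// is set to a valid integer >= 1; unset, unparsable, and zero values all fall back to 2.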
fileReadConcurrency, err := strconv.ParseUint(os.Getenv("RESTIC_FILE_READ_CONCURRENCY"), 10, 32)
if err != nil || fileReadConcurrency < 1 {
fileReadConcurrency = 2
}
cmdRoot.AddCommand(cmdBackup)
f := cmdBackup.Flags()
@ -124,15 +132,14 @@ func init() {
f.BoolVar(&backupOptions.Stdin, "stdin", false, "read backup from stdin")
f.StringVar(&backupOptions.StdinFilename, "stdin-filename", "stdin", "`filename` to use when reading from stdin")
f.Var(&backupOptions.Tags, "tag", "add `tags` for the new snapshot in the format `tag[,tag,...]` (can be specified multiple times)")
f.UintVar(&backupOptions.FileReadConcurrency, "file-read-concurrency", uint(fileReadConcurrency), "set concurrency on file reads. (default: $RESTIC_FILE_READ_CONCURRENCY or 2)")
f.StringVarP(&backupOptions.Host, "host", "H", "", "set the `hostname` for the snapshot manually. To prevent an expensive rescan use the \"parent\" flag")
f.StringVar(&backupOptions.Host, "hostname", "", "set the `hostname` for the snapshot manually")
err := f.MarkDeprecated("hostname", "use --host")
err = f.MarkDeprecated("hostname", "use --host")
if err != nil {
// MarkDeprecated only returns an error when the flag could not be found
panic(err)
}
f.StringArrayVar(&backupOptions.FilesFrom, "files-from", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)")
f.StringArrayVar(&backupOptions.FilesFromVerbatim, "files-from-verbatim", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)")
f.StringArrayVar(&backupOptions.FilesFromRaw, "files-from-raw", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)")
@ -144,6 +151,7 @@ func init() {
if runtime.GOOS == "windows" {
f.BoolVar(&backupOptions.UseFsSnapshot, "use-fs-snapshot", false, "use filesystem snapshot where possible (currently only Windows VSS)")
}
}
// filterExisting returns a slice of all existing items, or an error if no
@ -284,6 +292,10 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error {
}
}
if backupOptions.FileReadConcurrency == 0 {
return errors.Fatal("--file-read-concurrency must be a positive, nonzero integer")
}
return nil
}
@ -685,7 +697,7 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
}
wg.Go(func() error { return sc.Scan(cancelCtx, targets) })
arch := archiver.New(repo, targetFS, archiver.Options{})
arch := archiver.New(repo, targetFS, archiver.Options{FileReadConcurrency: backupOptions.FileReadConcurrency})
arch.SelectByName = selectByNameFilter
arch.Select = selectFilter
arch.WithAtime = opts.WithAtime

View file

@ -51,6 +51,16 @@ only applied for the single run of restic. The option can also be set via the en
variable ``RESTIC_COMPRESSION``.
File Read Concurrency
=====================
In some instances, such as backing up from traditional spinning disks, it is desirable to
reduce the file read concurrency. Doing so reduces the amount of time spent seeking around
the disk and can increase the overall performance of the backup operation. You can specify
the concurrency of file reads with the ``RESTIC_FILE_READ_CONCURRENCY`` environment variable
or the ``--file-read-concurrency`` flag for the ``backup`` subcommand. If neither is set, or
the environment variable does not contain a positive integer, a concurrency of 2 is used.
Passing ``--file-read-concurrency 0`` is rejected with an error.
Pack Size
=========

View file

@ -103,6 +103,7 @@ command:
--files-from file read the files to backup from file (can be combined with file args; can be specified multiple times)
--files-from-raw file read the files to backup from file (can be combined with file args; can be specified multiple times)
--files-from-verbatim file read the files to backup from file (can be combined with file args; can be specified multiple times)
--file-read-concurrency uint set concurrency on file reads. (default: $RESTIC_FILE_READ_CONCURRENCY or 2)
-f, --force force re-reading the target files/directories (overrides the "parent" flag)
-h, --help help for backup
-H, --host hostname set the hostname for the snapshot manually. To prevent an expensive rescan use the "parent" flag
@ -442,3 +443,4 @@ time it is used, so by looking at the timestamps of the sub directories of the
cache directory it can decide which sub directories are old and probably not
needed any more. You can either remove these directories manually, or run a
restic command with the ``--cleanup-cache`` flag.