Merge pull request #2750 from metalsp0rk/min-packsize

Add `backup --file-read-concurrency` flag
This commit is contained in:
Michael Eischer 2022-10-02 23:11:47 +02:00 committed by GitHub
commit e99ad39b34
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 34 additions and 10 deletions

View file

@ -0,0 +1,6 @@
Enhancement: Make backup file read concurrency configurable
The `backup` command now supports a `--read-concurrency` flag to allowing
tuning restic for very fast storage like NVME disks.
https://github.com/restic/restic/pull/2750

View file

@ -11,6 +11,7 @@ import (
"path"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"time"
@ -100,6 +101,7 @@ type BackupOptions struct {
IgnoreCtime bool
UseFsSnapshot bool
DryRun bool
ReadConcurrency uint
}
var backupOptions BackupOptions
@ -124,7 +126,7 @@ func init() {
f.BoolVar(&backupOptions.Stdin, "stdin", false, "read backup from stdin")
f.StringVar(&backupOptions.StdinFilename, "stdin-filename", "stdin", "`filename` to use when reading from stdin")
f.Var(&backupOptions.Tags, "tag", "add `tags` for the new snapshot in the format `tag[,tag,...]` (can be specified multiple times)")
f.UintVar(&backupOptions.ReadConcurrency, "read-concurrency", 0, "read `n` file concurrently. (default: $RESTIC_READ_CONCURRENCY or 2)")
f.StringVarP(&backupOptions.Host, "host", "H", "", "set the `hostname` for the snapshot manually. To prevent an expensive rescan use the \"parent\" flag")
f.StringVar(&backupOptions.Host, "hostname", "", "set the `hostname` for the snapshot manually")
err := f.MarkDeprecated("hostname", "use --host")
@ -132,7 +134,6 @@ func init() {
// MarkDeprecated only returns an error when the flag could not be found
panic(err)
}
f.StringArrayVar(&backupOptions.FilesFrom, "files-from", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)")
f.StringArrayVar(&backupOptions.FilesFromVerbatim, "files-from-verbatim", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)")
f.StringArrayVar(&backupOptions.FilesFromRaw, "files-from-raw", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)")
@ -144,6 +145,10 @@ func init() {
if runtime.GOOS == "windows" {
f.BoolVar(&backupOptions.UseFsSnapshot, "use-fs-snapshot", false, "use filesystem snapshot where possible (currently only Windows VSS)")
}
// parse read concurrency from env, on error the default value will be used
readConcurrency, _ := strconv.ParseUint(os.Getenv("RESTIC_READ_CONCURRENCY"), 10, 32)
backupOptions.ReadConcurrency = uint(readConcurrency)
}
// filterExisting returns a slice of all existing items, or an error if no
@ -685,7 +690,7 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
}
wg.Go(func() error { return sc.Scan(cancelCtx, targets) })
arch := archiver.New(repo, targetFS, archiver.Options{})
arch := archiver.New(repo, targetFS, archiver.Options{ReadConcurrency: backupOptions.ReadConcurrency})
arch.SelectByName = selectByNameFilter
arch.Select = selectFilter
arch.WithAtime = opts.WithAtime

View file

@ -555,6 +555,7 @@ environment variables. The following lists these environment variables:
RESTIC_COMPRESSION Compression mode (only available for repository format version 2)
RESTIC_PROGRESS_FPS Frames per second by which the progress bar is updated
RESTIC_PACK_SIZE Target size for pack files
RESTIC_READ_CONCURRENCY Concurrency for file reads
TMPDIR Location for temporary files

View file

@ -51,6 +51,16 @@ only applied for the single run of restic. The option can also be set via the en
variable ``RESTIC_COMPRESSION``.
File Read Concurrency
=====================
When backing up fast storage like NVME disks, it can be beneficial to increase the read
concurrency. This can increase the overall performance of the backup operation by reading
more files in parallel. You can specify the concurrency of file reads with the
``RESTIC_READ_CONCURRENCY`` environment variable or the ``--read-concurrency`` flag for
the ``backup`` command.
Pack Size
=========

View file

@ -112,6 +112,7 @@ command:
--ignore-inode ignore inode number changes when checking for modified files
-x, --one-file-system exclude other file systems, don't cross filesystem boundaries and subvolumes
--parent snapshot use this parent snapshot (default: last snapshot in the repository that has the same target files/directories, and is not newer than the snapshot time)
--read-concurrency n read n file concurrently. (default: $RESTIC_READ_CONCURRENCY or 2)
--stdin read backup from stdin
--stdin-filename filename filename to use when reading from stdin (default "stdin")
--tag tags add tags for the new snapshot in the format `tag[,tag,...]` (can be specified multiple times) (default [])
@ -442,3 +443,4 @@ time it is used, so by looking at the timestamps of the sub directories of the
cache directory it can decide which sub directories are old and probably not
needed any more. You can either remove these directories manually, or run a
restic command with the ``--cleanup-cache`` flag.

View file

@ -95,10 +95,10 @@ const (
// Options is used to configure the archiver.
type Options struct {
// FileReadConcurrency sets how many files are read in concurrently. If
// ReadConcurrency sets how many files are read in concurrently. If
// it's set to zero, at most two files are read in concurrently (which
// turned out to be a good default for most situations).
FileReadConcurrency uint
ReadConcurrency uint
// SaveBlobConcurrency sets how many blobs are hashed and saved
// concurrently. If it's set to zero, the default is the number of CPUs
@ -113,11 +113,11 @@ type Options struct {
// ApplyDefaults returns a copy of o with the default options set for all unset
// fields.
func (o Options) ApplyDefaults() Options {
if o.FileReadConcurrency == 0 {
if o.ReadConcurrency == 0 {
// two is a sweet spot for almost all situations. We've done some
// experiments documented here:
// https://github.com/borgbackup/borg/issues/3500
o.FileReadConcurrency = 2
o.ReadConcurrency = 2
}
if o.SaveBlobConcurrency == 0 {
@ -132,7 +132,7 @@ func (o Options) ApplyDefaults() Options {
// Also allow waiting for FileReadConcurrency files, this is the maximum of FutureFiles
// which currently can be in progress. The main backup loop blocks when trying to queue
// more files to read.
o.SaveTreeConcurrency = uint(runtime.GOMAXPROCS(0)) + o.FileReadConcurrency
o.SaveTreeConcurrency = uint(runtime.GOMAXPROCS(0)) + o.ReadConcurrency
}
return o
@ -782,7 +782,7 @@ func (arch *Archiver) runWorkers(ctx context.Context, wg *errgroup.Group) {
arch.fileSaver = NewFileSaver(ctx, wg,
arch.blobSaver.Save,
arch.Repo.Config().ChunkerPolynomial,
arch.Options.FileReadConcurrency, arch.Options.SaveBlobConcurrency)
arch.Options.ReadConcurrency, arch.Options.SaveBlobConcurrency)
arch.fileSaver.CompleteBlob = arch.CompleteBlob
arch.fileSaver.NodeFromFileInfo = arch.nodeFromFileInfo

View file

@ -2040,7 +2040,7 @@ func TestArchiverAbortEarlyOnError(t *testing.T) {
// at most two files may be queued
arch := New(testRepo, testFS, Options{
FileReadConcurrency: 2,
ReadConcurrency: 2,
})
_, _, err := arch.Snapshot(ctx, []string{"."}, SnapshotOptions{Time: time.Now()})