Merge pull request #4410 from Enrico204/restic-stdin-command

add --stdin-from-command flag to backup command
This commit is contained in:
Michael Eischer 2023-10-27 22:09:06 +00:00 committed by GitHub
commit 42ab3ea2b9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 309 additions and 28 deletions

View file

@ -0,0 +1,14 @@
Enhancement: Support reading backup from a program's standard output
When reading data from stdin, the `backup` command could not verify whether the
corresponding command completed successfully.
The `backup` command now supports starting an arbitrary command and sourcing
the backup content from its standard output. This enables restic to verify that
the command completes with exit code zero. A non-zero exit code causes the
backup to fail.
Example: `restic backup --stdin-from-command mysqldump [...]`
https://github.com/restic/restic/issues/4251
https://github.com/restic/restic/pull/4410

View file

@ -97,6 +97,7 @@ type BackupOptions struct {
ExcludeLargerThan string
Stdin bool
StdinFilename string
StdinCommand bool
Tags restic.TagLists
Host string
FilesFrom []string
@ -134,6 +135,7 @@ func init() {
f.StringVar(&backupOptions.ExcludeLargerThan, "exclude-larger-than", "", "max `size` of the files to be backed up (allowed suffixes: k/K, m/M, g/G, t/T)")
f.BoolVar(&backupOptions.Stdin, "stdin", false, "read backup from stdin")
f.StringVar(&backupOptions.StdinFilename, "stdin-filename", "stdin", "`filename` to use when reading from stdin")
f.BoolVar(&backupOptions.StdinCommand, "stdin-from-command", false, "execute command and store its stdout")
f.Var(&backupOptions.Tags, "tag", "add `tags` for the new snapshot in the format `tag[,tag,...]` (can be specified multiple times)")
f.UintVar(&backupOptions.ReadConcurrency, "read-concurrency", 0, "read `n` files concurrently (default: $RESTIC_READ_CONCURRENCY or 2)")
f.StringVarP(&backupOptions.Host, "host", "H", "", "set the `hostname` for the snapshot manually. To prevent an expensive rescan use the \"parent\" flag")
@ -287,7 +289,7 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error {
}
}
if opts.Stdin {
if opts.Stdin || opts.StdinCommand {
if len(opts.FilesFrom) > 0 {
return errors.Fatal("--stdin and --files-from cannot be used together")
}
@ -298,7 +300,7 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error {
return errors.Fatal("--stdin and --files-from-raw cannot be used together")
}
if len(args) > 0 {
if len(args) > 0 && !opts.StdinCommand {
return errors.Fatal("--stdin was specified and files/dirs were listed as arguments")
}
}
@ -366,7 +368,7 @@ func collectRejectFuncs(opts BackupOptions, targets []string) (fs []RejectFunc,
// collectTargets returns a list of target files/dirs from several sources.
func collectTargets(opts BackupOptions, args []string) (targets []string, err error) {
if opts.Stdin {
if opts.Stdin || opts.StdinCommand {
return nil, nil
}
@ -592,16 +594,24 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter
defer localVss.DeleteSnapshots()
targetFS = localVss
}
if opts.Stdin {
if opts.Stdin || opts.StdinCommand {
if !gopts.JSON {
progressPrinter.V("read data from stdin")
}
filename := path.Join("/", opts.StdinFilename)
var source io.ReadCloser = os.Stdin
if opts.StdinCommand {
source, err = fs.NewCommandReader(ctx, args, globalOptions.stderr)
if err != nil {
return err
}
}
targetFS = &fs.Reader{
ModTime: timeStamp,
Name: filename,
Mode: 0644,
ReadCloser: os.Stdin,
ReadCloser: source,
}
targets = []string{filename}
}
@ -630,7 +640,13 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter
success := true
arch.Error = func(item string, err error) error {
success = false
return progressReporter.Error(item, err)
reterr := progressReporter.Error(item, err)
// If we receive a fatal error during the execution of the snapshot,
// we abort the snapshot.
if reterr == nil && errors.IsFatal(err) {
reterr = err
}
return reterr
}
arch.CompleteItem = progressReporter.CompleteItem
arch.StartFile = progressReporter.StartFile

View file

@ -568,3 +568,72 @@ func linkEqual(source, dest []string) bool {
return true
}
func TestStdinFromCommand(t *testing.T) {
env, cleanup := withTestEnvironment(t)
defer cleanup()
testSetupBackupData(t, env)
opts := BackupOptions{
StdinCommand: true,
StdinFilename: "stdin",
}
testRunBackup(t, filepath.Dir(env.testdata), []string{"python", "-c", "import sys; print('something'); sys.exit(0)"}, opts, env.gopts)
testListSnapshots(t, env.gopts, 1)
testRunCheck(t, env.gopts)
}
func TestStdinFromCommandNoOutput(t *testing.T) {
env, cleanup := withTestEnvironment(t)
defer cleanup()
testSetupBackupData(t, env)
opts := BackupOptions{
StdinCommand: true,
StdinFilename: "stdin",
}
err := testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{"python", "-c", "import sys; sys.exit(0)"}, opts, env.gopts)
rtest.Assert(t, err != nil && err.Error() == "at least one source file could not be read", "No data error expected")
testListSnapshots(t, env.gopts, 1)
testRunCheck(t, env.gopts)
}
func TestStdinFromCommandFailExitCode(t *testing.T) {
env, cleanup := withTestEnvironment(t)
defer cleanup()
testSetupBackupData(t, env)
opts := BackupOptions{
StdinCommand: true,
StdinFilename: "stdin",
}
err := testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{"python", "-c", "import sys; print('test'); sys.exit(1)"}, opts, env.gopts)
rtest.Assert(t, err != nil, "Expected error while backing up")
testListSnapshots(t, env.gopts, 0)
testRunCheck(t, env.gopts)
}
func TestStdinFromCommandFailNoOutputAndExitCode(t *testing.T) {
env, cleanup := withTestEnvironment(t)
defer cleanup()
testSetupBackupData(t, env)
opts := BackupOptions{
StdinCommand: true,
StdinFilename: "stdin",
}
err := testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{"python", "-c", "import sys; sys.exit(1)"}, opts, env.gopts)
rtest.Assert(t, err != nil, "Expected error while backing up")
testListSnapshots(t, env.gopts, 0)
testRunCheck(t, env.gopts)
}

View file

@ -489,35 +489,71 @@ particular note are::
- file ownership and ACLs on Windows
- the "hidden" flag on Windows
Reading data from a command
***************************
Sometimes, it can be useful to directly save the output of a program, for example,
``mysqldump`` so that the SQL can later be restored. Restic supports this mode
of operation; just supply the option ``--stdin-from-command`` when using the
``backup`` action, and write the command in place of the files/directories:
.. code-block:: console
$ restic -r /srv/restic-repo backup --stdin-from-command mysqldump [...]
This command creates a new snapshot based on the standard output of ``mysqldump``.
By default, the command's standard output is saved in a file named ``stdin``.
A different name can be specified with ``--stdin-filename``:
.. code-block:: console
$ restic -r /srv/restic-repo backup --stdin-filename production.sql --stdin-from-command mysqldump [...]
Restic uses the command exit code to determine whether the command succeeded. A
non-zero exit code from the command causes restic to cancel the backup. This causes
restic to fail with exit code 1. No snapshot will be created in this case.
Reading data from stdin
***********************
Sometimes it can be nice to directly save the output of a program, e.g.
``mysqldump`` so that the SQL can later be restored. Restic supports
this mode of operation, just supply the option ``--stdin`` to the
``backup`` command like this:
.. warning::
Restic cannot detect if data read from stdin is complete or not. As explained
below, this can cause incomplete backup unless additional checks (outside of
restic) are configured. If possible, use ``--stdin-from-command`` instead.
Alternatively, restic supports reading arbitrary data directly from the standard
input. Use the option ``--stdin`` of the ``backup`` command as follows:
.. code-block:: console
$ set -o pipefail
# Will not notice failures, see the warning below
$ gzip bigfile.dat | restic -r /srv/restic-repo backup --stdin
This creates a new snapshot of the content of ``bigfile.dat``.
As for ``--stdin-from-command``, the default file name is ``stdin``; a
different name can be specified with ``--stdin-filename``.
**Important**: while it is possible to pipe a command output to restic using
``--stdin``, doing so is discouraged as it will mask errors from the
command, leading to corrupted backups. For example, in the following code
block, if ``mysqldump`` fails to connect to the MySQL database, the restic
backup will nevertheless succeed in creating an _empty_ backup:
.. code-block:: console
# Will not notice failures, read the warning above
$ mysqldump [...] | restic -r /srv/restic-repo backup --stdin
This creates a new snapshot of the output of ``mysqldump``. You can then
use e.g. the fuse mounting option (see below) to mount the repository
and read the file.
By default, the file name ``stdin`` is used, a different name can be
specified with ``--stdin-filename``, e.g. like this:
.. code-block:: console
$ mysqldump [...] | restic -r /srv/restic-repo backup --stdin --stdin-filename production.sql
The option ``pipefail`` is highly recommended so that a non-zero exit code from
one of the programs in the pipe (e.g. ``mysqldump`` here) makes the whole chain
return a non-zero exit code. Refer to the `Use the Unofficial Bash Strict Mode
<http://redsymbol.net/articles/unofficial-bash-strict-mode/>`__ for more
details on this.
A simple solution is to use ``--stdin-from-command`` (see above). If you
still need to use the ``--stdin`` flag, you must use the shell option ``set -o pipefail``
(so that a non-zero exit code from one of the programs in the pipe makes the
whole chain return a non-zero exit code) and you must check the exit code of
the pipe and act accordingly (e.g., remove the last backup). Refer to the
`Use the Unofficial Bash Strict Mode <http://redsymbol.net/articles/unofficial-bash-strict-mode/>`__
for more details on this.
Tags for backup

View file

@ -2,6 +2,7 @@ package archiver
import (
"context"
"fmt"
"io"
"os"
"sync"
@ -146,7 +147,7 @@ func (s *FileSaver) saveFile(ctx context.Context, chnker *chunker.Chunker, snPat
panic("completed twice")
}
isCompleted = true
fnr.err = err
fnr.err = fmt.Errorf("failed to save %v: %w", target, err)
fnr.node = nil
fnr.stats = ItemStats{}
finish(fnr)

View file

@ -0,0 +1,97 @@
package fs
import (
"bufio"
"context"
"fmt"
"io"
"os/exec"
"github.com/restic/restic/internal/errors"
)
// CommandReader wrap a command such that its standard output can be read using
// a io.ReadCloser. Close() waits for the command to terminate, reporting
// any error back to the caller.
type CommandReader struct {
cmd *exec.Cmd
stdout io.ReadCloser
// cmd.Wait() must only be called once. Prevent duplicate executions in
// Read() and Close().
waitHandled bool
// alreadyClosedReadErr is the error that we should return if we try to
// read the pipe again after closing. This works around a Read() call that
// is issued after a previous Read() with `io.EOF` (but some bytes were
// read in the past).
alreadyClosedReadErr error
}
func NewCommandReader(ctx context.Context, args []string, logOutput io.Writer) (*CommandReader, error) {
// Prepare command and stdout
command := exec.CommandContext(ctx, args[0], args[1:]...)
stdout, err := command.StdoutPipe()
if err != nil {
return nil, fmt.Errorf("failed to setup stdout pipe: %w", err)
}
// Use a Go routine to handle the stderr to avoid deadlocks
stderr, err := command.StderrPipe()
if err != nil {
return nil, fmt.Errorf("failed to setup stderr pipe: %w", err)
}
go func() {
sc := bufio.NewScanner(stderr)
for sc.Scan() {
_, _ = fmt.Fprintf(logOutput, "subprocess %v: %v\n", command.Args[0], sc.Text())
}
}()
if err := command.Start(); err != nil {
return nil, fmt.Errorf("failed to start command: %w", err)
}
return &CommandReader{
cmd: command,
stdout: stdout,
}, nil
}
// Read populate the array with data from the process stdout.
func (fp *CommandReader) Read(p []byte) (int, error) {
if fp.alreadyClosedReadErr != nil {
return 0, fp.alreadyClosedReadErr
}
b, err := fp.stdout.Read(p)
// If the error is io.EOF, the program terminated. We need to check the
// exit code here because, if the program terminated with no output, the
// error in `Close()` is ignored.
if errors.Is(err, io.EOF) {
fp.waitHandled = true
// check if the command terminated successfully, If not return the error.
if errw := fp.wait(); errw != nil {
err = errw
}
}
fp.alreadyClosedReadErr = err
return b, err
}
func (fp *CommandReader) wait() error {
err := fp.cmd.Wait()
if err != nil {
// Use a fatal error to abort the snapshot.
return errors.Fatal(fmt.Errorf("command failed: %w", err).Error())
}
return nil
}
func (fp *CommandReader) Close() error {
if fp.waitHandled {
return nil
}
return fp.wait()
}

View file

@ -0,0 +1,48 @@
package fs_test
import (
"bytes"
"context"
"io"
"strings"
"testing"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/test"
)
func TestCommandReaderSuccess(t *testing.T) {
reader, err := fs.NewCommandReader(context.TODO(), []string{"true"}, io.Discard)
test.OK(t, err)
_, err = io.Copy(io.Discard, reader)
test.OK(t, err)
test.OK(t, reader.Close())
}
func TestCommandReaderFail(t *testing.T) {
reader, err := fs.NewCommandReader(context.TODO(), []string{"false"}, io.Discard)
test.OK(t, err)
_, err = io.Copy(io.Discard, reader)
test.Assert(t, err != nil, "missing error")
}
func TestCommandReaderInvalid(t *testing.T) {
_, err := fs.NewCommandReader(context.TODO(), []string{"w54fy098hj7fy5twijouytfrj098y645wr"}, io.Discard)
test.Assert(t, err != nil, "missing error")
}
func TestCommandReaderOutput(t *testing.T) {
reader, err := fs.NewCommandReader(context.TODO(), []string{"echo", "hello world"}, io.Discard)
test.OK(t, err)
var buf bytes.Buffer
_, err = io.Copy(&buf, reader)
test.OK(t, err)
test.OK(t, reader.Close())
test.Equals(t, "hello world", strings.TrimSpace(buf.String()))
}