Merge pull request #2928 from MichaelEischer/init-copy-chunker

init: Add `--copy-chunker-params` option
This commit is contained in:
Alexander Neumann 2020-09-19 17:32:13 +02:00 committed by GitHub
commit 6ff0082c02
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 172 additions and 44 deletions

View file

@ -9,5 +9,12 @@ and destination repository. Also, the transferred files are not re-chunked,
which may break deduplication between files already stored in the which may break deduplication between files already stored in the
destination repo and files copied there using this command. destination repo and files copied there using this command.
To fully support deduplication between repositories when the copy command is
used, the init command now supports the `--copy-chunker-params` option,
which initializes the new repository with the same parameters for splitting
files into chunks as an already existing repository. This allows copied
snapshots to be equally deduplicated in both repositories.
https://github.com/restic/restic/issues/323 https://github.com/restic/restic/issues/323
https://github.com/restic/restic/pull/2606 https://github.com/restic/restic/pull/2606
https://github.com/restic/restic/pull/2928

View file

@ -3,10 +3,8 @@ package main
import ( import (
"context" "context"
"fmt" "fmt"
"os"
"github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/restic"
"github.com/spf13/cobra" "github.com/spf13/cobra"
@ -20,7 +18,8 @@ The "copy" command copies one or more snapshots from one repository to another
repository. Note that this will have to read (download) and write (upload) the repository. Note that this will have to read (download) and write (upload) the
entire snapshot(s) due to the different encryption keys on the source and entire snapshot(s) due to the different encryption keys on the source and
destination, and that transferred files are not re-chunked, which may break destination, and that transferred files are not re-chunked, which may break
their deduplication. their deduplication. This can be mitigated by the "--copy-chunker-params"
option when initializing a new destination repository using the "init" command.
`, `,
RunE: func(cmd *cobra.Command, args []string) error { RunE: func(cmd *cobra.Command, args []string) error {
return runCopy(copyOptions, globalOptions, args) return runCopy(copyOptions, globalOptions, args)
@ -29,14 +28,10 @@ their deduplication.
// CopyOptions bundles all options for the copy command. // CopyOptions bundles all options for the copy command.
type CopyOptions struct { type CopyOptions struct {
Repo string secondaryRepoOptions
password string Hosts []string
PasswordFile string Tags restic.TagLists
PasswordCommand string Paths []string
KeyHint string
Hosts []string
Tags restic.TagLists
Paths []string
} }
var copyOptions CopyOptions var copyOptions CopyOptions
@ -45,35 +40,14 @@ func init() {
cmdRoot.AddCommand(cmdCopy) cmdRoot.AddCommand(cmdCopy)
f := cmdCopy.Flags() f := cmdCopy.Flags()
f.StringVarP(&copyOptions.Repo, "repo2", "", os.Getenv("RESTIC_REPOSITORY2"), "destination repository to copy snapshots to (default: $RESTIC_REPOSITORY2)") initSecondaryRepoOptions(f, &copyOptions.secondaryRepoOptions, "destination", "to copy snapshots to")
f.StringVarP(&copyOptions.PasswordFile, "password-file2", "", os.Getenv("RESTIC_PASSWORD_FILE2"), "`file` to read the destination repository password from (default: $RESTIC_PASSWORD_FILE2)")
f.StringVarP(&copyOptions.KeyHint, "key-hint2", "", os.Getenv("RESTIC_KEY_HINT2"), "key ID of key to try decrypting the destination repository first (default: $RESTIC_KEY_HINT2)")
f.StringVarP(&copyOptions.PasswordCommand, "password-command2", "", os.Getenv("RESTIC_PASSWORD_COMMAND2"), "shell `command` to obtain the destination repository password from (default: $RESTIC_PASSWORD_COMMAND2)")
f.StringArrayVarP(&copyOptions.Hosts, "host", "H", nil, "only consider snapshots for this `host`, when no snapshot ID is given (can be specified multiple times)") f.StringArrayVarP(&copyOptions.Hosts, "host", "H", nil, "only consider snapshots for this `host`, when no snapshot ID is given (can be specified multiple times)")
f.Var(&copyOptions.Tags, "tag", "only consider snapshots which include this `taglist`, when no snapshot ID is given") f.Var(&copyOptions.Tags, "tag", "only consider snapshots which include this `taglist`, when no snapshot ID is given")
f.StringArrayVar(&copyOptions.Paths, "path", nil, "only consider snapshots which include this (absolute) `path`, when no snapshot ID is given") f.StringArrayVar(&copyOptions.Paths, "path", nil, "only consider snapshots which include this (absolute) `path`, when no snapshot ID is given")
} }
func runCopy(opts CopyOptions, gopts GlobalOptions, args []string) error { func runCopy(opts CopyOptions, gopts GlobalOptions, args []string) error {
if opts.Repo == "" { dstGopts, err := fillSecondaryGlobalOpts(opts.secondaryRepoOptions, gopts, "destination")
return errors.Fatal("Please specify a destination repository location (--repo2)")
}
var err error
dstGopts := gopts
dstGopts.Repo = opts.Repo
dstGopts.PasswordFile = opts.PasswordFile
dstGopts.PasswordCommand = opts.PasswordCommand
dstGopts.KeyHint = opts.KeyHint
if opts.password != "" {
dstGopts.password = opts.password
} else {
dstGopts.password, err = resolvePassword(dstGopts, "RESTIC_PASSWORD2")
if err != nil {
return err
}
}
dstGopts.password, err = ReadPassword(dstGopts, "enter password for destination repository: ")
if err != nil { if err != nil {
return err return err
} }

View file

@ -1,6 +1,7 @@
package main package main
import ( import (
"github.com/restic/chunker"
"github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/repository"
@ -20,19 +21,36 @@ Exit status is 0 if the command was successful, and non-zero if there was any er
`, `,
DisableAutoGenTag: true, DisableAutoGenTag: true,
RunE: func(cmd *cobra.Command, args []string) error { RunE: func(cmd *cobra.Command, args []string) error {
return runInit(globalOptions, args) return runInit(initOptions, globalOptions, args)
}, },
} }
func init() { // InitOptions bundles all options for the init command.
cmdRoot.AddCommand(cmdInit) type InitOptions struct {
secondaryRepoOptions
CopyChunkerParameters bool
} }
func runInit(gopts GlobalOptions, args []string) error { var initOptions InitOptions
// init registers the "init" command on the root command and declares its
// flags: the shared secondary-repository flags plus --copy-chunker-params.
func init() {
	cmdRoot.AddCommand(cmdInit)

	flags := cmdInit.Flags()
	initSecondaryRepoOptions(flags, &initOptions.secondaryRepoOptions, "secondary", "to copy chunker parameters from")
	flags.BoolVar(&initOptions.CopyChunkerParameters, "copy-chunker-params", false, "copy chunker parameters from the secondary repository (useful with the copy command)")
}
func runInit(opts InitOptions, gopts GlobalOptions, args []string) error {
if gopts.Repo == "" { if gopts.Repo == "" {
return errors.Fatal("Please specify repository location (-r)") return errors.Fatal("Please specify repository location (-r)")
} }
chunkerPolynomial, err := maybeReadChunkerPolynomial(opts, gopts)
if err != nil {
return err
}
be, err := create(gopts.Repo, gopts.extended) be, err := create(gopts.Repo, gopts.extended)
if err != nil { if err != nil {
return errors.Fatalf("create repository at %s failed: %v\n", gopts.Repo, err) return errors.Fatalf("create repository at %s failed: %v\n", gopts.Repo, err)
@ -47,7 +65,7 @@ func runInit(gopts GlobalOptions, args []string) error {
s := repository.New(be) s := repository.New(be)
err = s.Init(gopts.ctx, gopts.password) err = s.Init(gopts.ctx, gopts.password, chunkerPolynomial)
if err != nil { if err != nil {
return errors.Fatalf("create key in repository at %s failed: %v\n", gopts.Repo, err) return errors.Fatalf("create key in repository at %s failed: %v\n", gopts.Repo, err)
} }
@ -60,3 +78,25 @@ func runInit(gopts GlobalOptions, args []string) error {
return nil return nil
} }
// maybeReadChunkerPolynomial returns the chunker polynomial to use for a newly
// initialized repository.
//
// When opts.CopyChunkerParameters is set, the secondary repository described
// by opts is opened and a pointer to a copy of its configured chunker
// polynomial is returned. Otherwise the result is nil; in that case specifying
// a secondary repository at all is rejected with a fatal error.
func maybeReadChunkerPolynomial(opts InitOptions, gopts GlobalOptions) (*chunker.Pol, error) {
	if !opts.CopyChunkerParameters {
		// A secondary repository has no purpose unless its parameters are copied.
		if opts.Repo != "" {
			return nil, errors.Fatal("Secondary repository must only be specified when copying the chunker parameters")
		}
		return nil, nil
	}

	secondaryGopts, err := fillSecondaryGlobalOpts(opts.secondaryRepoOptions, gopts, "secondary")
	if err != nil {
		return nil, err
	}

	secondaryRepo, err := OpenRepository(secondaryGopts)
	if err != nil {
		return nil, err
	}

	// Copy the value so the returned pointer refers to a local variable.
	polynomial := secondaryRepo.Config().ChunkerPolynomial
	return &polynomial, nil
}

View file

@ -51,7 +51,7 @@ func testRunInit(t testing.TB, opts GlobalOptions) {
restic.TestDisableCheckPolynomial(t) restic.TestDisableCheckPolynomial(t)
restic.TestSetLockTimeout(t, 0) restic.TestSetLockTimeout(t, 0)
rtest.OK(t, runInit(opts, nil)) rtest.OK(t, runInit(InitOptions{}, opts, nil))
t.Logf("repository initialized at %v", opts.Repo) t.Logf("repository initialized at %v", opts.Repo)
} }
@ -616,8 +616,10 @@ func TestBackupTags(t *testing.T) {
func testRunCopy(t testing.TB, srcGopts GlobalOptions, dstGopts GlobalOptions) { func testRunCopy(t testing.TB, srcGopts GlobalOptions, dstGopts GlobalOptions) {
copyOpts := CopyOptions{ copyOpts := CopyOptions{
Repo: dstGopts.Repo, secondaryRepoOptions: secondaryRepoOptions{
password: dstGopts.password, Repo: dstGopts.Repo,
password: dstGopts.password,
},
} }
rtest.OK(t, runCopy(copyOpts, srcGopts, nil)) rtest.OK(t, runCopy(copyOpts, srcGopts, nil))
@ -729,6 +731,36 @@ func TestCopyIncremental(t *testing.T) {
len(copiedSnapshotIDs), len(snapshotIDs)) len(copiedSnapshotIDs), len(snapshotIDs))
} }
// TestInitCopyChunkerParams checks that runInit rejects a secondary repository
// when CopyChunkerParameters is not set, and that with the option set the
// newly initialized repository has the same chunker polynomial as the
// secondary repository.
func TestInitCopyChunkerParams(t *testing.T) {
	env, cleanup := withTestEnvironment(t)
	defer cleanup()
	srcEnv, srcCleanup := withTestEnvironment(t)
	defer srcCleanup()

	// The secondary repository has to exist before its parameters can be copied.
	testRunInit(t, srcEnv.gopts)

	initOpts := InitOptions{
		secondaryRepoOptions: secondaryRepoOptions{
			Repo:     srcEnv.gopts.Repo,
			password: srcEnv.gopts.password,
		},
	}

	// Secondary repository given without the copy option: must fail.
	err := runInit(initOpts, env.gopts, nil)
	rtest.Assert(t, err != nil, "expected invalid init options to fail")

	initOpts.CopyChunkerParameters = true
	rtest.OK(t, runInit(initOpts, env.gopts, nil))

	repo, err := OpenRepository(env.gopts)
	rtest.OK(t, err)

	otherRepo, err := OpenRepository(srcEnv.gopts)
	rtest.OK(t, err)

	got := repo.Config().ChunkerPolynomial
	want := otherRepo.Config().ChunkerPolynomial
	rtest.Assert(t, got == want,
		"expected equal chunker polynomials, got %v expected %v", got, want)
}
func testRunTag(t testing.TB, opts TagOptions, gopts GlobalOptions) { func testRunTag(t testing.TB, opts TagOptions, gopts GlobalOptions) {
rtest.OK(t, runTag(opts, gopts, []string{})) rtest.OK(t, runTag(opts, gopts, []string{}))
} }

View file

@ -0,0 +1,48 @@
package main
import (
"os"
"github.com/restic/restic/internal/errors"
"github.com/spf13/pflag"
)
// secondaryRepoOptions holds the location of a second repository together
// with the settings used to obtain its password. It is embedded in the option
// structs of commands that work with two repositories.
type secondaryRepoOptions struct {
// Repo is the location of the secondary repository (flag --repo2).
Repo string
// password, when non-empty, is used directly instead of resolving a
// password from the file, command or environment.
password string
// PasswordFile is the file to read the password from (flag --password-file2).
PasswordFile string
// PasswordCommand is the shell command that prints the password
// (flag --password-command2).
PasswordCommand string
// KeyHint is the ID of the key to try first (flag --key-hint2).
KeyHint string
}
// initSecondaryRepoOptions registers the flags describing a secondary
// repository (--repo2, --password-file2, --key-hint2, --password-command2) on
// f and binds them to opts. Each flag defaults to the value of its
// RESTIC_*2 environment variable.
//
// repoPrefix names the role of the repository in the help texts (for example
// "destination"); repoUsage completes the description of --repo2.
func initSecondaryRepoOptions(f *pflag.FlagSet, opts *secondaryRepoOptions, repoPrefix string, repoUsage string) {
	// Fragment shared by all four usage strings.
	repoName := repoPrefix + " repository"

	f.StringVar(&opts.Repo, "repo2", os.Getenv("RESTIC_REPOSITORY2"),
		repoName+" "+repoUsage+" (default: $RESTIC_REPOSITORY2)")
	f.StringVar(&opts.PasswordFile, "password-file2", os.Getenv("RESTIC_PASSWORD_FILE2"),
		"`file` to read the "+repoName+" password from (default: $RESTIC_PASSWORD_FILE2)")
	f.StringVar(&opts.KeyHint, "key-hint2", os.Getenv("RESTIC_KEY_HINT2"),
		"key ID of key to try decrypting the "+repoName+" first (default: $RESTIC_KEY_HINT2)")
	f.StringVar(&opts.PasswordCommand, "password-command2", os.Getenv("RESTIC_PASSWORD_COMMAND2"),
		"shell `command` to obtain the "+repoName+" password from (default: $RESTIC_PASSWORD_COMMAND2)")
}
// fillSecondaryGlobalOpts derives the GlobalOptions for the secondary
// repository from the primary options gopts and the secondary settings in
// opts.
//
// The repository location, password file, password command and key hint are
// taken from opts; every other setting is inherited from gopts. A password
// already present in opts is used as is; otherwise it is resolved with
// RESTIC_PASSWORD2 as the environment fallback. The result is then passed
// through ReadPassword with a prompt naming the repoPrefix repository.
//
// A fatal error is returned when no secondary repository location is set. On
// any error the returned GlobalOptions is the zero value.
func fillSecondaryGlobalOpts(opts secondaryRepoOptions, gopts GlobalOptions, repoPrefix string) (GlobalOptions, error) {
	if opts.Repo == "" {
		return GlobalOptions{}, errors.Fatal("Please specify a " + repoPrefix + " repository location (--repo2)")
	}

	// Start from a copy of the primary options and override the
	// repository-specific fields.
	secondary := gopts
	secondary.Repo = opts.Repo
	secondary.PasswordFile = opts.PasswordFile
	secondary.PasswordCommand = opts.PasswordCommand
	secondary.KeyHint = opts.KeyHint

	if opts.password == "" {
		resolved, err := resolvePassword(secondary, "RESTIC_PASSWORD2")
		if err != nil {
			return GlobalOptions{}, err
		}
		secondary.password = resolved
	} else {
		secondary.password = opts.password
	}

	finalPassword, err := ReadPassword(secondary, "enter password for "+repoPrefix+" repository: ")
	if err != nil {
		return GlobalOptions{}, err
	}
	secondary.password = finalPassword

	return secondary, nil
}

View file

@ -110,7 +110,8 @@ be skipped by later copy runs.
entire snapshot(s) due to the different encryption keys used in the source and entire snapshot(s) due to the different encryption keys used in the source and
destination repository. Also, the transferred files are not re-chunked, which destination repository. Also, the transferred files are not re-chunked, which
may break deduplication between files already stored in the destination repo may break deduplication between files already stored in the destination repo
and files copied there using this command. and files copied there using this command. See the next section for how to avoid
this problem.
For the destination repository ``--repo2`` the password can be read from For the destination repository ``--repo2`` the password can be read from
a file ``--password-file2`` or from a command ``--password-command2``. a file ``--password-file2`` or from a command ``--password-command2``.
@ -142,6 +143,28 @@ which case only these instead of all snapshots will be copied:
$ restic -r /srv/restic-repo copy --repo2 /srv/restic-repo-copy 410b18a2 4e5d5487 latest $ restic -r /srv/restic-repo copy --repo2 /srv/restic-repo-copy 410b18a2 4e5d5487 latest
Ensuring deduplication for copied snapshots
-------------------------------------------
Even though the copy command can transfer snapshots between arbitrary repositories,
deduplication between snapshots from the source and destination repository may not work.
To ensure proper deduplication, both repositories have to use the same parameters for
splitting large files into smaller chunks, which requires additional setup steps. With
the same parameters, restic will split identical files into identical chunks in both
repositories, and therefore deduplication also works for snapshots copied between
these repositories.
The chunker parameters are generated once when creating a new (destination) repository.
That is, for a copy destination repository we have to instruct restic to initialize it
using the same chunker parameters as the source repository:
.. code-block:: console
$ restic -r /srv/restic-repo-copy init --repo2 /srv/restic-repo --copy-chunker-params
Note that it is not possible to change the chunker parameters of an existing repository.
Checking integrity and consistency Checking integrity and consistency
================================== ==================================

View file

@ -8,6 +8,7 @@ import (
"io" "io"
"os" "os"
"github.com/restic/chunker"
"github.com/restic/restic/internal/cache" "github.com/restic/restic/internal/cache"
"github.com/restic/restic/internal/crypto" "github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/debug"
@ -614,7 +615,7 @@ func (r *Repository) SearchKey(ctx context.Context, password string, maxKeys int
// Init creates a new master key with the supplied password, initializes and // Init creates a new master key with the supplied password, initializes and
// saves the repository config. // saves the repository config.
func (r *Repository) Init(ctx context.Context, password string) error { func (r *Repository) Init(ctx context.Context, password string, chunkerPolynomial *chunker.Pol) error {
has, err := r.be.Test(ctx, restic.Handle{Type: restic.ConfigFile}) has, err := r.be.Test(ctx, restic.Handle{Type: restic.ConfigFile})
if err != nil { if err != nil {
return err return err
@ -627,6 +628,9 @@ func (r *Repository) Init(ctx context.Context, password string) error {
if err != nil { if err != nil {
return err return err
} }
if chunkerPolynomial != nil {
cfg.ChunkerPolynomial = *chunkerPolynomial
}
return r.init(ctx, password, cfg) return r.init(ctx, password, cfg)
} }