init: Add --copy-chunker-params option

This allows creating multiple repositories with identical chunker
parameters, which is required for working deduplication when copying
snapshots between different repositories.
This commit is contained in:
Michael Eischer 2020-09-19 12:41:52 +02:00
parent 655430550b
commit f003410402
6 changed files with 114 additions and 9 deletions

View file

@ -9,5 +9,12 @@ and destination repository. Also, the transferred files are not re-chunked,
which may break deduplication between files already stored in the
destination repo and files copied there using this command.
To fully support deduplication between repositories when the copy command is
used, the init command now supports the `--copy-chunker-params` option,
which initializes the new repository with the same parameters for splitting
files into chunks as an already existing repository. This allows copied
snapshots to be equally deduplicated in both repositories.
https://github.com/restic/restic/issues/323
https://github.com/restic/restic/pull/2606
https://github.com/restic/restic/pull/2928

View file

@ -18,7 +18,8 @@ The "copy" command copies one or more snapshots from one repository to another
repository. Note that this will have to read (download) and write (upload) the repository. Note that this will have to read (download) and write (upload) the
entire snapshot(s) due to the different encryption keys on the source and entire snapshot(s) due to the different encryption keys on the source and
destination, and that transferred files are not re-chunked, which may break destination, and that transferred files are not re-chunked, which may break
their deduplication. their deduplication. This can be mitigated by the "--copy-chunker-params"
option when initializing a new destination repository using the "init" command.
`, `,
RunE: func(cmd *cobra.Command, args []string) error { RunE: func(cmd *cobra.Command, args []string) error {
return runCopy(copyOptions, globalOptions, args) return runCopy(copyOptions, globalOptions, args)

View file

@ -1,6 +1,7 @@
package main package main
import ( import (
"github.com/restic/chunker"
"github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/repository"
@ -20,19 +21,36 @@ Exit status is 0 if the command was successful, and non-zero if there was any er
`, `,
DisableAutoGenTag: true, DisableAutoGenTag: true,
RunE: func(cmd *cobra.Command, args []string) error { RunE: func(cmd *cobra.Command, args []string) error {
return runInit(globalOptions, args) return runInit(initOptions, globalOptions, args)
}, },
} }
func init() { // InitOptions bundles all options for the init command.
cmdRoot.AddCommand(cmdInit) type InitOptions struct {
secondaryRepoOptions
CopyChunkerParameters bool
} }
func runInit(gopts GlobalOptions, args []string) error { var initOptions InitOptions
// init attaches the init command to the root command and registers its
// command-line flags, all of which are bound to the package-level initOptions.
func init() {
	cmdRoot.AddCommand(cmdInit)

	flags := cmdInit.Flags()

	// Flags describing the secondary repository; the helper is defined
	// elsewhere (presumably it registers the repo/password flag family for
	// that repository - confirm against its definition).
	initSecondaryRepoOptions(flags, &initOptions.secondaryRepoOptions, "secondary", "to copy chunker parameters from")

	// Opt-in switch, off by default.
	flags.BoolVar(&initOptions.CopyChunkerParameters, "copy-chunker-params", false, "copy chunker parameters from the secondary repository (useful with the copy command)")
}
func runInit(opts InitOptions, gopts GlobalOptions, args []string) error {
if gopts.Repo == "" { if gopts.Repo == "" {
return errors.Fatal("Please specify repository location (-r)") return errors.Fatal("Please specify repository location (-r)")
} }
chunkerPolynomial, err := maybeReadChunkerPolynomial(opts, gopts)
if err != nil {
return err
}
be, err := create(gopts.Repo, gopts.extended) be, err := create(gopts.Repo, gopts.extended)
if err != nil { if err != nil {
return errors.Fatalf("create repository at %s failed: %v\n", gopts.Repo, err) return errors.Fatalf("create repository at %s failed: %v\n", gopts.Repo, err)
@ -47,7 +65,7 @@ func runInit(gopts GlobalOptions, args []string) error {
s := repository.New(be) s := repository.New(be)
err = s.Init(gopts.ctx, gopts.password) err = s.Init(gopts.ctx, gopts.password, chunkerPolynomial)
if err != nil { if err != nil {
return errors.Fatalf("create key in repository at %s failed: %v\n", gopts.Repo, err) return errors.Fatalf("create key in repository at %s failed: %v\n", gopts.Repo, err)
} }
@ -60,3 +78,25 @@ func runInit(gopts GlobalOptions, args []string) error {
return nil return nil
} }
// maybeReadChunkerPolynomial determines which chunker polynomial, if any,
// should be forced onto the repository that is about to be initialized.
//
// When opts.CopyChunkerParameters is set, the secondary repository described
// by opts is opened and a pointer to a copy of the chunker polynomial from its
// config is returned. Otherwise (nil, nil) is returned, which leaves the
// choice of polynomial to the caller. Specifying a secondary repository
// without requesting the copy is rejected with a fatal error.
func maybeReadChunkerPolynomial(opts InitOptions, gopts GlobalOptions) (*chunker.Pol, error) {
	if !opts.CopyChunkerParameters {
		// A secondary repository serves no purpose unless its parameters
		// are being copied, so treat it as a usage error.
		if opts.Repo == "" {
			return nil, nil
		}
		return nil, errors.Fatal("Secondary repository must only be specified when copying the chunker parameters")
	}

	secondaryGopts, err := fillSecondaryGlobalOpts(opts.secondaryRepoOptions, gopts, "secondary")
	if err != nil {
		return nil, err
	}

	secondaryRepo, err := OpenRepository(secondaryGopts)
	if err != nil {
		return nil, err
	}

	// Copy the value into a local so the returned pointer refers to that copy.
	polynomial := secondaryRepo.Config().ChunkerPolynomial
	return &polynomial, nil
}

View file

@ -51,7 +51,7 @@ func testRunInit(t testing.TB, opts GlobalOptions) {
restic.TestDisableCheckPolynomial(t) restic.TestDisableCheckPolynomial(t)
restic.TestSetLockTimeout(t, 0) restic.TestSetLockTimeout(t, 0)
rtest.OK(t, runInit(opts, nil)) rtest.OK(t, runInit(InitOptions{}, opts, nil))
t.Logf("repository initialized at %v", opts.Repo) t.Logf("repository initialized at %v", opts.Repo)
} }
@ -731,6 +731,36 @@ func TestCopyIncremental(t *testing.T) {
len(copiedSnapshotIDs), len(snapshotIDs)) len(copiedSnapshotIDs), len(snapshotIDs))
} }
// TestInitCopyChunkerParams checks that runInit rejects a secondary repository
// when CopyChunkerParameters is not set, and that with it set the newly
// initialized repository ends up with the same chunker polynomial as the
// secondary repository.
func TestInitCopyChunkerParams(t *testing.T) {
	newEnv, cleanupNew := withTestEnvironment(t)
	defer cleanupNew()
	existingEnv, cleanupExisting := withTestEnvironment(t)
	defer cleanupExisting()

	// The secondary repository has to exist before its parameters can be read.
	testRunInit(t, existingEnv.gopts)

	secondary := secondaryRepoOptions{
		Repo:     existingEnv.gopts.Repo,
		password: existingEnv.gopts.password,
	}
	opts := InitOptions{secondaryRepoOptions: secondary}

	// A secondary repository without the copy flag must be refused.
	initErr := runInit(opts, newEnv.gopts, nil)
	rtest.Assert(t, initErr != nil, "expected invalid init options to fail")

	opts.CopyChunkerParameters = true
	rtest.OK(t, runInit(opts, newEnv.gopts, nil))

	newRepo, err := OpenRepository(newEnv.gopts)
	rtest.OK(t, err)

	existingRepo, err := OpenRepository(existingEnv.gopts)
	rtest.OK(t, err)

	gotPol := newRepo.Config().ChunkerPolynomial
	wantPol := existingRepo.Config().ChunkerPolynomial
	rtest.Assert(t, gotPol == wantPol,
		"expected equal chunker polynomials, got %v expected %v", gotPol, wantPol)
}
func testRunTag(t testing.TB, opts TagOptions, gopts GlobalOptions) { func testRunTag(t testing.TB, opts TagOptions, gopts GlobalOptions) {
rtest.OK(t, runTag(opts, gopts, []string{})) rtest.OK(t, runTag(opts, gopts, []string{}))
} }

View file

@ -110,7 +110,8 @@ be skipped by later copy runs.
entire snapshot(s) due to the different encryption keys used in the source and
destination repository. Also, the transferred files are not re-chunked, which
may break deduplication between files already stored in the destination repo
and files copied there using this command. See the next section for how to avoid
this problem.
For the destination repository ``--repo2`` the password can be read from
a file ``--password-file2`` or from a command ``--password-command2``.
@ -142,6 +143,28 @@ which case only these instead of all snapshots will be copied:
$ restic -r /srv/restic-repo copy --repo2 /srv/restic-repo-copy 410b18a2 4e5d5487 latest $ restic -r /srv/restic-repo copy --repo2 /srv/restic-repo-copy 410b18a2 4e5d5487 latest
Ensuring deduplication for copied snapshots
-------------------------------------------
Even though the copy command can transfer snapshots between arbitrary repositories,
deduplication between snapshots from the source and destination repository may not work.
To ensure proper deduplication, both repositories have to use the same parameters for
splitting large files into smaller chunks, which requires additional setup steps. With
the same parameters, restic splits identical files into identical chunks in both
repositories, and therefore deduplication also works for snapshots copied between
these repositories.
The chunker parameters are generated once when creating a new (destination) repository.
That is, for a copy destination repository, we have to instruct restic to initialize it
using the same chunker parameters as the source repository:
.. code-block:: console
$ restic -r /srv/restic-repo-copy init --repo2 /srv/restic-repo --copy-chunker-params
Note that it is not possible to change the chunker parameters of an existing repository.
Checking integrity and consistency Checking integrity and consistency
================================== ==================================

View file

@ -8,6 +8,7 @@ import (
"io" "io"
"os" "os"
"github.com/restic/chunker"
"github.com/restic/restic/internal/cache" "github.com/restic/restic/internal/cache"
"github.com/restic/restic/internal/crypto" "github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/debug"
@ -614,7 +615,7 @@ func (r *Repository) SearchKey(ctx context.Context, password string, maxKeys int
// Init creates a new master key with the supplied password, initializes and // Init creates a new master key with the supplied password, initializes and
// saves the repository config. // saves the repository config.
func (r *Repository) Init(ctx context.Context, password string) error { func (r *Repository) Init(ctx context.Context, password string, chunkerPolynomial *chunker.Pol) error {
has, err := r.be.Test(ctx, restic.Handle{Type: restic.ConfigFile}) has, err := r.be.Test(ctx, restic.Handle{Type: restic.ConfigFile})
if err != nil { if err != nil {
return err return err
@ -627,6 +628,9 @@ func (r *Repository) Init(ctx context.Context, password string) error {
if err != nil { if err != nil {
return err return err
} }
if chunkerPolynomial != nil {
cfg.ChunkerPolynomial = *chunkerPolynomial
}
return r.init(ctx, password, cfg) return r.init(ctx, password, cfg)
} }