Merge pull request #3666 from MichaelEischer/compression

Implement compression support
MichaelEischer 2022-04-30 11:49:05 +02:00 committed by GitHub
commit ac9324aeaf
28 changed files with 899 additions and 284 deletions

View file

@ -0,0 +1,21 @@
Enhancement: Add compression support
We have added compression support to the restic repository format. To create a
repository using the new format, run `init --repository-version 2`. Please note
that the repository cannot be read by restic versions prior to 0.14.0.
You can configure whether data is compressed with the option `--compression`. It
can be set to `auto` (the default, which compresses very fast), `max` (which
trades backup speed and CPU usage for better compression), or `off` (which
disables compression). The setting only applies to the current run of restic.
The new format version has not received much testing yet. Do not rely on it as
your only backup copy! Please run `check` at regular intervals to detect any
problems.
Upgrading in place is not yet supported. As a workaround, first create a new
repository using `init --repository-version 2 --copy-chunker-params --repo2 path/to/old/repo`.
Then use the `copy` command to copy all snapshots to the new repository.
https://github.com/restic/restic/issues/21
https://github.com/restic/restic/pull/3666

View file

@ -15,6 +15,7 @@ import (
"sort" "sort"
"time" "time"
"github.com/klauspost/compress/zstd"
"github.com/spf13/cobra" "github.com/spf13/cobra"
"golang.org/x/sync/errgroup" "golang.org/x/sync/errgroup"
@ -309,6 +310,10 @@ func decryptUnsigned(ctx context.Context, k *crypto.Key, buf []byte) []byte {
} }
func loadBlobs(ctx context.Context, repo restic.Repository, pack restic.ID, list []restic.Blob) error { func loadBlobs(ctx context.Context, repo restic.Repository, pack restic.ID, list []restic.Blob) error {
dec, err := zstd.NewReader(nil)
if err != nil {
panic(err)
}
be := repo.Backend() be := repo.Backend()
h := restic.Handle{ h := restic.Handle{
Name: pack.String(), Name: pack.String(),
@ -333,44 +338,47 @@ func loadBlobs(ctx context.Context, repo restic.Repository, pack restic.ID, list
nonce, plaintext := buf[:key.NonceSize()], buf[key.NonceSize():] nonce, plaintext := buf[:key.NonceSize()], buf[key.NonceSize():]
plaintext, err = key.Open(plaintext[:0], nonce, plaintext, nil) plaintext, err = key.Open(plaintext[:0], nonce, plaintext, nil)
outputPrefix := ""
filePrefix := ""
if err != nil { if err != nil {
Warnf("error decrypting blob: %v\n", err) Warnf("error decrypting blob: %v\n", err)
var plain []byte
if tryRepair || repairByte { if tryRepair || repairByte {
plain = tryRepairWithBitflip(ctx, key, buf, repairByte) plaintext = tryRepairWithBitflip(ctx, key, buf, repairByte)
} }
var prefix string if plaintext != nil {
if plain != nil { outputPrefix = "repaired "
id := restic.Hash(plain) filePrefix = "repaired-"
if !id.Equal(blob.ID) {
Printf(" repaired blob (length %v), hash is %v, ID does not match, wanted %v\n", len(plain), id, blob.ID)
prefix = "repaired-wrong-hash-"
} else {
Printf(" successfully repaired blob (length %v), hash is %v, ID matches\n", len(plain), id)
prefix = "repaired-"
}
} else { } else {
plain = decryptUnsigned(ctx, key, buf) plaintext = decryptUnsigned(ctx, key, buf)
prefix = "damaged-" err = storePlainBlob(blob.ID, "damaged-", plaintext)
if err != nil {
return err
}
continue
} }
err = storePlainBlob(blob.ID, prefix, plain) }
if blob.IsCompressed() {
decompressed, err := dec.DecodeAll(plaintext, nil)
if err != nil { if err != nil {
return err Printf(" failed to decompress blob %v\n", blob.ID)
}
if decompressed != nil {
plaintext = decompressed
} }
continue
} }
id := restic.Hash(plaintext) id := restic.Hash(plaintext)
var prefix string var prefix string
if !id.Equal(blob.ID) { if !id.Equal(blob.ID) {
Printf(" successfully decrypted blob (length %v), hash is %v, ID does not match, wanted %v\n", len(plaintext), id, blob.ID) Printf(" successfully %vdecrypted blob (length %v), hash is %v, ID does not match, wanted %v\n", outputPrefix, len(plaintext), id, blob.ID)
prefix = "wrong-hash-" prefix = "wrong-hash-"
} else { } else {
Printf(" successfully decrypted blob (length %v), hash is %v, ID matches\n", len(plaintext), id) Printf(" successfully %vdecrypted blob (length %v), hash is %v, ID matches\n", outputPrefix, len(plaintext), id)
prefix = "correct-" prefix = "correct-"
} }
if extractPack { if extractPack {
err = storePlainBlob(id, prefix, plaintext) err = storePlainBlob(id, filePrefix+prefix, plaintext)
if err != nil { if err != nil {
return err return err
} }
@ -476,27 +484,15 @@ func examinePack(ctx context.Context, repo restic.Repository, id restic.ID) erro
blobsLoaded := false blobsLoaded := false
// examine all data the indexes have for the pack file // examine all data the indexes have for the pack file
for _, idx := range repo.Index().(*repository.MasterIndex).All() { for b := range repo.Index().ListPacks(ctx, restic.NewIDSet(id)) {
idxIDs, err := idx.IDs() blobs := b.Blobs
if err != nil {
idxIDs = restic.IDs{}
}
blobs := idx.ListPack(id)
if len(blobs) == 0 { if len(blobs) == 0 {
continue continue
} }
Printf(" index %v:\n", idxIDs) checkPackSize(blobs, fi.Size)
// convert list of blobs to []restic.Blob err = loadBlobs(ctx, repo, id, blobs)
var list []restic.Blob
for _, b := range blobs {
list = append(list, b.Blob)
}
checkPackSize(list, fi.Size)
err = loadBlobs(ctx, repo, id, list)
if err != nil { if err != nil {
Warnf("error: %v\n", err) Warnf("error: %v\n", err)
} else { } else {
@ -532,14 +528,10 @@ func checkPackSize(blobs []restic.Blob, fileSize int64) {
if offset != uint64(pb.Offset) { if offset != uint64(pb.Offset) {
Printf(" hole in file, want offset %v, got %v\n", offset, pb.Offset) Printf(" hole in file, want offset %v, got %v\n", offset, pb.Offset)
} }
offset += uint64(pb.Length) offset = uint64(pb.Offset + pb.Length)
size += uint64(pb.Length) size += uint64(pb.Length)
} }
size += uint64(pack.CalculateHeaderSize(blobs))
// compute header size, per blob: 1 byte type, 4 byte length, 32 byte id
size += uint64(restic.CiphertextLength(len(blobs) * (1 + 4 + 32)))
// length in uint32 little endian
size += 4
if uint64(fileSize) != size { if uint64(fileSize) != size {
Printf(" file sizes do not match: computed %v from index, file size is %v\n", size, fileSize) Printf(" file sizes do not match: computed %v from index, file size is %v\n", size, fileSize)

View file

@ -1,10 +1,13 @@
package main package main
import ( import (
"strconv"
"github.com/restic/chunker" "github.com/restic/chunker"
"github.com/restic/restic/internal/backend/location" "github.com/restic/restic/internal/backend/location"
"github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
"github.com/spf13/cobra" "github.com/spf13/cobra"
) )
@ -30,6 +33,7 @@ Exit status is 0 if the command was successful, and non-zero if there was any er
type InitOptions struct { type InitOptions struct {
secondaryRepoOptions secondaryRepoOptions
CopyChunkerParameters bool CopyChunkerParameters bool
RepositoryVersion string
} }
var initOptions InitOptions var initOptions InitOptions
@ -40,9 +44,26 @@ func init() {
f := cmdInit.Flags() f := cmdInit.Flags()
initSecondaryRepoOptions(f, &initOptions.secondaryRepoOptions, "secondary", "to copy chunker parameters from") initSecondaryRepoOptions(f, &initOptions.secondaryRepoOptions, "secondary", "to copy chunker parameters from")
f.BoolVar(&initOptions.CopyChunkerParameters, "copy-chunker-params", false, "copy chunker parameters from the secondary repository (useful with the copy command)") f.BoolVar(&initOptions.CopyChunkerParameters, "copy-chunker-params", false, "copy chunker parameters from the secondary repository (useful with the copy command)")
f.StringVar(&initOptions.RepositoryVersion, "repository-version", "stable", "repository format version to use, allowed values are a format version, 'latest' and 'stable'")
} }
func runInit(opts InitOptions, gopts GlobalOptions, args []string) error { func runInit(opts InitOptions, gopts GlobalOptions, args []string) error {
var version uint
if opts.RepositoryVersion == "latest" || opts.RepositoryVersion == "" {
version = restic.MaxRepoVersion
} else if opts.RepositoryVersion == "stable" {
version = restic.StableRepoVersion
} else {
v, err := strconv.ParseUint(opts.RepositoryVersion, 10, 32)
if err != nil {
return errors.Fatal("invalid repository version")
}
version = uint(v)
}
if version < restic.MinRepoVersion || version > restic.MaxRepoVersion {
return errors.Fatalf("only repository versions between %v and %v are allowed", restic.MinRepoVersion, restic.MaxRepoVersion)
}
chunkerPolynomial, err := maybeReadChunkerPolynomial(opts, gopts) chunkerPolynomial, err := maybeReadChunkerPolynomial(opts, gopts)
if err != nil { if err != nil {
return err return err
@ -65,9 +86,9 @@ func runInit(opts InitOptions, gopts GlobalOptions, args []string) error {
return errors.Fatalf("create repository at %s failed: %v\n", location.StripPassword(gopts.Repo), err) return errors.Fatalf("create repository at %s failed: %v\n", location.StripPassword(gopts.Repo), err)
} }
s := repository.New(be) s := repository.New(be, repository.Options{Compression: gopts.Compression})
err = s.Init(gopts.ctx, gopts.password, chunkerPolynomial) err = s.Init(gopts.ctx, version, gopts.password, chunkerPolynomial)
if err != nil { if err != nil {
return errors.Fatalf("create key in repository at %s failed: %v\n", location.StripPassword(gopts.Repo), err) return errors.Fatalf("create key in repository at %s failed: %v\n", location.StripPassword(gopts.Repo), err)
} }

View file

@ -64,6 +64,7 @@ type GlobalOptions struct {
InsecureTLS bool InsecureTLS bool
TLSClientCert string TLSClientCert string
CleanupCache bool CleanupCache bool
Compression repository.CompressionMode
LimitUploadKb int LimitUploadKb int
LimitDownloadKb int LimitDownloadKb int
@ -120,6 +121,7 @@ func init() {
f.StringVar(&globalOptions.TLSClientCert, "tls-client-cert", "", "path to a `file` containing PEM encoded TLS client certificate and private key") f.StringVar(&globalOptions.TLSClientCert, "tls-client-cert", "", "path to a `file` containing PEM encoded TLS client certificate and private key")
f.BoolVar(&globalOptions.InsecureTLS, "insecure-tls", false, "skip TLS certificate verification when connecting to the repo (insecure)") f.BoolVar(&globalOptions.InsecureTLS, "insecure-tls", false, "skip TLS certificate verification when connecting to the repo (insecure)")
f.BoolVar(&globalOptions.CleanupCache, "cleanup-cache", false, "auto remove old cache directories") f.BoolVar(&globalOptions.CleanupCache, "cleanup-cache", false, "auto remove old cache directories")
f.Var(&globalOptions.Compression, "compression", "compression mode (only available for repo format version 2), one of (auto|off|max)")
f.IntVar(&globalOptions.LimitUploadKb, "limit-upload", 0, "limits uploads to a maximum rate in KiB/s. (default: unlimited)") f.IntVar(&globalOptions.LimitUploadKb, "limit-upload", 0, "limits uploads to a maximum rate in KiB/s. (default: unlimited)")
f.IntVar(&globalOptions.LimitDownloadKb, "limit-download", 0, "limits downloads to a maximum rate in KiB/s. (default: unlimited)") f.IntVar(&globalOptions.LimitDownloadKb, "limit-download", 0, "limits downloads to a maximum rate in KiB/s. (default: unlimited)")
f.StringSliceVarP(&globalOptions.Options, "option", "o", []string{}, "set extended option (`key=value`, can be specified multiple times)") f.StringSliceVarP(&globalOptions.Options, "option", "o", []string{}, "set extended option (`key=value`, can be specified multiple times)")
@ -435,7 +437,7 @@ func OpenRepository(opts GlobalOptions) (*repository.Repository, error) {
} }
} }
s := repository.New(be) s := repository.New(be, repository.Options{Compression: opts.Compression})
passwordTriesLeft := 1 passwordTriesLeft := 1
if stdinIsTerminal() && opts.password == "" { if stdinIsTerminal() && opts.password == "" {
@ -471,7 +473,7 @@ func OpenRepository(opts GlobalOptions) (*repository.Repository, error) {
id = id[:8] id = id[:8]
} }
if !opts.JSON { if !opts.JSON {
Verbosef("repository %v opened successfully, password is correct\n", id) Verbosef("repository %v opened (repo version %v) successfully, password is correct\n", id, s.Config().Version)
} }
} }

View file

@ -1470,7 +1470,7 @@ func TestRebuildIndexAlwaysFull(t *testing.T) {
defer func() { defer func() {
repository.IndexFull = indexFull repository.IndexFull = indexFull
}() }()
repository.IndexFull = func(*repository.Index) bool { return true } repository.IndexFull = func(*repository.Index, bool) bool { return true }
testRebuildIndex(t, nil) testRebuildIndex(t, nil)
} }

View file

@ -35,6 +35,13 @@ options exist:
* Configuring a program to be called when the password is needed via the * Configuring a program to be called when the password is needed via the
option ``--password-command`` or the environment variable option ``--password-command`` or the environment variable
``RESTIC_PASSWORD_COMMAND`` ``RESTIC_PASSWORD_COMMAND``
* The ``init`` command has an option called ``--repository-version`` which can
be used to explicitly set the version for the new repository. By default,
the current stable version is used. Have a look at the `design documentation
<https://github.com/restic/restic/blob/master/doc/design.rst>`__ for
details.
Local Local
***** *****
@ -692,4 +699,3 @@ On MSYS2, you can install ``winpty`` as follows:
$ pacman -S winpty $ pacman -S winpty
$ winpty restic -r /srv/restic-repo init $ winpty restic -r /srv/restic-repo init

View file

@ -62,28 +62,30 @@ like the following:
.. code:: json .. code:: json
{ {
"version": 1, "version": 2,
"id": "5956a3f67a6230d4a92cefb29529f10196c7d92582ec305fd71ff6d331d6271b", "id": "5956a3f67a6230d4a92cefb29529f10196c7d92582ec305fd71ff6d331d6271b",
"chunker_polynomial": "25b468838dcb75" "chunker_polynomial": "25b468838dcb75"
} }
After decryption, restic first checks that the version field contains a After decryption, restic first checks that the version field contains a
version number that it understands, otherwise it aborts. At the moment, version number that it understands, otherwise it aborts. At the moment, the
the version is expected to be 1. The field ``id`` holds a unique ID version is expected to be 1 or 2. The list of changes in the repository
which consists of 32 random bytes, encoded in hexadecimal. This uniquely format is contained in the section "Changes" below.
identifies the repository, regardless if it is accessed via SFTP or
locally. The field ``chunker_polynomial`` contains a parameter that is The field ``id`` holds a unique ID which consists of 32 random bytes, encoded
used for splitting large files into smaller chunks (see below). in hexadecimal. This uniquely identifies the repository, regardless if it is
accessed via a remote storage backend or locally. The field
``chunker_polynomial`` contains a parameter that is used for splitting large
files into smaller chunks (see below).
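For illustration, here is a minimal Go sketch of this version check (not part of
the change itself; the type and helper names are made up):

.. code-block:: go

package main

import (
	"encoding/json"
	"fmt"
)

// config mirrors the decrypted JSON document shown above.
type config struct {
	Version           uint   `json:"version"`
	ID                string `json:"id"`
	ChunkerPolynomial string `json:"chunker_polynomial"`
}

// parseConfig decodes the decrypted config and rejects unknown versions.
func parseConfig(plaintext []byte) (config, error) {
	var cfg config
	if err := json.Unmarshal(plaintext, &cfg); err != nil {
		return cfg, err
	}
	// only versions 1 and 2 are understood, anything else aborts
	if cfg.Version < 1 || cfg.Version > 2 {
		return cfg, fmt.Errorf("unsupported repository version %d", cfg.Version)
	}
	return cfg, nil
}

func main() {
	cfg, err := parseConfig([]byte(`{"version": 2, "id": "5956a3f6", "chunker_polynomial": "25b468838dcb75"}`))
	fmt.Println(cfg.Version, err) // 2 <nil>
}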
Repository Layout Repository Layout
----------------- -----------------
The ``local`` and ``sftp`` backends are implemented using files and The ``local`` and ``sftp`` backends are implemented using files and
directories stored in a file system. The directory layout is the same directories stored in a file system. The directory layout is the same
for both backend types. for both backend types and is also used for all other remote backends.
The basic layout of a repository stored in a ``local`` or ``sftp`` The basic layout of a repository is shown here:
backend is shown here:
:: ::
@ -109,8 +111,7 @@ backend is shown here:
│ └── 22a5af1bdc6e616f8a29579458c49627e01b32210d09adb288d1ecda7c5711ec │ └── 22a5af1bdc6e616f8a29579458c49627e01b32210d09adb288d1ecda7c5711ec
└── tmp └── tmp
A local repository can be initialized with the ``restic init`` command, A local repository can be initialized with the ``restic init`` command, e.g.:
e.g.:
.. code-block:: console .. code-block:: console
@ -186,40 +187,75 @@ After decryption, a Pack's header consists of the following elements:
:: ::
Type_Blob1 || Length(EncryptedBlob1) || Hash(Plaintext_Blob1) || Type_Blob1 || Data_Blob1 ||
[...] [...]
Type_BlobN || Length(EncryptedBlobN) || Hash(Plaintext_Blobn) || Type_BlobN || Data_BlobN ||
The Blob type field is a single byte. What follows it depends on the type. The
following Blob types are defined:
+-----------+----------------------+-------------------------------------------------------------------------------+
| Type | Meaning | Data |
+===========+======================+===============================================================================+
| 0b00 | data blob | ``Length(encrypted_blob) || Hash(plaintext_blob)`` |
+-----------+----------------------+-------------------------------------------------------------------------------+
| 0b01 | tree blob | ``Length(encrypted_blob) || Hash(plaintext_blob)`` |
+-----------+----------------------+-------------------------------------------------------------------------------+
| 0b10 | compressed data blob | ``Length(encrypted_blob) || Length(plaintext_blob) || Hash(plaintext_blob)`` |
+-----------+----------------------+-------------------------------------------------------------------------------+
| 0b11 | compressed tree blob | ``Length(encrypted_blob) || Length(plaintext_blob) || Hash(plaintext_blob)`` |
+-----------+----------------------+-------------------------------------------------------------------------------+
This is enough to calculate the offsets for all the Blobs in the Pack. This is enough to calculate the offsets for all the Blobs in the Pack.
Length is the length of a Blob as a four byte integer in little-endian The length fields are encoded as four byte integers in little-endian
format. The type field is a one byte field and labels the content of a format. In the Data column, ``Length(plaintext_blob)`` means the length
blob according to the following table: of the decrypted and uncompressed data a blob consists of.
+--------+-----------+ All other types are invalid, more types may be added in the future. The
| Type | Meaning | compressed types are only valid for repository format version 2. Data and
+========+===========+ tree blobs may be compressed with the zstandard compression algorithm.
| 0 | data |
+--------+-----------+
| 1 | tree |
+--------+-----------+
All other types are invalid, more types may be added in the future. In repository format version 1, data and tree blobs should be stored in
separate pack files. In version 2, they must be stored in separate pack files.
Compressed and non-compressed blobs of the same type may be mixed in a pack
file.
For reconstructing the index or parsing a pack without an index, first For reconstructing the index or parsing a pack without an index, first
the last four bytes must be read in order to find the length of the the last four bytes must be read in order to find the length of the
header. Afterwards, the header can be read and parsed, which yields all header. Afterwards, the header can be read and parsed, which yields all
plaintext hashes, types, offsets and lengths of all included blobs. plaintext hashes, types, offsets and lengths of all included blobs.
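For illustration, the following Go sketch decodes a single header entry
according to the table above. It is separate from this PR's actual
``parseHeaderEntry``; the type and field names are illustrative:

.. code-block:: go

package main

import (
	"encoding/binary"
	"fmt"
)

type headerEntry struct {
	Tree               bool
	Compressed         bool
	Length             uint32 // Length(encrypted_blob)
	UncompressedLength uint32 // only set for compressed blobs
	ID                 [32]byte
}

// decodeEntry reads one entry from buf and returns it together with the
// number of bytes consumed (37 for plain, 41 for compressed entries).
func decodeEntry(buf []byte) (headerEntry, int, error) {
	var e headerEntry
	if len(buf) < 1+4+32 {
		return e, 0, fmt.Errorf("entry too short")
	}
	switch buf[0] {
	case 0: // data blob
	case 1: // tree blob
		e.Tree = true
	case 2: // compressed data blob
		e.Compressed = true
	case 3: // compressed tree blob
		e.Tree, e.Compressed = true, true
	default:
		return e, 0, fmt.Errorf("invalid blob type %d", buf[0])
	}
	buf = buf[1:]
	e.Length = binary.LittleEndian.Uint32(buf)
	buf = buf[4:]
	size := 1 + 4 + 32
	if e.Compressed {
		// compressed entries carry an extra 4 byte uncompressed length
		if len(buf) < 4+32 {
			return e, 0, fmt.Errorf("entry too short")
		}
		e.UncompressedLength = binary.LittleEndian.Uint32(buf)
		buf = buf[4:]
		size += 4
	}
	copy(e.ID[:], buf[:32])
	return e, size, nil
}

func main() {
	// a compressed data blob entry: type 2, length 112, uncompressed length 511
	entry := make([]byte, 41)
	entry[0] = 2
	binary.LittleEndian.PutUint32(entry[1:5], 112)
	binary.LittleEndian.PutUint32(entry[5:9], 511)
	e, n, _ := decodeEntry(entry)
	fmt.Println(e.Compressed, e.Length, e.UncompressedLength, n) // true 112 511 41
}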
Unpacked Data Format
====================
Individual files for the index, locks or snapshots are encrypted
and authenticated like Data and Tree Blobs, so the outer structure is
``IV || Ciphertext || MAC`` again. In repository format version 1 the
plaintext always consists of a JSON document which must either be an
object or an array.
Repository format version 2 adds support for compression. The plaintext
now starts with a header to indicate the encoding version to distinguish
it from plain JSON and to allow for further evolution of the storage format:
``encoding_version || data``
The ``encoding_version`` field is encoded as one byte.
For backwards compatibility the encoding versions '[' (0x5b) and '{' (0x7b)
are used to mark that the whole plaintext (including the encoding version
byte) should be treated as a JSON document.
For new data the encoding version is currently always ``2``. For that
version ``data`` contains a JSON document compressed using the zstandard
compression algorithm.
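For illustration, a minimal Go sketch of decoding such a plaintext, assuming the
``github.com/klauspost/compress/zstd`` decoder used elsewhere in this PR (the
helper name is made up):

.. code-block:: go

package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

// decodeUnpacked returns the JSON document contained in the decrypted
// plaintext of an index, lock or snapshot file.
func decodeUnpacked(plaintext []byte) ([]byte, error) {
	if len(plaintext) == 0 {
		return nil, fmt.Errorf("empty plaintext")
	}
	switch plaintext[0] {
	case '[', '{':
		// encoding versions 0x5b and 0x7b: the whole plaintext,
		// including this byte, is an uncompressed JSON document
		return plaintext, nil
	case 2:
		// encoding version 2: the remainder is zstd-compressed JSON
		dec, err := zstd.NewReader(nil)
		if err != nil {
			return nil, err
		}
		defer dec.Close()
		return dec.DecodeAll(plaintext[1:], nil)
	default:
		return nil, fmt.Errorf("unsupported encoding version %d", plaintext[0])
	}
}

func main() {
	doc, err := decodeUnpacked([]byte(`{"version":2}`))
	fmt.Println(string(doc), err) // {"version":2} <nil>
}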
Indexing Indexing
======== ========
Index files contain information about Data and Tree Blobs and the Packs Index files contain information about Data and Tree Blobs and the Packs
they are contained in and store this information in the repository. When they are contained in and store this information in the repository. When
the local cached index is not accessible any more, the index files can the local cached index is not accessible any more, the index files can
be downloaded and used to reconstruct the index. The files are encrypted be downloaded and used to reconstruct the index. The file encoding is
and authenticated like Data and Tree Blobs, so the outer structure is described in the "Unpacked Data Format" section. The plaintext consists
``IV || Ciphertext || MAC`` again. The plaintext consists of a JSON of a JSON document like the following:
document like the following:
.. code:: json .. code:: json
@ -235,18 +271,22 @@ document like the following:
"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce", "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
"type": "data", "type": "data",
"offset": 0, "offset": 0,
"length": 25 "length": 38,
},{ // no 'uncompressed_length' as blob is not compressed
},
{
"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae", "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
"type": "tree", "type": "tree",
"offset": 38, "offset": 38,
"length": 100 "length": 112,
"uncompressed_length": 511,
}, },
{ {
"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66", "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
"type": "data", "type": "data",
"offset": 150, "offset": 150,
"length": 123 "length": 123,
"uncompressed_length": 234,
} }
] ]
}, [...] }, [...]
@ -255,7 +295,11 @@ document like the following:
This JSON document lists Packs and the blobs contained therein. In this This JSON document lists Packs and the blobs contained therein. In this
example, the Pack ``73d04e61`` contains two data Blobs and one Tree example, the Pack ``73d04e61`` contains two data Blobs and one Tree
blob, the plaintext hashes are listed afterwards. blob, the plaintext hashes are listed afterwards. The ``length`` field
corresponds to ``Length(encrypted_blob)`` in the pack file header.
Field ``uncompressed_length`` is only present for compressed blobs and
therefore is never present in version 1. It is set to the value of
``Length(plaintext_blob)``.
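For illustration, a small Go sketch of how a reader can derive the plaintext
length of a blob from these index fields. The names are illustrative, and the
32 byte ciphertext overhead (16 byte IV plus 16 byte MAC) is an assumption here:

.. code-block:: go

package main

import "fmt"

// indexBlob mirrors one entry of the "blobs" array shown above.
type indexBlob struct {
	ID                 string `json:"id"`
	Type               string `json:"type"`
	Offset             uint   `json:"offset"`
	Length             uint   `json:"length"`                        // Length(encrypted_blob)
	UncompressedLength uint   `json:"uncompressed_length,omitempty"` // only for compressed blobs
}

// assumed per-blob ciphertext overhead: 16 byte IV plus 16 byte MAC
const cryptoOverhead = 32

// plaintextLength returns the size of the decrypted, uncompressed blob data.
func plaintextLength(b indexBlob) uint {
	if b.UncompressedLength != 0 {
		return b.UncompressedLength
	}
	return b.Length - cryptoOverhead
}

func main() {
	compressed := indexBlob{Type: "tree", Offset: 38, Length: 112, UncompressedLength: 511}
	plain := indexBlob{Type: "data", Offset: 0, Length: 38}
	fmt.Println(plaintextLength(compressed), plaintextLength(plain)) // 511 6
}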
The field ``supersedes`` lists the storage IDs of index files that have The field ``supersedes`` lists the storage IDs of index files that have
been replaced with the current index file. This happens when index files been replaced with the current index file. This happens when index files
@ -350,8 +394,9 @@ Snapshots
A snapshot represents a directory with all files and sub-directories at A snapshot represents a directory with all files and sub-directories at
a given point in time. For each backup that is made, a new snapshot is a given point in time. For each backup that is made, a new snapshot is
created. A snapshot is a JSON document that is stored in an encrypted created. A snapshot is a JSON document that is stored in a file below
file below the directory ``snapshots`` in the repository. The filename the directory ``snapshots`` in the repository. It uses the file encoding
described in the "Unpacked Data Format" section. The filename
is the storage ID. This string is unique and used within restic to is the storage ID. This string is unique and used within restic to
uniquely identify a snapshot. uniquely identify a snapshot.
@ -412,7 +457,7 @@ Blobs of data. The SHA-256 hashes of all Blobs are saved in an ordered
list which then represents the content of the file. list which then represents the content of the file.
In order to relate these plaintext hashes to the actual location within In order to relate these plaintext hashes to the actual location within
a Pack file , an index is used. If the index is not available, the a Pack file, an index is used. If the index is not available, the
header of all data Blobs can be read. header of all data Blobs can be read.
Trees and Data Trees and Data
@ -517,8 +562,8 @@ time there must not be any other locks (exclusive and non-exclusive).
There may be multiple non-exclusive locks in parallel. There may be multiple non-exclusive locks in parallel.
A lock is a file in the subdir ``locks`` whose filename is the storage A lock is a file in the subdir ``locks`` whose filename is the storage
ID of the contents. It is encrypted and authenticated the same way as ID of the contents. It is stored in the file encoding described in the
other files in the repository and contains the following JSON structure: "Unpacked Data Format" section and contains the following JSON structure:
.. code:: json .. code:: json
@ -721,3 +766,11 @@ An adversary who has a leaked (decrypted) key for a repository could:
only be done using the ``copy`` command, which moves the data into a new only be done using the ``copy`` command, which moves the data into a new
repository with a new master key, or by making a completely new repository repository with a new master key, or by making a completely new repository
and new backup. and new backup.
Changes
=======
Repository Version 2
--------------------
* Support compression for blobs (data/tree) and index / lock / snapshot files

go.mod
View file

@ -21,7 +21,7 @@ require (
github.com/hashicorp/golang-lru v0.5.4 github.com/hashicorp/golang-lru v0.5.4
github.com/json-iterator/go v1.1.12 // indirect github.com/json-iterator/go v1.1.12 // indirect
github.com/juju/ratelimit v1.0.1 github.com/juju/ratelimit v1.0.1
github.com/klauspost/compress v1.15.1 // indirect github.com/klauspost/compress v1.15.1
github.com/klauspost/cpuid/v2 v2.0.12 // indirect github.com/klauspost/cpuid/v2 v2.0.12 // indirect
github.com/kurin/blazer v0.5.4-0.20211030221322-ba894c124ac6 github.com/kurin/blazer v0.5.4-0.20211030221322-ba894c124ac6
github.com/minio/md5-simd v1.1.2 // indirect github.com/minio/md5-simd v1.1.2 // indirect

View file

@ -1894,7 +1894,7 @@ func TestArchiverContextCanceled(t *testing.T) {
defer removeTempdir() defer removeTempdir()
// Ensure that the archiver itself reports the canceled context and not just the backend // Ensure that the archiver itself reports the canceled context and not just the backend
repo, _ := repository.TestRepositoryWithBackend(t, &noCancelBackend{mem.New()}) repo, _ := repository.TestRepositoryWithBackend(t, &noCancelBackend{mem.New()}, 0)
back := restictest.Chdir(t, tempdir) back := restictest.Chdir(t, tempdir)
defer back() defer back()

View file

@ -350,7 +350,7 @@ func TestCheckerModifiedData(t *testing.T) {
t.Logf("archived as %v", sn.ID().Str()) t.Logf("archived as %v", sn.ID().Str())
beError := &errorBackend{Backend: repo.Backend()} beError := &errorBackend{Backend: repo.Backend()}
checkRepo := repository.New(beError) checkRepo := repository.New(beError, repository.Options{})
test.OK(t, checkRepo.SearchKey(context.TODO(), test.TestPassword, 5, "")) test.OK(t, checkRepo.SearchKey(context.TODO(), test.TestPassword, 5, ""))
chkr := checker.New(checkRepo, false) chkr := checker.New(checkRepo, false)

View file

@ -32,7 +32,7 @@ func NewPacker(k *crypto.Key, wr io.Writer) *Packer {
// Add saves the data read from rd as a new blob to the packer. Returned is the // Add saves the data read from rd as a new blob to the packer. Returned is the
// number of bytes written to the pack. // number of bytes written to the pack.
func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte) (int, error) { func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte, uncompressedLength int) (int, error) {
p.m.Lock() p.m.Lock()
defer p.m.Unlock() defer p.m.Unlock()
@ -41,13 +41,15 @@ func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte) (int, error)
n, err := p.wr.Write(data) n, err := p.wr.Write(data)
c.Length = uint(n) c.Length = uint(n)
c.Offset = p.bytes c.Offset = p.bytes
c.UncompressedLength = uint(uncompressedLength)
p.bytes += uint(n) p.bytes += uint(n)
p.blobs = append(p.blobs, c) p.blobs = append(p.blobs, c)
return n, errors.Wrap(err, "Write") return n, errors.Wrap(err, "Write")
} }
var entrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{})) var entrySize = uint(binary.Size(restic.BlobType(0)) + 2*headerLengthSize + len(restic.ID{}))
var plainEntrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{}))
// headerEntry describes the format of header entries. It serves only as // headerEntry describes the format of header entries. It serves only as
// documentation. // documentation.
@ -57,6 +59,15 @@ type headerEntry struct {
ID restic.ID ID restic.ID
} }
// compressedHeaderEntry describes the format of header entries for compressed blobs.
// It serves only as documentation.
type compressedHeaderEntry struct {
Type uint8
Length uint32
UncompressedLength uint32
ID restic.ID
}
// Finalize writes the header for all added blobs and finalizes the pack. // Finalize writes the header for all added blobs and finalizes the pack.
// Returned are the number of bytes written, including the header. // Returned are the number of bytes written, including the header.
func (p *Packer) Finalize() (uint, error) { func (p *Packer) Finalize() (uint, error) {
@ -70,7 +81,7 @@ func (p *Packer) Finalize() (uint, error) {
return 0, err return 0, err
} }
encryptedHeader := make([]byte, 0, len(header)+p.k.Overhead()+p.k.NonceSize()) encryptedHeader := make([]byte, 0, restic.CiphertextLength(len(header)))
nonce := crypto.NewRandomNonce() nonce := crypto.NewRandomNonce()
encryptedHeader = append(encryptedHeader, nonce...) encryptedHeader = append(encryptedHeader, nonce...)
encryptedHeader = p.k.Seal(encryptedHeader, nonce, header, nil) encryptedHeader = p.k.Seal(encryptedHeader, nonce, header, nil)
@ -81,7 +92,7 @@ func (p *Packer) Finalize() (uint, error) {
return 0, errors.Wrap(err, "Write") return 0, errors.Wrap(err, "Write")
} }
hdrBytes := restic.CiphertextLength(len(header)) hdrBytes := len(encryptedHeader)
if n != hdrBytes { if n != hdrBytes {
return 0, errors.New("wrong number of bytes written") return 0, errors.New("wrong number of bytes written")
} }
@ -104,11 +115,15 @@ func (p *Packer) makeHeader() ([]byte, error) {
buf := make([]byte, 0, len(p.blobs)*int(entrySize)) buf := make([]byte, 0, len(p.blobs)*int(entrySize))
for _, b := range p.blobs { for _, b := range p.blobs {
switch b.Type { switch {
case restic.DataBlob: case b.Type == restic.DataBlob && b.UncompressedLength == 0:
buf = append(buf, 0) buf = append(buf, 0)
case restic.TreeBlob: case b.Type == restic.TreeBlob && b.UncompressedLength == 0:
buf = append(buf, 1) buf = append(buf, 1)
case b.Type == restic.DataBlob && b.UncompressedLength != 0:
buf = append(buf, 2)
case b.Type == restic.TreeBlob && b.UncompressedLength != 0:
buf = append(buf, 3)
default: default:
return nil, errors.Errorf("invalid blob type %v", b.Type) return nil, errors.Errorf("invalid blob type %v", b.Type)
} }
@ -116,6 +131,10 @@ func (p *Packer) makeHeader() ([]byte, error) {
var lenLE [4]byte var lenLE [4]byte
binary.LittleEndian.PutUint32(lenLE[:], uint32(b.Length)) binary.LittleEndian.PutUint32(lenLE[:], uint32(b.Length))
buf = append(buf, lenLE[:]...) buf = append(buf, lenLE[:]...)
if b.UncompressedLength != 0 {
binary.LittleEndian.PutUint32(lenLE[:], uint32(b.UncompressedLength))
buf = append(buf, lenLE[:]...)
}
buf = append(buf, b.ID[:]...) buf = append(buf, b.ID[:]...)
} }
@ -152,7 +171,7 @@ func (p *Packer) String() string {
var ( var (
// we require at least one entry in the header, and one blob for a pack file // we require at least one entry in the header, and one blob for a pack file
minFileSize = entrySize + crypto.Extension + uint(headerLengthSize) minFileSize = plainEntrySize + crypto.Extension + uint(headerLengthSize)
) )
const ( const (
@ -167,16 +186,11 @@ const (
eagerEntries = 15 eagerEntries = 15
) )
// readRecords reads up to max records from the underlying ReaderAt, returning // readRecords reads up to bufsize bytes from the underlying ReaderAt, returning
// the raw header, the total number of records in the header, and any error. // the raw header, the total number of bytes in the header, and any error.
// If the header contains fewer than max entries, the header is truncated to // If the header contains fewer than bufsize bytes, the header is truncated to
// the appropriate size. // the appropriate size.
func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) { func readRecords(rd io.ReaderAt, size int64, bufsize int) ([]byte, int, error) {
var bufsize int
bufsize += max * int(entrySize)
bufsize += crypto.Extension
bufsize += headerLengthSize
if bufsize > int(size) { if bufsize > int(size) {
bufsize = int(size) bufsize = int(size)
} }
@ -197,8 +211,6 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
err = InvalidFileError{Message: "header length is zero"} err = InvalidFileError{Message: "header length is zero"}
case hlen < crypto.Extension: case hlen < crypto.Extension:
err = InvalidFileError{Message: "header length is too small"} err = InvalidFileError{Message: "header length is too small"}
case (hlen-crypto.Extension)%uint32(entrySize) != 0:
err = InvalidFileError{Message: "header length is invalid"}
case int64(hlen) > size-int64(headerLengthSize): case int64(hlen) > size-int64(headerLengthSize):
err = InvalidFileError{Message: "header is larger than file"} err = InvalidFileError{Message: "header is larger than file"}
case int64(hlen) > MaxHeaderSize-int64(headerLengthSize): case int64(hlen) > MaxHeaderSize-int64(headerLengthSize):
@ -208,8 +220,8 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
return nil, 0, errors.Wrap(err, "readHeader") return nil, 0, errors.Wrap(err, "readHeader")
} }
total := (int(hlen) - crypto.Extension) / int(entrySize) total := int(hlen + headerLengthSize)
if total < max { if total < bufsize {
// truncate to the beginning of the pack header // truncate to the beginning of the pack header
b = b[len(b)-int(hlen):] b = b[len(b)-int(hlen):]
} }
@ -230,11 +242,12 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
// eagerly download eagerEntries header entries as part of header-length request. // eagerly download eagerEntries header entries as part of header-length request.
// only make second request if actual number of entries is greater than eagerEntries // only make second request if actual number of entries is greater than eagerEntries
b, c, err := readRecords(rd, size, eagerEntries) eagerSize := eagerEntries*int(entrySize) + headerSize
b, c, err := readRecords(rd, size, eagerSize)
if err != nil { if err != nil {
return nil, err return nil, err
} }
if c <= eagerEntries { if c <= eagerSize {
// eager read sufficed, return what we got // eager read sufficed, return what we got
return b, nil return b, nil
} }
@ -262,7 +275,7 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr
return nil, 0, err return nil, 0, err
} }
if len(buf) < k.NonceSize()+k.Overhead() { if len(buf) < restic.CiphertextLength(0) {
return nil, 0, errors.New("invalid header, too small") return nil, 0, errors.New("invalid header, too small")
} }
@ -274,11 +287,12 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr
return nil, 0, err return nil, 0, err
} }
entries = make([]restic.Blob, 0, uint(len(buf))/entrySize) // might over allocate a bit if all blobs have EntrySize but only by a few percent
entries = make([]restic.Blob, 0, uint(len(buf))/plainEntrySize)
pos := uint(0) pos := uint(0)
for len(buf) > 0 { for len(buf) > 0 {
entry, err := parseHeaderEntry(buf) entry, headerSize, err := parseHeaderEntry(buf)
if err != nil { if err != nil {
return nil, 0, err return nil, 0, err
} }
@ -286,36 +300,60 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr
entries = append(entries, entry) entries = append(entries, entry)
pos += entry.Length pos += entry.Length
buf = buf[entrySize:] buf = buf[headerSize:]
} }
return entries, hdrSize, nil return entries, hdrSize, nil
} }
func parseHeaderEntry(p []byte) (b restic.Blob, err error) { func parseHeaderEntry(p []byte) (b restic.Blob, size uint, err error) {
if uint(len(p)) < entrySize { l := uint(len(p))
size = plainEntrySize
if l < plainEntrySize {
err = errors.Errorf("parseHeaderEntry: buffer of size %d too short", len(p)) err = errors.Errorf("parseHeaderEntry: buffer of size %d too short", len(p))
return b, err return b, size, err
} }
p = p[:entrySize] tpe := p[0]
switch p[0] { switch tpe {
case 0: case 0, 2:
b.Type = restic.DataBlob b.Type = restic.DataBlob
case 1: case 1, 3:
b.Type = restic.TreeBlob b.Type = restic.TreeBlob
default: default:
return b, errors.Errorf("invalid type %d", p[0]) return b, size, errors.Errorf("invalid type %d", tpe)
} }
b.Length = uint(binary.LittleEndian.Uint32(p[1:5])) b.Length = uint(binary.LittleEndian.Uint32(p[1:5]))
copy(b.ID[:], p[5:]) p = p[5:]
if tpe == 2 || tpe == 3 {
size = entrySize
if l < entrySize {
err = errors.Errorf("parseHeaderEntry: buffer of size %d too short", len(p))
return b, size, err
}
b.UncompressedLength = uint(binary.LittleEndian.Uint32(p[0:4]))
p = p[4:]
}
return b, nil copy(b.ID[:], p[:])
return b, size, nil
}
func CalculateEntrySize(blob restic.Blob) int {
if blob.UncompressedLength != 0 {
return int(entrySize)
}
return int(plainEntrySize)
} }
func CalculateHeaderSize(blobs []restic.Blob) int { func CalculateHeaderSize(blobs []restic.Blob) int {
return headerSize + len(blobs)*int(entrySize) size := headerSize
for _, blob := range blobs {
size += CalculateEntrySize(blob)
}
return size
} }
// Size returns the size of all packs computed by index information. // Size returns the size of all packs computed by index information.
@ -333,7 +371,7 @@ func Size(ctx context.Context, mi restic.MasterIndex, onlyHdr bool) map[restic.I
if !onlyHdr { if !onlyHdr {
size += int64(blob.Length) size += int64(blob.Length)
} }
packSize[blob.PackID] = size + int64(entrySize) packSize[blob.PackID] = size + int64(CalculateEntrySize(blob.Blob))
} }
return packSize return packSize

View file

@ -13,7 +13,7 @@ import (
func TestParseHeaderEntry(t *testing.T) { func TestParseHeaderEntry(t *testing.T) {
h := headerEntry{ h := headerEntry{
Type: 0, // Blob. Type: 0, // Blob
Length: 100, Length: 100,
} }
for i := range h.ID { for i := range h.ID {
@ -23,25 +23,58 @@ func TestParseHeaderEntry(t *testing.T) {
buf := new(bytes.Buffer) buf := new(bytes.Buffer)
_ = binary.Write(buf, binary.LittleEndian, &h) _ = binary.Write(buf, binary.LittleEndian, &h)
b, err := parseHeaderEntry(buf.Bytes()) b, size, err := parseHeaderEntry(buf.Bytes())
rtest.OK(t, err) rtest.OK(t, err)
rtest.Equals(t, restic.DataBlob, b.Type) rtest.Equals(t, restic.DataBlob, b.Type)
rtest.Equals(t, plainEntrySize, size)
t.Logf("%v %v", h.ID, b.ID) t.Logf("%v %v", h.ID, b.ID)
rtest.Assert(t, bytes.Equal(h.ID[:], b.ID[:]), "id mismatch") rtest.Equals(t, h.ID[:], b.ID[:])
rtest.Equals(t, uint(h.Length), b.Length) rtest.Equals(t, uint(h.Length), b.Length)
rtest.Equals(t, uint(0), b.UncompressedLength)
c := compressedHeaderEntry{
Type: 2, // compressed Blob
Length: 100,
UncompressedLength: 200,
}
for i := range c.ID {
c.ID[i] = byte(i)
}
buf = new(bytes.Buffer)
_ = binary.Write(buf, binary.LittleEndian, &c)
b, size, err = parseHeaderEntry(buf.Bytes())
rtest.OK(t, err)
rtest.Equals(t, restic.DataBlob, b.Type)
rtest.Equals(t, entrySize, size)
t.Logf("%v %v", c.ID, b.ID)
rtest.Equals(t, c.ID[:], b.ID[:])
rtest.Equals(t, uint(c.Length), b.Length)
rtest.Equals(t, uint(c.UncompressedLength), b.UncompressedLength)
}
func TestParseHeaderEntryErrors(t *testing.T) {
h := headerEntry{
Type: 0, // Blob
Length: 100,
}
for i := range h.ID {
h.ID[i] = byte(i)
}
h.Type = 0xae h.Type = 0xae
buf.Reset() buf := new(bytes.Buffer)
_ = binary.Write(buf, binary.LittleEndian, &h) _ = binary.Write(buf, binary.LittleEndian, &h)
b, err = parseHeaderEntry(buf.Bytes()) _, _, err := parseHeaderEntry(buf.Bytes())
rtest.Assert(t, err != nil, "no error for invalid type") rtest.Assert(t, err != nil, "no error for invalid type")
h.Type = 0 h.Type = 0
buf.Reset() buf.Reset()
_ = binary.Write(buf, binary.LittleEndian, &h) _ = binary.Write(buf, binary.LittleEndian, &h)
b, err = parseHeaderEntry(buf.Bytes()[:entrySize-1]) _, _, err = parseHeaderEntry(buf.Bytes()[:plainEntrySize-1])
rtest.Assert(t, err != nil, "no error for short input") rtest.Assert(t, err != nil, "no error for short input")
} }
@ -97,7 +130,8 @@ func TestReadHeaderEagerLoad(t *testing.T) {
func TestReadRecords(t *testing.T) { func TestReadRecords(t *testing.T) {
testReadRecords := func(dataSize, entryCount, totalRecords int) { testReadRecords := func(dataSize, entryCount, totalRecords int) {
totalHeader := rtest.Random(0, totalRecords*int(entrySize)+crypto.Extension) totalHeader := rtest.Random(0, totalRecords*int(entrySize)+crypto.Extension)
off := len(totalHeader) - (entryCount*int(entrySize) + crypto.Extension) bufSize := entryCount*int(entrySize) + crypto.Extension
off := len(totalHeader) - bufSize
if off < 0 { if off < 0 {
off = 0 off = 0
} }
@ -110,10 +144,10 @@ func TestReadRecords(t *testing.T) {
rd := bytes.NewReader(buf.Bytes()) rd := bytes.NewReader(buf.Bytes())
header, count, err := readRecords(rd, int64(rd.Len()), entryCount) header, count, err := readRecords(rd, int64(rd.Len()), bufSize+4)
rtest.OK(t, err) rtest.OK(t, err)
rtest.Equals(t, len(totalHeader)+4, count)
rtest.Equals(t, expectedHeader, header) rtest.Equals(t, expectedHeader, header)
rtest.Equals(t, totalRecords, count)
} }
// basic // basic

View file

@ -38,7 +38,7 @@ func newPack(t testing.TB, k *crypto.Key, lengths []int) ([]Buf, []byte, uint) {
var buf bytes.Buffer var buf bytes.Buffer
p := pack.NewPacker(k, &buf) p := pack.NewPacker(k, &buf)
for _, b := range bufs { for _, b := range bufs {
_, err := p.Add(restic.TreeBlob, b.id, b.data) _, err := p.Add(restic.TreeBlob, b.id, b.data, 2*len(b.data))
rtest.OK(t, err) rtest.OK(t, err)
} }

View file

@ -75,12 +75,12 @@ const maxuint32 = 1<<32 - 1
func (idx *Index) store(packIndex int, blob restic.Blob) { func (idx *Index) store(packIndex int, blob restic.Blob) {
// assert that offset and length fit into uint32! // assert that offset and length fit into uint32!
if blob.Offset > maxuint32 || blob.Length > maxuint32 { if blob.Offset > maxuint32 || blob.Length > maxuint32 || blob.UncompressedLength > maxuint32 {
panic("offset or length does not fit in uint32. You have packs > 4GB!") panic("offset or length does not fit in uint32. You have packs > 4GB!")
} }
m := &idx.byType[blob.Type] m := &idx.byType[blob.Type]
m.add(blob.ID, packIndex, uint32(blob.Offset), uint32(blob.Length)) m.add(blob.ID, packIndex, uint32(blob.Offset), uint32(blob.Length), uint32(blob.UncompressedLength))
} }
// Final returns true iff the index is already written to the repository, it is // Final returns true iff the index is already written to the repository, it is
@ -93,12 +93,13 @@ func (idx *Index) Final() bool {
} }
const ( const (
indexMaxBlobs = 50000 indexMaxBlobs = 50000
indexMaxAge = 10 * time.Minute indexMaxBlobsCompressed = 3 * indexMaxBlobs
indexMaxAge = 10 * time.Minute
) )
// IndexFull returns true iff the index is "full enough" to be saved as a preliminary index. // IndexFull returns true iff the index is "full enough" to be saved as a preliminary index.
var IndexFull = func(idx *Index) bool { var IndexFull = func(idx *Index, compress bool) bool {
idx.m.Lock() idx.m.Lock()
defer idx.m.Unlock() defer idx.m.Unlock()
@ -109,12 +110,18 @@ var IndexFull = func(idx *Index) bool {
blobs += idx.byType[typ].len() blobs += idx.byType[typ].len()
} }
age := time.Since(idx.created) age := time.Since(idx.created)
var maxBlobs uint
if compress {
maxBlobs = indexMaxBlobsCompressed
} else {
maxBlobs = indexMaxBlobs
}
switch { switch {
case age >= indexMaxAge: case age >= indexMaxAge:
debug.Log("index %p is old enough", idx, age) debug.Log("index %p is old enough", idx, age)
return true return true
case blobs >= indexMaxBlobs: case blobs >= maxBlobs:
debug.Log("index %p has %d blobs", idx, blobs) debug.Log("index %p has %d blobs", idx, blobs)
return true return true
} }
@ -169,8 +176,9 @@ func (idx *Index) toPackedBlob(e *indexEntry, t restic.BlobType) restic.PackedBl
BlobHandle: restic.BlobHandle{ BlobHandle: restic.BlobHandle{
ID: e.id, ID: e.id,
Type: t}, Type: t},
Length: uint(e.length), Length: uint(e.length),
Offset: uint(e.offset), Offset: uint(e.offset),
UncompressedLength: uint(e.uncompressedLength),
}, },
PackID: idx.packs[e.packIndex], PackID: idx.packs[e.packIndex],
} }
@ -225,6 +233,9 @@ func (idx *Index) LookupSize(bh restic.BlobHandle) (plaintextLength uint, found
if e == nil { if e == nil {
return 0, false return 0, false
} }
if e.uncompressedLength != 0 {
return uint(e.uncompressedLength), true
}
return uint(restic.PlaintextLength(int(e.length))), true return uint(restic.PlaintextLength(int(e.length))), true
} }
@ -357,10 +368,11 @@ type packJSON struct {
} }
type blobJSON struct { type blobJSON struct {
ID restic.ID `json:"id"` ID restic.ID `json:"id"`
Type restic.BlobType `json:"type"` Type restic.BlobType `json:"type"`
Offset uint `json:"offset"` Offset uint `json:"offset"`
Length uint `json:"length"` Length uint `json:"length"`
UncompressedLength uint `json:"uncompressed_length,omitempty"`
} }
// generatePackList returns a list of packs. // generatePackList returns a list of packs.
@ -391,10 +403,11 @@ func (idx *Index) generatePackList() ([]*packJSON, error) {
// add blob // add blob
p.Blobs = append(p.Blobs, blobJSON{ p.Blobs = append(p.Blobs, blobJSON{
ID: e.id, ID: e.id,
Type: restic.BlobType(typ), Type: restic.BlobType(typ),
Offset: uint(e.offset), Offset: uint(e.offset),
Length: uint(e.length), Length: uint(e.length),
UncompressedLength: uint(e.uncompressedLength),
}) })
return true return true
@ -553,7 +566,7 @@ func (idx *Index) merge(idx2 *Index) error {
m2.foreach(func(e2 *indexEntry) bool { m2.foreach(func(e2 *indexEntry) bool {
if !hasIdenticalEntry(e2) { if !hasIdenticalEntry(e2) {
// packIndex needs to be changed as idx2.pack was appended to idx.pack, see above // packIndex needs to be changed as idx2.pack was appended to idx.pack, see above
m.add(e2.id, e2.packIndex+packlen, e2.offset, e2.length) m.add(e2.id, e2.packIndex+packlen, e2.offset, e2.length, e2.uncompressedLength)
} }
return true return true
}) })
@ -601,8 +614,9 @@ func DecodeIndex(buf []byte, id restic.ID) (idx *Index, oldFormat bool, err erro
BlobHandle: restic.BlobHandle{ BlobHandle: restic.BlobHandle{
Type: blob.Type, Type: blob.Type,
ID: blob.ID}, ID: blob.ID},
Offset: blob.Offset, Offset: blob.Offset,
Length: blob.Length, Length: blob.Length,
UncompressedLength: blob.UncompressedLength,
}) })
switch blob.Type { switch blob.Type {
@ -648,6 +662,7 @@ func decodeOldIndex(buf []byte) (idx *Index, err error) {
ID: blob.ID}, ID: blob.ID},
Offset: blob.Offset, Offset: blob.Offset,
Length: blob.Length, Length: blob.Length,
// no compressed length in the old index format
}) })
switch blob.Type { switch blob.Type {

View file

@ -23,11 +23,17 @@ func TestIndexSerialize(t *testing.T) {
pos := uint(0) pos := uint(0)
for j := 0; j < 20; j++ { for j := 0; j < 20; j++ {
length := uint(i*100 + j) length := uint(i*100 + j)
uncompressedLength := uint(0)
if i >= 25 {
// test a mix of compressed and uncompressed packs
uncompressedLength = 2 * length
}
pb := restic.PackedBlob{ pb := restic.PackedBlob{
Blob: restic.Blob{ Blob: restic.Blob{
BlobHandle: restic.NewRandomBlobHandle(), BlobHandle: restic.NewRandomBlobHandle(),
Offset: pos, Offset: pos,
Length: length, Length: length,
UncompressedLength: uncompressedLength,
}, },
PackID: packID, PackID: packID,
} }
@ -164,7 +170,7 @@ func TestIndexSize(t *testing.T) {
} }
// example index serialization from doc/Design.rst // example index serialization from doc/Design.rst
var docExample = []byte(` var docExampleV1 = []byte(`
{ {
"supersedes": [ "supersedes": [
"ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452" "ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452"
@ -177,12 +183,12 @@ var docExample = []byte(`
"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce", "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
"type": "data", "type": "data",
"offset": 0, "offset": 0,
"length": 25 "length": 38
},{ },{
"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae", "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
"type": "tree", "type": "tree",
"offset": 38, "offset": 38,
"length": 100 "length": 112
}, },
{ {
"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66", "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
@ -196,6 +202,41 @@ var docExample = []byte(`
} }
`) `)
var docExampleV2 = []byte(`
{
"supersedes": [
"ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452"
],
"packs": [
{
"id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c",
"blobs": [
{
"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
"type": "data",
"offset": 0,
"length": 38
},
{
"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
"type": "tree",
"offset": 38,
"length": 112,
"uncompressed_length": 511
},
{
"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
"type": "data",
"offset": 150,
"length": 123,
"uncompressed_length": 234
}
]
}
]
}
`)
var docOldExample = []byte(` var docOldExample = []byte(`
[ { [ {
"id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c", "id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c",
@ -204,12 +245,12 @@ var docOldExample = []byte(`
"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce", "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
"type": "data", "type": "data",
"offset": 0, "offset": 0,
"length": 25 "length": 38
},{ },{
"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae", "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
"type": "tree", "type": "tree",
"offset": 38, "offset": 38,
"length": 100 "length": 112
}, },
{ {
"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66", "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
@ -222,22 +263,23 @@ var docOldExample = []byte(`
`) `)
var exampleTests = []struct { var exampleTests = []struct {
id, packID restic.ID id, packID restic.ID
tpe restic.BlobType tpe restic.BlobType
offset, length uint offset, length uint
uncompressedLength uint
}{ }{
{ {
restic.TestParseID("3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce"), restic.TestParseID("3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce"),
restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"), restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
restic.DataBlob, 0, 25, restic.DataBlob, 0, 38, 0,
}, { }, {
restic.TestParseID("9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae"), restic.TestParseID("9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae"),
restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"), restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
restic.TreeBlob, 38, 100, restic.TreeBlob, 38, 112, 511,
}, { }, {
restic.TestParseID("d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66"), restic.TestParseID("d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66"),
restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"), restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
restic.DataBlob, 150, 123, restic.DataBlob, 150, 123, 234,
}, },
} }
@ -254,41 +296,56 @@ var exampleLookupTest = struct {
} }
func TestIndexUnserialize(t *testing.T) { func TestIndexUnserialize(t *testing.T) {
oldIdx := restic.IDs{restic.TestParseID("ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452")} for _, task := range []struct {
idxBytes []byte
version int
}{
{docExampleV1, 1},
{docExampleV2, 2},
} {
oldIdx := restic.IDs{restic.TestParseID("ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452")}
idx, oldFormat, err := repository.DecodeIndex(docExample, restic.NewRandomID()) idx, oldFormat, err := repository.DecodeIndex(task.idxBytes, restic.NewRandomID())
rtest.OK(t, err) rtest.OK(t, err)
rtest.Assert(t, !oldFormat, "new index format recognized as old format") rtest.Assert(t, !oldFormat, "new index format recognized as old format")
for _, test := range exampleTests { for _, test := range exampleTests {
list := idx.Lookup(restic.BlobHandle{ID: test.id, Type: test.tpe}, nil) list := idx.Lookup(restic.BlobHandle{ID: test.id, Type: test.tpe}, nil)
if len(list) != 1 { if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list) t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list)
}
blob := list[0]
t.Logf("looking for blob %v/%v, got %v", test.tpe, test.id.Str(), blob)
rtest.Equals(t, test.packID, blob.PackID)
rtest.Equals(t, test.tpe, blob.Type)
rtest.Equals(t, test.offset, blob.Offset)
rtest.Equals(t, test.length, blob.Length)
if task.version == 1 {
rtest.Equals(t, uint(0), blob.UncompressedLength)
} else if task.version == 2 {
rtest.Equals(t, test.uncompressedLength, blob.UncompressedLength)
} else {
t.Fatal("Invalid index version")
}
} }
blob := list[0]
t.Logf("looking for blob %v/%v, got %v", test.tpe, test.id.Str(), blob) rtest.Equals(t, oldIdx, idx.Supersedes())
rtest.Equals(t, test.packID, blob.PackID) blobs := idx.ListPack(exampleLookupTest.packID)
rtest.Equals(t, test.tpe, blob.Type) if len(blobs) != len(exampleLookupTest.blobs) {
rtest.Equals(t, test.offset, blob.Offset) t.Fatalf("expected %d blobs in pack, got %d", len(exampleLookupTest.blobs), len(blobs))
rtest.Equals(t, test.length, blob.Length)
}
rtest.Equals(t, oldIdx, idx.Supersedes())
blobs := idx.ListPack(exampleLookupTest.packID)
if len(blobs) != len(exampleLookupTest.blobs) {
t.Fatalf("expected %d blobs in pack, got %d", len(exampleLookupTest.blobs), len(blobs))
}
for _, blob := range blobs {
b, ok := exampleLookupTest.blobs[blob.ID]
if !ok {
t.Errorf("unexpected blob %v found", blob.ID.Str())
} }
if blob.Type != b {
t.Errorf("unexpected type for blob %v: want %v, got %v", blob.ID.Str(), b, blob.Type) for _, blob := range blobs {
b, ok := exampleLookupTest.blobs[blob.ID]
if !ok {
t.Errorf("unexpected blob %v found", blob.ID.Str())
}
if blob.Type != b {
t.Errorf("unexpected type for blob %v: want %v, got %v", blob.ID.Str(), b, blob.Type)
}
} }
} }
} }
@ -403,8 +460,9 @@ func createRandomIndex(rng *rand.Rand, packfiles int) (idx *repository.Index, lo
Type: restic.DataBlob, Type: restic.DataBlob,
ID: id, ID: id,
}, },
Length: uint(size), Length: uint(size),
Offset: uint(offset), UncompressedLength: uint(2 * size),
Offset: uint(offset),
}) })
offset += size offset += size
@ -475,11 +533,17 @@ func TestIndexHas(t *testing.T) {
pos := uint(0) pos := uint(0)
for j := 0; j < 20; j++ { for j := 0; j < 20; j++ {
length := uint(i*100 + j) length := uint(i*100 + j)
uncompressedLength := uint(0)
if i >= 25 {
// test a mix of compressed and uncompressed packs
uncompressedLength = 2 * length
}
pb := restic.PackedBlob{ pb := restic.PackedBlob{
Blob: restic.Blob{ Blob: restic.Blob{
BlobHandle: restic.NewRandomBlobHandle(), BlobHandle: restic.NewRandomBlobHandle(),
Offset: pos, Offset: pos,
Length: length, Length: length,
UncompressedLength: uncompressedLength,
}, },
PackID: packID, PackID: packID,
} }

View file

@ -32,7 +32,7 @@ const (
// add inserts an indexEntry for the given arguments into the map, // add inserts an indexEntry for the given arguments into the map,
// using id as the key. // using id as the key.
func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32) { func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32, uncompressedLength uint32) {
switch { switch {
case m.numentries == 0: // Lazy initialization. case m.numentries == 0: // Lazy initialization.
m.init() m.init()
@ -47,6 +47,7 @@ func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32) {
e.packIndex = packIdx e.packIndex = packIdx
e.offset = offset e.offset = offset
e.length = length e.length = length
e.uncompressedLength = uncompressedLength
m.buckets[h] = e m.buckets[h] = e
m.numentries++ m.numentries++
@ -130,12 +131,12 @@ func (m *indexMap) len() uint { return m.numentries }
func (m *indexMap) newEntry() *indexEntry { func (m *indexMap) newEntry() *indexEntry {
// Allocating in batches means that we get closer to optimal space usage, // Allocating in batches means that we get closer to optimal space usage,
// as Go's malloc will overallocate for structures of size 56 (indexEntry // as Go's malloc will overallocate for structures of size 60 (indexEntry
// on amd64). // on amd64).
// //
// 256*56 and 256*48 both have minimal malloc overhead among reasonable sizes. // 128*60 has low malloc overhead among reasonable sizes.
// See src/runtime/sizeclasses.go in the standard library. // See src/runtime/sizeclasses.go in the standard library.
const entryAllocBatch = 256 const entryAllocBatch = 128
if m.free == nil { if m.free == nil {
free := new([entryAllocBatch]indexEntry) free := new([entryAllocBatch]indexEntry)
@ -152,9 +153,10 @@ func (m *indexMap) newEntry() *indexEntry {
} }
type indexEntry struct { type indexEntry struct {
id restic.ID id restic.ID
next *indexEntry next *indexEntry
packIndex int // Position in containing Index's packs field. packIndex int // Position in containing Index's packs field.
offset uint32 offset uint32
length uint32 length uint32
uncompressedLength uint32
} }
View file
@ -22,7 +22,7 @@ func TestIndexMapBasic(t *testing.T) {
r.Read(id[:]) r.Read(id[:])
rtest.Assert(t, m.get(id) == nil, "%v retrieved but not added", id) rtest.Assert(t, m.get(id) == nil, "%v retrieved but not added", id)
m.add(id, 0, 0, 0) m.add(id, 0, 0, 0, 0)
rtest.Assert(t, m.get(id) != nil, "%v added but not retrieved", id) rtest.Assert(t, m.get(id) != nil, "%v added but not retrieved", id)
rtest.Equals(t, uint(i), m.len()) rtest.Equals(t, uint(i), m.len())
} }
@ -41,7 +41,7 @@ func TestIndexMapForeach(t *testing.T) {
for i := 0; i < N; i++ { for i := 0; i < N; i++ {
var id restic.ID var id restic.ID
id[0] = byte(i) id[0] = byte(i)
m.add(id, i, uint32(i), uint32(i)) m.add(id, i, uint32(i), uint32(i), uint32(i/2))
} }
seen := make(map[int]struct{}) seen := make(map[int]struct{})
@ -51,6 +51,7 @@ func TestIndexMapForeach(t *testing.T) {
rtest.Equals(t, i, e.packIndex) rtest.Equals(t, i, e.packIndex)
rtest.Equals(t, i, int(e.length)) rtest.Equals(t, i, int(e.length))
rtest.Equals(t, i, int(e.offset)) rtest.Equals(t, i, int(e.offset))
rtest.Equals(t, i/2, int(e.uncompressedLength))
seen[i] = struct{}{} seen[i] = struct{}{}
return true return true
@ -85,13 +86,13 @@ func TestIndexMapForeachWithID(t *testing.T) {
// Test insertion and retrieval of duplicates. // Test insertion and retrieval of duplicates.
for i := 0; i < ndups; i++ { for i := 0; i < ndups; i++ {
m.add(id, i, 0, 0) m.add(id, i, 0, 0, 0)
} }
for i := 0; i < 100; i++ { for i := 0; i < 100; i++ {
var otherid restic.ID var otherid restic.ID
r.Read(otherid[:]) r.Read(otherid[:])
m.add(otherid, -1, 0, 0) m.add(otherid, -1, 0, 0, 0)
} }
n = 0 n = 0
@ -109,7 +110,7 @@ func TestIndexMapForeachWithID(t *testing.T) {
func BenchmarkIndexMapHash(b *testing.B) { func BenchmarkIndexMapHash(b *testing.B) {
var m indexMap var m indexMap
m.add(restic.ID{}, 0, 0, 0) // Trigger lazy initialization. m.add(restic.ID{}, 0, 0, 0, 0) // Trigger lazy initialization.
ids := make([]restic.ID, 128) // 4 KiB. ids := make([]restic.ID, 128) // 4 KiB.
r := rand.New(rand.NewSource(time.Now().UnixNano())) r := rand.New(rand.NewSource(time.Now().UnixNano()))
View file
@ -16,6 +16,7 @@ type MasterIndex struct {
idx []*Index idx []*Index
pendingBlobs restic.BlobSet pendingBlobs restic.BlobSet
idxMutex sync.RWMutex idxMutex sync.RWMutex
compress bool
} }
// NewMasterIndex creates a new master index. // NewMasterIndex creates a new master index.
@ -28,6 +29,10 @@ func NewMasterIndex() *MasterIndex {
return &MasterIndex{idx: idx, pendingBlobs: restic.NewBlobSet()} return &MasterIndex{idx: idx, pendingBlobs: restic.NewBlobSet()}
} }
func (mi *MasterIndex) markCompressed() {
mi.compress = true
}
// Lookup queries all known Indexes for the ID and returns all matches. // Lookup queries all known Indexes for the ID and returns all matches.
func (mi *MasterIndex) Lookup(bh restic.BlobHandle) (pbs []restic.PackedBlob) { func (mi *MasterIndex) Lookup(bh restic.BlobHandle) (pbs []restic.PackedBlob) {
mi.idxMutex.RLock() mi.idxMutex.RLock()
@ -206,7 +211,7 @@ func (mi *MasterIndex) FinalizeFullIndexes() []*Index {
continue continue
} }
if IndexFull(idx) { if IndexFull(idx, mi.compress) {
debug.Log("index %p is full", idx) debug.Log("index %p is full", idx)
idx.Finalize() idx.Finalize()
list = append(list, idx) list = append(list, idx)
@ -334,7 +339,7 @@ func (mi *MasterIndex) Save(ctx context.Context, repo restic.Repository, packBla
for pbs := range idx.EachByPack(ctx, packBlacklist) { for pbs := range idx.EachByPack(ctx, packBlacklist) {
newIndex.StorePack(pbs.packID, pbs.blobs) newIndex.StorePack(pbs.packID, pbs.blobs)
p.Add(1) p.Add(1)
if IndexFull(newIndex) { if IndexFull(newIndex, mi.compress) {
select { select {
case ch <- newIndex: case ch <- newIndex:
case <-ctx.Done(): case <-ctx.Done():
View file
@ -30,9 +30,10 @@ func TestMasterIndex(t *testing.T) {
blob2 := restic.PackedBlob{ blob2 := restic.PackedBlob{
PackID: restic.NewRandomID(), PackID: restic.NewRandomID(),
Blob: restic.Blob{ Blob: restic.Blob{
BlobHandle: bhInIdx2, BlobHandle: bhInIdx2,
Length: uint(restic.CiphertextLength(100)), Length: uint(restic.CiphertextLength(100)),
Offset: 10, Offset: 10,
UncompressedLength: 200,
}, },
} }
@ -48,9 +49,10 @@ func TestMasterIndex(t *testing.T) {
blob12b := restic.PackedBlob{ blob12b := restic.PackedBlob{
PackID: restic.NewRandomID(), PackID: restic.NewRandomID(),
Blob: restic.Blob{ Blob: restic.Blob{
BlobHandle: bhInIdx12, BlobHandle: bhInIdx12,
Length: uint(restic.CiphertextLength(123)), Length: uint(restic.CiphertextLength(123)),
Offset: 50, Offset: 50,
UncompressedLength: 80,
}, },
} }
@ -86,7 +88,7 @@ func TestMasterIndex(t *testing.T) {
size, found = mIdx.LookupSize(bhInIdx2) size, found = mIdx.LookupSize(bhInIdx2)
rtest.Equals(t, true, found) rtest.Equals(t, true, found)
rtest.Equals(t, uint(100), size) rtest.Equals(t, uint(200), size)
// test idInIdx12 // test idInIdx12
found = mIdx.Has(bhInIdx12) found = mIdx.Has(bhInIdx12)
@ -144,9 +146,10 @@ func TestMasterMergeFinalIndexes(t *testing.T) {
blob2 := restic.PackedBlob{ blob2 := restic.PackedBlob{
PackID: restic.NewRandomID(), PackID: restic.NewRandomID(),
Blob: restic.Blob{ Blob: restic.Blob{
BlobHandle: bhInIdx2, BlobHandle: bhInIdx2,
Length: 100, Length: 100,
Offset: 10, Offset: 10,
UncompressedLength: 200,
}, },
} }
@ -335,8 +338,8 @@ var (
depth = 3 depth = 3
) )
func createFilledRepo(t testing.TB, snapshots int, dup float32) (restic.Repository, func()) { func createFilledRepo(t testing.TB, snapshots int, dup float32, version uint) (restic.Repository, func()) {
repo, cleanup := repository.TestRepository(t) repo, cleanup := repository.TestRepositoryWithVersion(t, version)
for i := 0; i < 3; i++ { for i := 0; i < 3; i++ {
restic.TestCreateSnapshot(t, repo, snapshotTime.Add(time.Duration(i)*time.Second), depth, dup) restic.TestCreateSnapshot(t, repo, snapshotTime.Add(time.Duration(i)*time.Second), depth, dup)
@ -346,7 +349,11 @@ func createFilledRepo(t testing.TB, snapshots int, dup float32) (restic.Reposito
} }
func TestIndexSave(t *testing.T) { func TestIndexSave(t *testing.T) {
repo, cleanup := createFilledRepo(t, 3, 0) repository.TestAllVersions(t, testIndexSave)
}
func testIndexSave(t *testing.T, version uint) {
repo, cleanup := createFilledRepo(t, 3, 0, version)
defer cleanup() defer cleanup()
err := repo.LoadIndex(context.TODO()) err := repo.LoadIndex(context.TODO())
View file
@ -70,7 +70,7 @@ func fillPacks(t testing.TB, rnd *rand.Rand, be Saver, pm *packerManager, buf []
// Only change a few bytes so we know we're not benchmarking the RNG. // Only change a few bytes so we know we're not benchmarking the RNG.
rnd.Read(buf[:min(l, 4)]) rnd.Read(buf[:min(l, 4)])
n, err := packer.Add(restic.DataBlob, id, buf) n, err := packer.Add(restic.DataBlob, id, buf, 0)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
View file
@ -212,7 +212,11 @@ func reloadIndex(t *testing.T, repo restic.Repository) {
} }
func TestRepack(t *testing.T) { func TestRepack(t *testing.T) {
repo, cleanup := repository.TestRepository(t) repository.TestAllVersions(t, testRepack)
}
func testRepack(t *testing.T, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup() defer cleanup()
seed := time.Now().UnixNano() seed := time.Now().UnixNano()
@ -279,9 +283,13 @@ func TestRepack(t *testing.T) {
} }
func TestRepackCopy(t *testing.T) { func TestRepackCopy(t *testing.T) {
repo, cleanup := repository.TestRepository(t) repository.TestAllVersions(t, testRepackCopy)
}
func testRepackCopy(t *testing.T, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup() defer cleanup()
dstRepo, dstCleanup := repository.TestRepository(t) dstRepo, dstCleanup := repository.TestRepositoryWithVersion(t, version)
defer dstCleanup() defer dstCleanup()
seed := time.Now().UnixNano() seed := time.Now().UnixNano()
@ -318,7 +326,11 @@ func TestRepackCopy(t *testing.T) {
} }
func TestRepackWrongBlob(t *testing.T) { func TestRepackWrongBlob(t *testing.T) {
repo, cleanup := repository.TestRepository(t) repository.TestAllVersions(t, testRepackWrongBlob)
}
func testRepackWrongBlob(t *testing.T, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup() defer cleanup()
seed := time.Now().UnixNano() seed := time.Now().UnixNano()
View file
@ -12,6 +12,7 @@ import (
"sync" "sync"
"github.com/cenkalti/backoff/v4" "github.com/cenkalti/backoff/v4"
"github.com/klauspost/compress/zstd"
"github.com/restic/chunker" "github.com/restic/chunker"
"github.com/restic/restic/internal/backend/dryrun" "github.com/restic/restic/internal/backend/dryrun"
"github.com/restic/restic/internal/cache" "github.com/restic/restic/internal/cache"
@ -36,16 +37,71 @@ type Repository struct {
idx *MasterIndex idx *MasterIndex
Cache *cache.Cache Cache *cache.Cache
opts Options
noAutoIndexUpdate bool noAutoIndexUpdate bool
treePM *packerManager treePM *packerManager
dataPM *packerManager dataPM *packerManager
allocEnc sync.Once
allocDec sync.Once
enc *zstd.Encoder
dec *zstd.Decoder
}
type Options struct {
Compression CompressionMode
}
// CompressionMode configures if data should be compressed.
type CompressionMode uint
// Constants for the different compression levels.
const (
CompressionAuto CompressionMode = 0
CompressionOff CompressionMode = 1
CompressionMax CompressionMode = 2
)
// Set implements the method needed for pflag command flag parsing.
func (c *CompressionMode) Set(s string) error {
switch s {
case "auto":
*c = CompressionAuto
case "off":
*c = CompressionOff
case "max":
*c = CompressionMax
default:
return fmt.Errorf("invalid compression mode %q, must be one of (auto|off|max)", s)
}
return nil
}
func (c *CompressionMode) String() string {
switch *c {
case CompressionAuto:
return "auto"
case CompressionOff:
return "off"
case CompressionMax:
return "max"
default:
return "invalid"
}
}
func (c *CompressionMode) Type() string {
return "mode"
} }
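
Since *CompressionMode implements Set, String and Type, it satisfies the pflag.Value interface and can be bound directly to a command-line flag. A minimal sketch of such wiring, assuming it lives inside the restic module so the internal package is importable; the flag set, option variable and parse arguments are illustrative, not the actual cmd/restic glue:

    package main

    import (
        "fmt"

        "github.com/spf13/pflag"

        "github.com/restic/restic/internal/repository"
    )

    func main() {
        var opts repository.Options
        fs := pflag.NewFlagSet("example", pflag.ContinueOnError)
        // Set rejects anything other than auto, off or max.
        fs.Var(&opts.Compression, "compression", "compression mode (auto|off|max)")
        if err := fs.Parse([]string{"--compression", "max"}); err != nil {
            panic(err)
        }
        fmt.Println(opts.Compression.String()) // max
    }
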
// New returns a new repository with backend be. // New returns a new repository with backend be.
func New(be restic.Backend) *Repository { func New(be restic.Backend, opts Options) *Repository {
repo := &Repository{ repo := &Repository{
be: be, be: be,
opts: opts,
idx: NewMasterIndex(), idx: NewMasterIndex(),
dataPM: newPackerManager(be, nil), dataPM: newPackerManager(be, nil),
treePM: newPackerManager(be, nil), treePM: newPackerManager(be, nil),
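
New now takes an Options value, so the compression mode is fixed when the Repository is constructed. A hedged sketch of building a repository with maximum compression; the in-memory backend is chosen purely for illustration:

    package main

    import (
        "github.com/restic/restic/internal/backend/mem"
        "github.com/restic/restic/internal/repository"
    )

    func main() {
        be := mem.New() // throwaway in-memory backend
        repo := repository.New(be, repository.Options{Compression: repository.CompressionMax})
        _ = repo // SearchKey or Init would follow in real code
    }
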
@ -60,6 +116,14 @@ func (r *Repository) DisableAutoIndexUpdate() {
r.noAutoIndexUpdate = true r.noAutoIndexUpdate = true
} }
// setConfig assigns the given config and updates the repository parameters accordingly
func (r *Repository) setConfig(cfg restic.Config) {
r.cfg = cfg
if r.cfg.Version >= 2 {
r.idx.markCompressed()
}
}
// Config returns the repository configuration. // Config returns the repository configuration.
func (r *Repository) Config() restic.Config { func (r *Repository) Config() restic.Config {
return r.cfg return r.cfg
@ -125,6 +189,9 @@ func (r *Repository) LoadUnpacked(ctx context.Context, buf []byte, t restic.File
if err != nil { if err != nil {
return nil, err return nil, err
} }
if t != restic.ConfigFile {
return r.decompressUnpacked(plaintext)
}
return plaintext, nil return plaintext, nil
} }
@ -218,12 +285,23 @@ func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.
continue continue
} }
if blob.IsCompressed() {
plaintext, err = r.getZstdDecoder().DecodeAll(plaintext, make([]byte, 0, blob.DataLength()))
if err != nil {
lastError = errors.Errorf("decompressing blob %v failed: %v", id, err)
continue
}
}
// check hash // check hash
if !restic.Hash(plaintext).Equal(id) { if !restic.Hash(plaintext).Equal(id) {
lastError = errors.Errorf("blob %v returned invalid hash", id) lastError = errors.Errorf("blob %v returned invalid hash", id)
continue continue
} }
if len(plaintext) > cap(buf) {
return plaintext, nil
}
// move decrypted data to the start of the buffer // move decrypted data to the start of the buffer
copy(buf, plaintext) copy(buf, plaintext)
return buf[:len(plaintext)], nil return buf[:len(plaintext)], nil
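
Because a compressed blob's plaintext can exceed its stored length, LoadBlob now returns a freshly allocated slice whenever the caller's buffer is too small. Callers must keep working with the returned slice rather than the buffer they passed in; roughly as follows, where loadData is a hypothetical helper written for illustration:

    package example

    import (
        "context"

        "github.com/restic/restic/internal/restic"
    )

    // loadData fetches a data blob, reusing buf when it is large enough.
    func loadData(ctx context.Context, repo restic.Repository, id restic.ID, buf []byte) ([]byte, error) {
        buf, err := repo.LoadBlob(ctx, restic.DataBlob, id, buf)
        if err != nil {
            return nil, err
        }
        // With compression the plaintext may be larger than cap(buf); LoadBlob then
        // returns a new slice, so never assume the original buffer holds the data.
        return buf, nil
    }
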
@ -252,12 +330,70 @@ func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, bo
return r.idx.LookupSize(restic.BlobHandle{ID: id, Type: tpe}) return r.idx.LookupSize(restic.BlobHandle{ID: id, Type: tpe})
} }
func (r *Repository) getZstdEncoder() *zstd.Encoder {
r.allocEnc.Do(func() {
level := zstd.SpeedDefault
if r.opts.Compression == CompressionMax {
level = zstd.SpeedBestCompression
}
opts := []zstd.EOption{
// Set the compression level configured.
zstd.WithEncoderLevel(level),
// Disable CRC, we have enough checks in place, makes the
// compressed data four bytes shorter.
zstd.WithEncoderCRC(false),
// Set a window of 512kbyte, so we have good lookbehind for usual
// blob sizes.
zstd.WithWindowSize(512 * 1024),
}
enc, err := zstd.NewWriter(nil, opts...)
if err != nil {
panic(err)
}
r.enc = enc
})
return r.enc
}
func (r *Repository) getZstdDecoder() *zstd.Decoder {
r.allocDec.Do(func() {
opts := []zstd.DOption{
// Use all available cores.
zstd.WithDecoderConcurrency(0),
// Limit the maximum decompressed memory. Set to a very high,
// conservative value.
zstd.WithDecoderMaxMemory(16 * 1024 * 1024 * 1024),
}
dec, err := zstd.NewReader(nil, opts...)
if err != nil {
panic(err)
}
r.dec = dec
})
return r.dec
}
// saveAndEncrypt encrypts data and stores it to the backend as type t. If data // saveAndEncrypt encrypts data and stores it to the backend as type t. If data
// is small enough, it will be packed together with other small blobs. // is small enough, it will be packed together with other small blobs.
// The caller must ensure that the id matches the data. // The caller must ensure that the id matches the data.
func (r *Repository) saveAndEncrypt(ctx context.Context, t restic.BlobType, data []byte, id restic.ID) error { func (r *Repository) saveAndEncrypt(ctx context.Context, t restic.BlobType, data []byte, id restic.ID) error {
debug.Log("save id %v (%v, %d bytes)", id, t, len(data)) debug.Log("save id %v (%v, %d bytes)", id, t, len(data))
uncompressedLength := 0
if r.cfg.Version > 1 {
// we have a repo v2, so compression is available. If the user opted out
// of compression, data blobs are stored uncompressed, but trees and other
// metadata are still compressed.
if r.opts.Compression != CompressionOff || t != restic.DataBlob {
uncompressedLength = len(data)
data = r.getZstdEncoder().EncodeAll(data, nil)
}
}
nonce := crypto.NewRandomNonce() nonce := crypto.NewRandomNonce()
ciphertext := make([]byte, 0, restic.CiphertextLength(len(data))) ciphertext := make([]byte, 0, restic.CiphertextLength(len(data)))
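
The decision above reduces to: nothing is compressed on repository version 1, and on version 2 everything except data blobs is always compressed, with data blobs compressed unless the mode is off. A standalone predicate expressing the same rule; shouldCompress is a hypothetical helper written for illustration, not part of this change:

    package main

    import (
        "fmt"

        "github.com/restic/restic/internal/repository"
        "github.com/restic/restic/internal/restic"
    )

    // shouldCompress mirrors the check in saveAndEncrypt.
    func shouldCompress(repoVersion uint, mode repository.CompressionMode, t restic.BlobType) bool {
        if repoVersion < 2 {
            return false
        }
        return mode != repository.CompressionOff || t != restic.DataBlob
    }

    func main() {
        fmt.Println(shouldCompress(2, repository.CompressionOff, restic.DataBlob))  // false
        fmt.Println(shouldCompress(2, repository.CompressionOff, restic.TreeBlob))  // true
        fmt.Println(shouldCompress(1, repository.CompressionAuto, restic.TreeBlob)) // false
    }
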
@ -284,7 +420,7 @@ func (r *Repository) saveAndEncrypt(ctx context.Context, t restic.BlobType, data
} }
// save ciphertext // save ciphertext
_, err = packer.Add(t, id, ciphertext) _, err = packer.Add(t, id, ciphertext, uncompressedLength)
if err != nil { if err != nil {
return err return err
} }
@ -312,9 +448,50 @@ func (r *Repository) SaveJSONUnpacked(ctx context.Context, t restic.FileType, it
return r.SaveUnpacked(ctx, t, plaintext) return r.SaveUnpacked(ctx, t, plaintext)
} }
func (r *Repository) compressUnpacked(p []byte) ([]byte, error) {
// compression is only available starting from version 2
if r.cfg.Version < 2 {
return p, nil
}
// version byte
out := []byte{2}
out = r.getZstdEncoder().EncodeAll(p, out)
return out, nil
}
func (r *Repository) decompressUnpacked(p []byte) ([]byte, error) {
// compression is only available starting from version 2
if r.cfg.Version < 2 {
return p, nil
}
if len(p) == 0 {
// too short for version header
return p, nil
}
if p[0] == '[' || p[0] == '{' {
// probably raw JSON
return p, nil
}
// version
if p[0] != 2 {
return nil, errors.New("not supported encoding format")
}
return r.getZstdDecoder().DecodeAll(p[1:], nil)
}
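
Unpacked files other than the config on a version 2 repository are therefore framed as a single format byte 2 followed by one zstd stream, while data starting with '[' or '{' passes through as legacy uncompressed JSON. A small stand-alone round trip using the same klauspost/compress calls; the payload and error handling are illustrative only:

    package main

    import (
        "bytes"
        "fmt"

        "github.com/klauspost/compress/zstd"
    )

    func main() {
        enc, err := zstd.NewWriter(nil)
        if err != nil {
            panic(err)
        }
        dec, err := zstd.NewReader(nil)
        if err != nil {
            panic(err)
        }

        payload := []byte(`{"example":"snapshot"}`)

        // Compress: format byte 2, then the zstd frame.
        framed := append([]byte{2}, enc.EncodeAll(payload, nil)...)

        // Decompress: legacy JSON passes through, otherwise check the format byte.
        var plain []byte
        switch {
        case len(framed) == 0 || framed[0] == '[' || framed[0] == '{':
            plain = framed
        case framed[0] == 2:
            plain, err = dec.DecodeAll(framed[1:], nil)
            if err != nil {
                panic(err)
            }
        default:
            panic("unsupported encoding format")
        }

        fmt.Println(bytes.Equal(plain, payload)) // true
    }
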
// SaveUnpacked encrypts data and stores it in the backend. Returned is the // SaveUnpacked encrypts data and stores it in the backend. Returned is the
// storage hash. // storage hash.
func (r *Repository) SaveUnpacked(ctx context.Context, t restic.FileType, p []byte) (id restic.ID, err error) { func (r *Repository) SaveUnpacked(ctx context.Context, t restic.FileType, p []byte) (id restic.ID, err error) {
if t != restic.ConfigFile {
p, err = r.compressUnpacked(p)
if err != nil {
return restic.ID{}, err
}
}
ciphertext := restic.NewBlobBuffer(len(p)) ciphertext := restic.NewBlobBuffer(len(p))
ciphertext = ciphertext[:0] ciphertext = ciphertext[:0]
nonce := crypto.NewRandomNonce() nonce := crypto.NewRandomNonce()
@ -478,6 +655,17 @@ func (r *Repository) LoadIndex(ctx context.Context) error {
return err return err
} }
if r.cfg.Version < 2 {
// sanity check
ctx, cancel := context.WithCancel(ctx)
defer cancel()
for blob := range r.idx.Each(ctx) {
if blob.IsCompressed() {
return errors.Fatal("index uses feature not supported by repository version 1")
}
}
}
// remove index files from the cache which have been removed in the repo // remove index files from the cache which have been removed in the repo
return r.PrepareCache(validIndex) return r.PrepareCache(validIndex)
} }
@ -592,18 +780,28 @@ func (r *Repository) SearchKey(ctx context.Context, password string, maxKeys int
r.dataPM.key = key.master r.dataPM.key = key.master
r.treePM.key = key.master r.treePM.key = key.master
r.keyName = key.Name() r.keyName = key.Name()
r.cfg, err = restic.LoadConfig(ctx, r) cfg, err := restic.LoadConfig(ctx, r)
if err == crypto.ErrUnauthenticated { if err == crypto.ErrUnauthenticated {
return errors.Fatalf("config or key %v is damaged: %v", key.Name(), err) return errors.Fatalf("config or key %v is damaged: %v", key.Name(), err)
} else if err != nil { } else if err != nil {
return errors.Fatalf("config cannot be loaded: %v", err) return errors.Fatalf("config cannot be loaded: %v", err)
} }
r.setConfig(cfg)
return nil return nil
} }
// Init creates a new master key with the supplied password, initializes and // Init creates a new master key with the supplied password, initializes and
// saves the repository config. // saves the repository config.
func (r *Repository) Init(ctx context.Context, password string, chunkerPolynomial *chunker.Pol) error { func (r *Repository) Init(ctx context.Context, version uint, password string, chunkerPolynomial *chunker.Pol) error {
if version > restic.MaxRepoVersion {
return fmt.Errorf("repo version %v too high", version)
}
if version < restic.MinRepoVersion {
return fmt.Errorf("repo version %v too low", version)
}
has, err := r.be.Test(ctx, restic.Handle{Type: restic.ConfigFile}) has, err := r.be.Test(ctx, restic.Handle{Type: restic.ConfigFile})
if err != nil { if err != nil {
return err return err
@ -612,7 +810,7 @@ func (r *Repository) Init(ctx context.Context, password string, chunkerPolynomia
return errors.New("repository master key and config already initialized") return errors.New("repository master key and config already initialized")
} }
cfg, err := restic.CreateConfig() cfg, err := restic.CreateConfig(version)
if err != nil { if err != nil {
return err return err
} }
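
Init now takes the target repository format version explicitly and rejects anything outside the MinRepoVersion..MaxRepoVersion range. A hedged sketch of initializing a fresh version 2 repository; the in-memory backend and password are placeholders for illustration:

    package main

    import (
        "context"

        "github.com/restic/restic/internal/backend/mem"
        "github.com/restic/restic/internal/repository"
    )

    func main() {
        repo := repository.New(mem.New(), repository.Options{Compression: repository.CompressionAuto})
        // nil chunker polynomial: a random one is generated by CreateConfig.
        if err := repo.Init(context.TODO(), 2, "example-password", nil); err != nil {
            panic(err)
        }
    }
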
@ -635,7 +833,7 @@ func (r *Repository) init(ctx context.Context, password string, cfg restic.Confi
r.dataPM.key = key.master r.dataPM.key = key.master
r.treePM.key = key.master r.treePM.key = key.master
r.keyName = key.Name() r.keyName = key.Name()
r.cfg = cfg r.setConfig(cfg)
_, err = r.SaveJSONUnpacked(ctx, restic.ConfigFile, cfg) _, err = r.SaveJSONUnpacked(ctx, restic.ConfigFile, cfg)
return err return err
} }
@ -768,9 +966,15 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack
debug.Log("streaming pack %v (%d to %d bytes), blobs: %v", packID, dataStart, dataEnd, len(blobs)) debug.Log("streaming pack %v (%d to %d bytes), blobs: %v", packID, dataStart, dataEnd, len(blobs))
dec, err := zstd.NewReader(nil)
if err != nil {
panic(err)
}
defer dec.Close()
ctx, cancel := context.WithCancel(ctx) ctx, cancel := context.WithCancel(ctx)
// stream blobs in pack // stream blobs in pack
err := beLoad(ctx, h, int(dataEnd-dataStart), int64(dataStart), func(rd io.Reader) error { err = beLoad(ctx, h, int(dataEnd-dataStart), int64(dataStart), func(rd io.Reader) error {
// prevent callbacks after cancelation // prevent callbacks after cancelation
if ctx.Err() != nil { if ctx.Err() != nil {
return ctx.Err() return ctx.Err()
@ -783,6 +987,7 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack
bufRd := bufio.NewReaderSize(rd, bufferSize) bufRd := bufio.NewReaderSize(rd, bufferSize)
currentBlobEnd := dataStart currentBlobEnd := dataStart
var buf []byte var buf []byte
var decode []byte
for _, entry := range blobs { for _, entry := range blobs {
skipBytes := int(entry.Offset - currentBlobEnd) skipBytes := int(entry.Offset - currentBlobEnd)
if skipBytes < 0 { if skipBytes < 0 {
@ -822,6 +1027,15 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack
// decryption errors are likely permanent, give the caller a chance to skip them // decryption errors are likely permanent, give the caller a chance to skip them
nonce, ciphertext := buf[:key.NonceSize()], buf[key.NonceSize():] nonce, ciphertext := buf[:key.NonceSize()], buf[key.NonceSize():]
plaintext, err := key.Open(ciphertext[:0], nonce, ciphertext, nil) plaintext, err := key.Open(ciphertext[:0], nonce, ciphertext, nil)
if err == nil && entry.IsCompressed() {
// DecodeAll will allocate a slice if it is not large enough since it
// knows the decompressed size (because we're using EncodeAll)
decode, err = dec.DecodeAll(plaintext, decode[:0])
plaintext = decode
if err != nil {
err = errors.Errorf("decompressing blob %v failed: %v", h, err)
}
}
if err == nil { if err == nil {
id := restic.Hash(plaintext) id := restic.Hash(plaintext)
if !id.Equal(entry.ID) { if !id.Equal(entry.ID) {
View file
@ -15,6 +15,7 @@ import (
"time" "time"
"github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp"
"github.com/klauspost/compress/zstd"
"github.com/restic/restic/internal/archiver" "github.com/restic/restic/internal/archiver"
"github.com/restic/restic/internal/crypto" "github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/repository"
@ -28,7 +29,11 @@ var testSizes = []int{5, 23, 2<<18 + 23, 1 << 20}
var rnd = rand.New(rand.NewSource(time.Now().UnixNano())) var rnd = rand.New(rand.NewSource(time.Now().UnixNano()))
func TestSave(t *testing.T) { func TestSave(t *testing.T) {
repo, cleanup := repository.TestRepository(t) repository.TestAllVersions(t, testSave)
}
func testSave(t *testing.T, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup() defer cleanup()
for _, size := range testSizes { for _, size := range testSizes {
@ -63,7 +68,11 @@ func TestSave(t *testing.T) {
} }
func TestSaveFrom(t *testing.T) { func TestSaveFrom(t *testing.T) {
repo, cleanup := repository.TestRepository(t) repository.TestAllVersions(t, testSaveFrom)
}
func testSaveFrom(t *testing.T, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup() defer cleanup()
for _, size := range testSizes { for _, size := range testSizes {
@ -96,7 +105,11 @@ func TestSaveFrom(t *testing.T) {
} }
func BenchmarkSaveAndEncrypt(t *testing.B) { func BenchmarkSaveAndEncrypt(t *testing.B) {
repo, cleanup := repository.TestRepository(t) repository.BenchmarkAllVersions(t, benchmarkSaveAndEncrypt)
}
func benchmarkSaveAndEncrypt(t *testing.B, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup() defer cleanup()
size := 4 << 20 // 4MiB size := 4 << 20 // 4MiB
@ -118,7 +131,11 @@ func BenchmarkSaveAndEncrypt(t *testing.B) {
} }
func TestLoadTree(t *testing.T) { func TestLoadTree(t *testing.T) {
repo, cleanup := repository.TestRepository(t) repository.TestAllVersions(t, testLoadTree)
}
func testLoadTree(t *testing.T, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup() defer cleanup()
if rtest.BenchArchiveDirectory == "" { if rtest.BenchArchiveDirectory == "" {
@ -134,7 +151,11 @@ func TestLoadTree(t *testing.T) {
} }
func BenchmarkLoadTree(t *testing.B) { func BenchmarkLoadTree(t *testing.B) {
repo, cleanup := repository.TestRepository(t) repository.BenchmarkAllVersions(t, benchmarkLoadTree)
}
func benchmarkLoadTree(t *testing.B, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup() defer cleanup()
if rtest.BenchArchiveDirectory == "" { if rtest.BenchArchiveDirectory == "" {
@ -154,7 +175,11 @@ func BenchmarkLoadTree(t *testing.B) {
} }
func TestLoadBlob(t *testing.T) { func TestLoadBlob(t *testing.T) {
repo, cleanup := repository.TestRepository(t) repository.TestAllVersions(t, testLoadBlob)
}
func testLoadBlob(t *testing.T, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup() defer cleanup()
length := 1000000 length := 1000000
@ -183,7 +208,11 @@ func TestLoadBlob(t *testing.T) {
} }
func BenchmarkLoadBlob(b *testing.B) { func BenchmarkLoadBlob(b *testing.B) {
repo, cleanup := repository.TestRepository(b) repository.BenchmarkAllVersions(b, benchmarkLoadBlob)
}
func benchmarkLoadBlob(b *testing.B, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(b, version)
defer cleanup() defer cleanup()
length := 1000000 length := 1000000
@ -219,7 +248,11 @@ func BenchmarkLoadBlob(b *testing.B) {
} }
func BenchmarkLoadUnpacked(b *testing.B) { func BenchmarkLoadUnpacked(b *testing.B) {
repo, cleanup := repository.TestRepository(b) repository.BenchmarkAllVersions(b, benchmarkLoadUnpacked)
}
func benchmarkLoadUnpacked(b *testing.B, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(b, version)
defer cleanup() defer cleanup()
length := 1000000 length := 1000000
@ -255,7 +288,11 @@ func BenchmarkLoadUnpacked(b *testing.B) {
} }
func TestLoadJSONUnpacked(t *testing.T) { func TestLoadJSONUnpacked(t *testing.T) {
repo, cleanup := repository.TestRepository(t) repository.TestAllVersions(t, testLoadJSONUnpacked)
}
func testLoadJSONUnpacked(t *testing.T, version uint) {
repo, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup() defer cleanup()
if rtest.BenchArchiveDirectory == "" { if rtest.BenchArchiveDirectory == "" {
@ -313,9 +350,13 @@ func loadIndex(ctx context.Context, repo restic.Repository, id restic.ID) (*repo
} }
func BenchmarkLoadIndex(b *testing.B) { func BenchmarkLoadIndex(b *testing.B) {
repository.BenchmarkAllVersions(b, benchmarkLoadIndex)
}
func benchmarkLoadIndex(b *testing.B, version uint) {
repository.TestUseLowSecurityKDFParameters(b) repository.TestUseLowSecurityKDFParameters(b)
repo, cleanup := repository.TestRepository(b) repo, cleanup := repository.TestRepositoryWithVersion(b, version)
defer cleanup() defer cleanup()
idx := repository.NewIndex() idx := repository.NewIndex()
@ -362,12 +403,16 @@ func saveRandomDataBlobs(t testing.TB, repo restic.Repository, num int, sizeMax
} }
func TestRepositoryIncrementalIndex(t *testing.T) { func TestRepositoryIncrementalIndex(t *testing.T) {
r, cleanup := repository.TestRepository(t) repository.TestAllVersions(t, testRepositoryIncrementalIndex)
}
func testRepositoryIncrementalIndex(t *testing.T, version uint) {
r, cleanup := repository.TestRepositoryWithVersion(t, version)
defer cleanup() defer cleanup()
repo := r.(*repository.Repository) repo := r.(*repository.Repository)
repository.IndexFull = func(*repository.Index) bool { return true } repository.IndexFull = func(*repository.Index, bool) bool { return true }
// add 15 packs // add 15 packs
for j := 0; j < 5; j++ { for j := 0; j < 5; j++ {
@ -417,10 +462,31 @@ func TestRepositoryIncrementalIndex(t *testing.T) {
} }
// buildPackfileWithoutHeader returns a manually built pack file without a header. // buildPackfileWithoutHeader returns a manually built pack file without a header.
func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key) (blobs []restic.Blob, packfile []byte) { func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key, compress bool) (blobs []restic.Blob, packfile []byte) {
opts := []zstd.EOption{
// Set the compression level configured.
zstd.WithEncoderLevel(zstd.SpeedDefault),
// Disable CRC, we have enough checks in place, makes the
// compressed data four bytes shorter.
zstd.WithEncoderCRC(false),
// Set a window of 512kbyte, so we have good lookbehind for usual
// blob sizes.
zstd.WithWindowSize(512 * 1024),
}
enc, err := zstd.NewWriter(nil, opts...)
if err != nil {
panic(err)
}
var offset uint var offset uint
for i, size := range blobSizes { for i, size := range blobSizes {
plaintext := test.Random(800+i, size) plaintext := test.Random(800+i, size)
id := restic.Hash(plaintext)
uncompressedLength := uint(0)
if compress {
uncompressedLength = uint(len(plaintext))
plaintext = enc.EncodeAll(plaintext, nil)
}
// we use a deterministic nonce here so the whole process is // we use a deterministic nonce here so the whole process is
// deterministic, last byte is the blob index // deterministic, last byte is the blob index
@ -438,11 +504,12 @@ func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key)
blobs = append(blobs, restic.Blob{ blobs = append(blobs, restic.Blob{
BlobHandle: restic.BlobHandle{ BlobHandle: restic.BlobHandle{
ID: restic.Hash(plaintext),
Type: restic.DataBlob, Type: restic.DataBlob,
ID: id,
}, },
Length: uint(ciphertextLength), Length: uint(ciphertextLength),
Offset: offset, UncompressedLength: uncompressedLength,
Offset: offset,
}) })
offset = uint(len(packfile)) offset = uint(len(packfile))
@ -452,6 +519,10 @@ func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key)
} }
func TestStreamPack(t *testing.T) { func TestStreamPack(t *testing.T) {
repository.TestAllVersions(t, testStreamPack)
}
func testStreamPack(t *testing.T, version uint) {
// always use the same key for deterministic output // always use the same key for deterministic output
const jsonKey = `{"mac":{"k":"eQenuI8adktfzZMuC8rwdA==","r":"k8cfAly2qQSky48CQK7SBA=="},"encrypt":"MKO9gZnRiQFl8mDUurSDa9NMjiu9MUifUrODTHS05wo="}` const jsonKey = `{"mac":{"k":"eQenuI8adktfzZMuC8rwdA==","r":"k8cfAly2qQSky48CQK7SBA=="},"encrypt":"MKO9gZnRiQFl8mDUurSDa9NMjiu9MUifUrODTHS05wo="}`
@ -476,7 +547,17 @@ func TestStreamPack(t *testing.T) {
18883, 18883,
} }
packfileBlobs, packfile := buildPackfileWithoutHeader(t, blobSizes, &key) var compress bool
switch version {
case 1:
compress = false
case 2:
compress = true
default:
t.Fatal("test does not suport repository version", version)
}
packfileBlobs, packfile := buildPackfileWithoutHeader(t, blobSizes, &key, compress)
load := func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error { load := func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error {
data := packfile data := packfile
View file
@ -2,6 +2,7 @@ package repository
import ( import (
"context" "context"
"fmt"
"os" "os"
"testing" "testing"
@ -41,7 +42,7 @@ const TestChunkerPol = chunker.Pol(0x3DA3358B4DC173)
// TestRepositoryWithBackend returns a repository initialized with a test // TestRepositoryWithBackend returns a repository initialized with a test
// password. If be is nil, an in-memory backend is used. A constant polynomial // password. If be is nil, an in-memory backend is used. A constant polynomial
// is used for the chunker and low-security test parameters. // is used for the chunker and low-security test parameters.
func TestRepositoryWithBackend(t testing.TB, be restic.Backend) (r restic.Repository, cleanup func()) { func TestRepositoryWithBackend(t testing.TB, be restic.Backend, version uint) (r restic.Repository, cleanup func()) {
t.Helper() t.Helper()
TestUseLowSecurityKDFParameters(t) TestUseLowSecurityKDFParameters(t)
restic.TestDisableCheckPolynomial(t) restic.TestDisableCheckPolynomial(t)
@ -51,9 +52,9 @@ func TestRepositoryWithBackend(t testing.TB, be restic.Backend) (r restic.Reposi
be, beCleanup = TestBackend(t) be, beCleanup = TestBackend(t)
} }
repo := New(be) repo := New(be, Options{})
cfg := restic.TestCreateConfig(t, TestChunkerPol) cfg := restic.TestCreateConfig(t, TestChunkerPol, version)
err := repo.init(context.TODO(), test.TestPassword, cfg) err := repo.init(context.TODO(), test.TestPassword, cfg)
if err != nil { if err != nil {
t.Fatalf("TestRepository(): initialize repo failed: %v", err) t.Fatalf("TestRepository(): initialize repo failed: %v", err)
@ -71,6 +72,11 @@ func TestRepositoryWithBackend(t testing.TB, be restic.Backend) (r restic.Reposi
// a non-existing directory, a local backend is created there and this is used // a non-existing directory, a local backend is created there and this is used
// instead. The directory is not removed, but left there for inspection. // instead. The directory is not removed, but left there for inspection.
func TestRepository(t testing.TB) (r restic.Repository, cleanup func()) { func TestRepository(t testing.TB) (r restic.Repository, cleanup func()) {
t.Helper()
return TestRepositoryWithVersion(t, 0)
}
func TestRepositoryWithVersion(t testing.TB, version uint) (r restic.Repository, cleanup func()) {
t.Helper() t.Helper()
dir := os.Getenv("RESTIC_TEST_REPO") dir := os.Getenv("RESTIC_TEST_REPO")
if dir != "" { if dir != "" {
@ -80,7 +86,7 @@ func TestRepository(t testing.TB) (r restic.Repository, cleanup func()) {
if err != nil { if err != nil {
t.Fatalf("error creating local backend at %v: %v", dir, err) t.Fatalf("error creating local backend at %v: %v", dir, err)
} }
return TestRepositoryWithBackend(t, be) return TestRepositoryWithBackend(t, be, version)
} }
if err == nil { if err == nil {
@ -88,7 +94,7 @@ func TestRepository(t testing.TB) (r restic.Repository, cleanup func()) {
} }
} }
return TestRepositoryWithBackend(t, nil) return TestRepositoryWithBackend(t, nil, version)
} }
// TestOpenLocal opens a local repository. // TestOpenLocal opens a local repository.
@ -98,7 +104,7 @@ func TestOpenLocal(t testing.TB, dir string) (r restic.Repository) {
t.Fatal(err) t.Fatal(err)
} }
repo := New(be) repo := New(be, Options{})
err = repo.SearchKey(context.TODO(), test.TestPassword, 10, "") err = repo.SearchKey(context.TODO(), test.TestPassword, 10, "")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
@ -106,3 +112,23 @@ func TestOpenLocal(t testing.TB, dir string) (r restic.Repository) {
return repo return repo
} }
type VersionedTest func(t *testing.T, version uint)
func TestAllVersions(t *testing.T, test VersionedTest) {
for version := restic.MinRepoVersion; version <= restic.MaxRepoVersion; version++ {
t.Run(fmt.Sprintf("v%d", version), func(t *testing.T) {
test(t, uint(version))
})
}
}
type VersionedBenchmark func(b *testing.B, version uint)
func BenchmarkAllVersions(b *testing.B, bench VersionedBenchmark) {
for version := restic.MinRepoVersion; version <= restic.MaxRepoVersion; version++ {
b.Run(fmt.Sprintf("v%d", version), func(b *testing.B) {
bench(b, uint(version))
})
}
}
View file
@ -9,13 +9,25 @@ import (
// Blob is one part of a file or a tree. // Blob is one part of a file or a tree.
type Blob struct { type Blob struct {
BlobHandle BlobHandle
Length uint Length uint
Offset uint Offset uint
UncompressedLength uint
} }
func (b Blob) String() string { func (b Blob) String() string {
return fmt.Sprintf("<Blob (%v) %v, offset %v, length %v>", return fmt.Sprintf("<Blob (%v) %v, offset %v, length %v, uncompressed length %v>",
b.Type, b.ID.Str(), b.Offset, b.Length) b.Type, b.ID.Str(), b.Offset, b.Length, b.UncompressedLength)
}
func (b Blob) DataLength() uint {
if b.UncompressedLength != 0 {
return b.UncompressedLength
}
return uint(PlaintextLength(int(b.Length)))
}
func (b Blob) IsCompressed() bool {
return b.UncompressedLength != 0
} }
// PackedBlob is a blob stored within a file. // PackedBlob is a blob stored within a file.
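
DataLength reports how much plaintext a caller has to expect: the recorded uncompressed size for compressed blobs, otherwise the packed length minus the encryption overhead. A tiny illustration with made-up sizes:

    package main

    import (
        "fmt"

        "github.com/restic/restic/internal/restic"
    )

    func main() {
        // Uncompressed blob: 100 bytes of plaintext plus encryption overhead.
        plain := restic.Blob{Length: uint(restic.CiphertextLength(100))}

        // Compressed blob: stored smaller, but 100 bytes after decompression.
        comp := restic.Blob{Length: 70, UncompressedLength: 100}

        fmt.Println(plain.IsCompressed(), plain.DataLength()) // false 100
        fmt.Println(comp.IsCompressed(), comp.DataLength())   // true 100
    }
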
View file
@ -18,9 +18,12 @@ type Config struct {
ChunkerPolynomial chunker.Pol `json:"chunker_polynomial"` ChunkerPolynomial chunker.Pol `json:"chunker_polynomial"`
} }
// RepoVersion is the version that is written to the config when a repository const MinRepoVersion = 1
const MaxRepoVersion = 2
// StableRepoVersion is the version that is written to the config when a repository
// is newly created with Init(). // is newly created with Init().
const RepoVersion = 1 const StableRepoVersion = 1
// JSONUnpackedLoader loads unpacked JSON. // JSONUnpackedLoader loads unpacked JSON.
type JSONUnpackedLoader interface { type JSONUnpackedLoader interface {
@ -29,7 +32,7 @@ type JSONUnpackedLoader interface {
// CreateConfig creates a config file with a randomly selected polynomial and // CreateConfig creates a config file with a randomly selected polynomial and
// ID. // ID.
func CreateConfig() (Config, error) { func CreateConfig(version uint) (Config, error) {
var ( var (
err error err error
cfg Config cfg Config
@ -41,18 +44,24 @@ func CreateConfig() (Config, error) {
} }
cfg.ID = NewRandomID().String() cfg.ID = NewRandomID().String()
cfg.Version = RepoVersion cfg.Version = version
debug.Log("New config: %#v", cfg) debug.Log("New config: %#v", cfg)
return cfg, nil return cfg, nil
} }
// TestCreateConfig creates a config for use within tests. // TestCreateConfig creates a config for use within tests.
func TestCreateConfig(t testing.TB, pol chunker.Pol) (cfg Config) { func TestCreateConfig(t testing.TB, pol chunker.Pol, version uint) (cfg Config) {
cfg.ChunkerPolynomial = pol cfg.ChunkerPolynomial = pol
cfg.ID = NewRandomID().String() cfg.ID = NewRandomID().String()
cfg.Version = RepoVersion if version == 0 {
version = StableRepoVersion
}
if version < MinRepoVersion || version > MaxRepoVersion {
t.Fatalf("version %d is out of range", version)
}
cfg.Version = version
return cfg return cfg
} }
@ -77,7 +86,7 @@ func LoadConfig(ctx context.Context, r JSONUnpackedLoader) (Config, error) {
return Config{}, err return Config{}, err
} }
if cfg.Version != RepoVersion { if cfg.Version < MinRepoVersion || cfg.Version > MaxRepoVersion {
return Config{}, errors.Errorf("unsupported repository version %v", cfg.Version) return Config{}, errors.Errorf("unsupported repository version %v", cfg.Version)
} }
View file
@ -32,7 +32,7 @@ func TestConfig(t *testing.T) {
return restic.ID{}, nil return restic.ID{}, nil
} }
cfg1, err := restic.CreateConfig() cfg1, err := restic.CreateConfig(restic.MaxRepoVersion)
rtest.OK(t, err) rtest.OK(t, err)
_, err = saver(save).SaveJSONUnpacked(restic.ConfigFile, cfg1) _, err = saver(save).SaveJSONUnpacked(restic.ConfigFile, cfg1)
View file
@ -117,7 +117,7 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob) { err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob) {
if largeFile { if largeFile {
packsMap[packID] = append(packsMap[packID], fileBlobInfo{id: blob.ID, offset: fileOffset}) packsMap[packID] = append(packsMap[packID], fileBlobInfo{id: blob.ID, offset: fileOffset})
fileOffset += int64(restic.PlaintextLength(int(blob.Length))) fileOffset += int64(blob.DataLength())
} }
pack, ok := packs[packID] pack, ok := packs[packID]
if !ok { if !ok {
@ -195,7 +195,7 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
if packID.Equal(pack.id) { if packID.Equal(pack.id) {
addBlob(blob, fileOffset) addBlob(blob, fileOffset)
} }
fileOffset += int64(restic.PlaintextLength(int(blob.Length))) fileOffset += int64(blob.DataLength())
}) })
if err != nil { if err != nil {
// restoreFiles should have caught this error before // restoreFiles should have caught this error before