From 270ed00d1feded81ebedf39f5ac05fa0d6e747ae Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Tue, 15 Feb 2022 20:53:20 +0100 Subject: [PATCH 01/23] doc: Add repository compression support documentation Co-authored-by: Michael Eischer --- doc/design.rst | 121 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 88 insertions(+), 33 deletions(-) diff --git a/doc/design.rst b/doc/design.rst index aad70e1f7..a219cf628 100644 --- a/doc/design.rst +++ b/doc/design.rst @@ -62,18 +62,21 @@ like the following: .. code:: json { - "version": 1, + "version": 2, "id": "5956a3f67a6230d4a92cefb29529f10196c7d92582ec305fd71ff6d331d6271b", "chunker_polynomial": "25b468838dcb75" } After decryption, restic first checks that the version field contains a -version number that it understands, otherwise it aborts. At the moment, -the version is expected to be 1. The field ``id`` holds a unique ID -which consists of 32 random bytes, encoded in hexadecimal. This uniquely -identifies the repository, regardless if it is accessed via SFTP or -locally. The field ``chunker_polynomial`` contains a parameter that is -used for splitting large files into smaller chunks (see below). +version number that it understands, otherwise it aborts. At the moment, the +version is expected to be 1 or 2. The list of changes in the repository +format is contained in the section "Changes" below. + +The field ``id`` holds a unique ID which consists of 32 random bytes, encoded +in hexadecimal. This uniquely identifies the repository, regardless if it is +accessed via SFTP or locally. The field ``chunker_polynomial`` contains a +parameter that is used for splitting large files into smaller chunks (see +below). Repository Layout ----------------- @@ -186,40 +189,75 @@ After decryption, a Pack's header consists of the following elements: :: - Type_Blob1 || Length(EncryptedBlob1) || Hash(Plaintext_Blob1) || + Type_Blob1 || Data_Blob1 || [...] 
- Type_BlobN || Length(EncryptedBlobN) || Hash(Plaintext_Blobn) || + Type_BlobN || Data_BlobN || + +The Blob type field is a single byte. What follows it depends on the type. The +following Blob types are defined: + ++-----------+----------------------+-------------------------------------------------------------------------------+ +| Type | Meaning | Data | ++===========+======================+===============================================================================+ +| 0b00 | data blob | ``Length(encrypted_blob) || Hash(plaintext_blob)`` | ++-----------+----------------------+-------------------------------------------------------------------------------+ +| 0b01 | tree blob | ``Length(encrypted_blob) || Hash(plaintext_blob)`` | ++-----------+----------------------+-------------------------------------------------------------------------------+ +| 0b10 | compressed data blob | ``Length(encrypted_blob) || Length(plaintext_blob) || Hash(plaintext_blob)`` | ++-----------+----------------------+-------------------------------------------------------------------------------+ +| 0b11 | compressed tree blob | ``Length(encrypted_blob) || Length(plaintext_blob) || Hash(plaintext_blob)`` | ++-----------+----------------------+-------------------------------------------------------------------------------+ This is enough to calculate the offsets for all the Blobs in the Pack. -Length is the length of a Blob as a four byte integer in little-endian -format. The type field is a one byte field and labels the content of a -blob according to the following table: +The length fields are encoded as four byte integers in little-endian +format. In the Data column, ``Length(plaintext_blob)`` means the length +of the decrypted and uncompressed data a blob consists of. -+--------+-----------+ -| Type | Meaning | -+========+===========+ -| 0 | data | -+--------+-----------+ -| 1 | tree | -+--------+-----------+ +All other types are invalid, more types may be added in the future. 
The +compressed types are only valid for repository format version 2. Data and +tree blobs may be compressed with the zstandard compression algorithm. -All other types are invalid, more types may be added in the future. +In repository format version 1, data and tree blobs should be stored in +separate pack files. In version 2, they must be stored in separate files. +Compressed and non-compressed blobs of the same type may be mixed in a pack +file. For reconstructing the index or parsing a pack without an index, first the last four bytes must be read in order to find the length of the header. Afterwards, the header can be read and parsed, which yields all plaintext hashes, types, offsets and lengths of all included blobs. +Unpacked Data Format +==================== + +Individual files for the index, locks or snapshots are encrypted +and authenticated like Data and Tree Blobs, so the outer structure is +``IV || Ciphertext || MAC`` again. In repository format version 1 the +plaintext always consists of a JSON document which must either be an +object or an array. + +Repository format version 2 adds support for compression. The plaintext +now starts with a header to indicate the encoding version to distinguish +it from plain JSON and to allow for further evolution of the storage format: +``encoding_version || data`` +The ``encoding_version`` field is encoded as one byte. +For backwards compatibility the encoding versions '[' (0x5b) and '{' (0x7b) +are used to mark that the whole plaintext (including the encoding version +byte) should be treated as a JSON document. + +For new data the encoding version is currently always ``2``. For that +version ``data`` contains a JSON document compressed using the zstandard +compression algorithm. + Indexing ======== Index files contain information about Data and Tree Blobs and the Packs they are contained in and store this information in the repository. 
When the local cached index is not accessible any more, the index files can -be downloaded and used to reconstruct the index. The files are encrypted -and authenticated like Data and Tree Blobs, so the outer structure is -``IV || Ciphertext || MAC`` again. The plaintext consists of a JSON -document like the following: +be downloaded and used to reconstruct the index. The file encoding is +described in the "Unpacked Data Format" section. The plaintext consists +of a JSON document like the following: .. code:: json @@ -235,18 +273,22 @@ document like the following: "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce", "type": "data", "offset": 0, - "length": 25 - },{ + "length": 38, + // no 'uncompressed_length' as blob is not compressed + }, + { "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae", "type": "tree", "offset": 38, - "length": 100 + "length": 112, + "uncompressed_length": 511, }, { "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66", "type": "data", "offset": 150, - "length": 123 + "length": 123, + "uncompressed_length": 234, } ] }, [...] @@ -255,7 +297,11 @@ document like the following: This JSON document lists Packs and the blobs contained therein. In this example, the Pack ``73d04e61`` contains two data Blobs and one Tree -blob, the plaintext hashes are listed afterwards. +blob, the plaintext hashes are listed afterwards. The ``length`` field +corresponds to ``Length(encrypted_blob)`` in the pack file header. +Field ``uncompressed_length`` is only present for compressed blobs and +therefore is never present in version 1. It is set to the value of +``Length(blob)``. The field ``supersedes`` lists the storage IDs of index files that have been replaced with the current index file. This happens when index files @@ -350,8 +396,9 @@ Snapshots A snapshot represents a directory with all files and sub-directories at a given point in time. For each backup that is made, a new snapshot is -created. 
A snapshot is a JSON document that is stored in an encrypted -file below the directory ``snapshots`` in the repository. The filename +created. A snapshot is a JSON document that is stored in a file below +the directory ``snapshots`` in the repository. It uses the file encoding +described in the "Unpacked Data Format" section. The filename is the storage ID. This string is unique and used within restic to uniquely identify a snapshot. @@ -517,8 +564,8 @@ time there must not be any other locks (exclusive and non-exclusive). There may be multiple non-exclusive locks in parallel. A lock is a file in the subdir ``locks`` whose filename is the storage -ID of the contents. It is encrypted and authenticated the same way as -other files in the repository and contains the following JSON structure: +ID of the contents. It is stored in the file encoding described in the +"Unpacked Data Format" section and contains the following JSON structure: .. code:: json @@ -721,3 +768,11 @@ An adversary who has a leaked (decrypted) key for a repository could: only be done using the ``copy`` command, which moves the data into a new repository with a new master key, or by making a completely new repository and new backup. + +Changes +======= + +Repository Version 2 +-------------------- + + * Support compression for blobs (data/tree) and index / lock / snapshot files From 0957b74887c6c4f3eb5fd411aacfea1d9f4dce09 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 15 Feb 2022 20:55:46 +0100 Subject: [PATCH 02/23] Misc design.rst cleanups --- doc/design.rst | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/doc/design.rst b/doc/design.rst index a219cf628..17ab4c1b5 100644 --- a/doc/design.rst +++ b/doc/design.rst @@ -74,19 +74,18 @@ format is contained in the section "Changes" below. The field ``id`` holds a unique ID which consists of 32 random bytes, encoded in hexadecimal. 
This uniquely identifies the repository, regardless if it is -accessed via SFTP or locally. The field ``chunker_polynomial`` contains a -parameter that is used for splitting large files into smaller chunks (see -below). +accessed via a remote storage backend or locally. The field +``chunker_polynomial`` contains a parameter that is used for splitting large +files into smaller chunks (see below). Repository Layout ----------------- The ``local`` and ``sftp`` backends are implemented using files and directories stored in a file system. The directory layout is the same -for both backend types. +for both backend types and is also used for all other remote backends. -The basic layout of a repository stored in a ``local`` or ``sftp`` -backend is shown here: +The basic layout of a repository is shown here: :: @@ -112,8 +111,7 @@ backend is shown here: │ └── 22a5af1bdc6e616f8a29579458c49627e01b32210d09adb288d1ecda7c5711ec └── tmp -A local repository can be initialized with the ``restic init`` command, -e.g.: +A local repository can be initialized with the ``restic init`` command, e.g.: .. code-block:: console @@ -459,7 +457,7 @@ Blobs of data. The SHA-256 hashes of all Blobs are saved in an ordered list which then represents the content of the file. In order to relate these plaintext hashes to the actual location within -a Pack file , an index is used. If the index is not available, the +a Pack file, an index is used. If the index is not available, the header of all data Blobs can be read. Trees and Data From 4b957e7373db43f79ccd612d12eba76af8a41e12 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 13 Feb 2022 00:12:40 +0100 Subject: [PATCH 03/23] repository: Implement index/snapshot/lock compression The config file is not compressed as it should remain readable by older restic versions such that these can return a proper error. As the old format for unpacked data does not include a version header, make use of a trick: The old data is always encoded as JSON. 
Thus it can only start with '{' or '['. For any other value the first byte indicates a versioned format. The version is set to 2 for now. Then the zstd compressed data follows. --- go.mod | 2 +- internal/repository/repository.go | 58 +++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index b5d72874f..a20ae44a1 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( github.com/hashicorp/golang-lru v0.5.4 github.com/json-iterator/go v1.1.12 // indirect github.com/juju/ratelimit v1.0.1 - github.com/klauspost/compress v1.15.1 // indirect + github.com/klauspost/compress v1.15.1 github.com/klauspost/cpuid/v2 v2.0.12 // indirect github.com/kurin/blazer v0.5.4-0.20211030221322-ba894c124ac6 github.com/minio/md5-simd v1.1.2 // indirect diff --git a/internal/repository/repository.go b/internal/repository/repository.go index d74868895..eb2d0a109 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -12,6 +12,7 @@ import ( "sync" "github.com/cenkalti/backoff/v4" + "github.com/klauspost/compress/zstd" "github.com/restic/chunker" "github.com/restic/restic/internal/backend/dryrun" "github.com/restic/restic/internal/cache" @@ -40,6 +41,9 @@ type Repository struct { treePM *packerManager dataPM *packerManager + + enc *zstd.Encoder + dec *zstd.Decoder } // New returns a new repository with backend be. 
@@ -51,6 +55,16 @@ func New(be restic.Backend) *Repository { treePM: newPackerManager(be, nil), } + enc, err := zstd.NewWriter(nil) + if err != nil { + panic(err) + } + repo.enc = enc + dec, err := zstd.NewReader(nil) + if err != nil { + panic(err) + } + repo.dec = dec return repo } @@ -125,6 +139,9 @@ func (r *Repository) LoadUnpacked(ctx context.Context, buf []byte, t restic.File if err != nil { return nil, err } + if t != restic.ConfigFile { + return r.decompressUnpacked(plaintext) + } return plaintext, nil } @@ -312,9 +329,50 @@ func (r *Repository) SaveJSONUnpacked(ctx context.Context, t restic.FileType, it return r.SaveUnpacked(ctx, t, plaintext) } +func (r *Repository) compressUnpacked(p []byte) ([]byte, error) { + // compression is only available starting from version 2 + if r.cfg.Version < 2 { + return p, nil + } + + // version byte + out := []byte{2} + out = r.enc.EncodeAll(p, out) + return out, nil +} + +func (r *Repository) decompressUnpacked(p []byte) ([]byte, error) { + // compression is only available starting from version 2 + if r.cfg.Version < 2 { + return p, nil + } + + if len(p) < 1 { + // too short for version header + return p, nil + } + if p[0] == '[' || p[0] == '{' { + // probably raw JSON + return p, nil + } + // version + if p[0] != 2 { + return nil, errors.New("not supported encoding format") + } + + return r.dec.DecodeAll(p[1:], nil) +} + // SaveUnpacked encrypts data and stores it in the backend. Returned is the // storage hash. 
func (r *Repository) SaveUnpacked(ctx context.Context, t restic.FileType, p []byte) (id restic.ID, err error) { + if t != restic.ConfigFile { + p, err = r.compressUnpacked(p) + if err != nil { + return restic.ID{}, err + } + } + ciphertext := restic.NewBlobBuffer(len(p)) ciphertext = ciphertext[:0] nonce := crypto.NewRandomNonce() From 362ab06023e179af2b91244d9da11324841a9fd0 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 13 Feb 2022 00:52:03 +0100 Subject: [PATCH 04/23] init: Add flag to specify created repository version --- cmd/restic/cmd_init.go | 23 ++++++++++++++++++++++- internal/repository/repository.go | 12 ++++++++++-- internal/restic/config.go | 15 +++++++++------ internal/restic/config_test.go | 2 +- 4 files changed, 42 insertions(+), 10 deletions(-) diff --git a/cmd/restic/cmd_init.go b/cmd/restic/cmd_init.go index bbab3711d..f5e43bbc9 100644 --- a/cmd/restic/cmd_init.go +++ b/cmd/restic/cmd_init.go @@ -1,10 +1,13 @@ package main import ( + "strconv" + "github.com/restic/chunker" "github.com/restic/restic/internal/backend/location" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/repository" + "github.com/restic/restic/internal/restic" "github.com/spf13/cobra" ) @@ -30,6 +33,7 @@ Exit status is 0 if the command was successful, and non-zero if there was any er type InitOptions struct { secondaryRepoOptions CopyChunkerParameters bool + RepositoryVersion string } var initOptions InitOptions @@ -40,9 +44,26 @@ func init() { f := cmdInit.Flags() initSecondaryRepoOptions(f, &initOptions.secondaryRepoOptions, "secondary", "to copy chunker parameters from") f.BoolVar(&initOptions.CopyChunkerParameters, "copy-chunker-params", false, "copy chunker parameters from the secondary repository (useful with the copy command)") + f.StringVar(&initOptions.RepositoryVersion, "repository-version", "stable", "repository format version to use, allowed values are a format version, 'latest' and 'stable'") } func runInit(opts InitOptions, 
gopts GlobalOptions, args []string) error { + var version uint + if opts.RepositoryVersion == "latest" || opts.RepositoryVersion == "" { + version = restic.MaxRepoVersion + } else if opts.RepositoryVersion == "stable" { + version = restic.StableRepoVersion + } else { + v, err := strconv.ParseUint(opts.RepositoryVersion, 10, 32) + if err != nil { + return errors.Fatal("invalid repository version") + } + version = uint(v) + } + if version < restic.MinRepoVersion || version > restic.MaxRepoVersion { + return errors.Fatalf("only repository versions between %v and %v are allowed", restic.MinRepoVersion, restic.MaxRepoVersion) + } + chunkerPolynomial, err := maybeReadChunkerPolynomial(opts, gopts) if err != nil { return err @@ -67,7 +88,7 @@ func runInit(opts InitOptions, gopts GlobalOptions, args []string) error { s := repository.New(be) - err = s.Init(gopts.ctx, gopts.password, chunkerPolynomial) + err = s.Init(gopts.ctx, version, gopts.password, chunkerPolynomial) if err != nil { return errors.Fatalf("create key in repository at %s failed: %v\n", location.StripPassword(gopts.Repo), err) } diff --git a/internal/repository/repository.go b/internal/repository/repository.go index eb2d0a109..b00dfa39a 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -661,7 +661,15 @@ func (r *Repository) SearchKey(ctx context.Context, password string, maxKeys int // Init creates a new master key with the supplied password, initializes and // saves the repository config. 
-func (r *Repository) Init(ctx context.Context, password string, chunkerPolynomial *chunker.Pol) error { +func (r *Repository) Init(ctx context.Context, version uint, password string, chunkerPolynomial *chunker.Pol) error { + if version > restic.MaxRepoVersion { + return fmt.Errorf("repo version %v too high", version) + } + + if version < restic.MinRepoVersion { + return fmt.Errorf("repo version %v too low", version) + } + has, err := r.be.Test(ctx, restic.Handle{Type: restic.ConfigFile}) if err != nil { return err @@ -670,7 +678,7 @@ func (r *Repository) Init(ctx context.Context, password string, chunkerPolynomia return errors.New("repository master key and config already initialized") } - cfg, err := restic.CreateConfig() + cfg, err := restic.CreateConfig(version) if err != nil { return err } diff --git a/internal/restic/config.go b/internal/restic/config.go index ded98ac1b..3a6bab746 100644 --- a/internal/restic/config.go +++ b/internal/restic/config.go @@ -18,9 +18,12 @@ type Config struct { ChunkerPolynomial chunker.Pol `json:"chunker_polynomial"` } -// RepoVersion is the version that is written to the config when a repository +const MinRepoVersion = 1 +const MaxRepoVersion = 2 + +// StableRepoVersion is the version that is written to the config when a repository // is newly created with Init(). -const RepoVersion = 1 +const StableRepoVersion = 1 // JSONUnpackedLoader loads unpacked JSON. type JSONUnpackedLoader interface { @@ -29,7 +32,7 @@ type JSONUnpackedLoader interface { // CreateConfig creates a config file with a randomly selected polynomial and // ID. 
-func CreateConfig() (Config, error) { +func CreateConfig(version uint) (Config, error) { var ( err error cfg Config @@ -41,7 +44,7 @@ func CreateConfig() (Config, error) { } cfg.ID = NewRandomID().String() - cfg.Version = RepoVersion + cfg.Version = version debug.Log("New config: %#v", cfg) return cfg, nil @@ -52,7 +55,7 @@ func TestCreateConfig(t testing.TB, pol chunker.Pol) (cfg Config) { cfg.ChunkerPolynomial = pol cfg.ID = NewRandomID().String() - cfg.Version = RepoVersion + cfg.Version = StableRepoVersion return cfg } @@ -77,7 +80,7 @@ func LoadConfig(ctx context.Context, r JSONUnpackedLoader) (Config, error) { return Config{}, err } - if cfg.Version != RepoVersion { + if cfg.Version < MinRepoVersion || cfg.Version > MaxRepoVersion { return Config{}, errors.Errorf("unsupported repository version %v", cfg.Version) } diff --git a/internal/restic/config_test.go b/internal/restic/config_test.go index 506381965..fd8e4aeed 100644 --- a/internal/restic/config_test.go +++ b/internal/restic/config_test.go @@ -32,7 +32,7 @@ func TestConfig(t *testing.T) { return restic.ID{}, nil } - cfg1, err := restic.CreateConfig() + cfg1, err := restic.CreateConfig(restic.MaxRepoVersion) rtest.OK(t, err) _, err = saver(save).SaveJSONUnpacked(restic.ConfigFile, cfg1) From 6fb408d90e6e808642212c7b3b60e11aab2728d5 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 13 Feb 2022 17:24:09 +0100 Subject: [PATCH 05/23] repository: implement pack compression --- internal/pack/pack.go | 114 +++++++++++++-------- internal/pack/pack_internal_test.go | 14 +-- internal/pack/pack_test.go | 2 +- internal/repository/index.go | 38 ++++--- internal/repository/indexmap.go | 14 +-- internal/repository/indexmap_test.go | 11 +- internal/repository/packer_manager_test.go | 2 +- internal/repository/repository.go | 49 ++++++++- internal/restic/blob.go | 20 +++- internal/restorer/filerestorer.go | 4 +- 10 files changed, 184 insertions(+), 84 deletions(-) diff --git a/internal/pack/pack.go 
b/internal/pack/pack.go index 9fa209054..697a14a5d 100644 --- a/internal/pack/pack.go +++ b/internal/pack/pack.go @@ -32,7 +32,7 @@ func NewPacker(k *crypto.Key, wr io.Writer) *Packer { // Add saves the data read from rd as a new blob to the packer. Returned is the // number of bytes written to the pack. -func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte) (int, error) { +func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte, uncompressedLength int) (int, error) { p.m.Lock() defer p.m.Unlock() @@ -41,20 +41,23 @@ func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte) (int, error) n, err := p.wr.Write(data) c.Length = uint(n) c.Offset = p.bytes + c.UncompressedLength = uint(uncompressedLength) p.bytes += uint(n) p.blobs = append(p.blobs, c) return n, errors.Wrap(err, "Write") } -var entrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{})) +var entrySize = uint(binary.Size(restic.BlobType(0)) + 2*headerLengthSize + len(restic.ID{})) +var plainEntrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{})) // headerEntry describes the format of header entries. It serves only as // documentation. type headerEntry struct { - Type uint8 - Length uint32 - ID restic.ID + Type uint8 + Length uint32 + ID restic.ID + CompressedLength uint32 } // Finalize writes the header for all added blobs and finalizes the pack. @@ -70,7 +73,7 @@ func (p *Packer) Finalize() (uint, error) { return 0, err } - encryptedHeader := make([]byte, 0, len(header)+p.k.Overhead()+p.k.NonceSize()) + encryptedHeader := make([]byte, 0, restic.CiphertextLength(len(header))) nonce := crypto.NewRandomNonce() encryptedHeader = append(encryptedHeader, nonce...) 
encryptedHeader = p.k.Seal(encryptedHeader, nonce, header, nil) @@ -81,7 +84,7 @@ func (p *Packer) Finalize() (uint, error) { return 0, errors.Wrap(err, "Write") } - hdrBytes := restic.CiphertextLength(len(header)) + hdrBytes := len(encryptedHeader) if n != hdrBytes { return 0, errors.New("wrong number of bytes written") } @@ -104,11 +107,15 @@ func (p *Packer) makeHeader() ([]byte, error) { buf := make([]byte, 0, len(p.blobs)*int(entrySize)) for _, b := range p.blobs { - switch b.Type { - case restic.DataBlob: + switch { + case b.Type == restic.DataBlob && b.UncompressedLength == 0: buf = append(buf, 0) - case restic.TreeBlob: + case b.Type == restic.TreeBlob && b.UncompressedLength == 0: buf = append(buf, 1) + case b.Type == restic.DataBlob && b.UncompressedLength != 0: + buf = append(buf, 2) + case b.Type == restic.TreeBlob && b.UncompressedLength != 0: + buf = append(buf, 3) default: return nil, errors.Errorf("invalid blob type %v", b.Type) } @@ -116,6 +123,10 @@ func (p *Packer) makeHeader() ([]byte, error) { var lenLE [4]byte binary.LittleEndian.PutUint32(lenLE[:], uint32(b.Length)) buf = append(buf, lenLE[:]...) + if b.UncompressedLength != 0 { + binary.LittleEndian.PutUint32(lenLE[:], uint32(b.UncompressedLength)) + buf = append(buf, lenLE[:]...) + } buf = append(buf, b.ID[:]...) } @@ -152,7 +163,7 @@ func (p *Packer) String() string { var ( // we require at least one entry in the header, and one blob for a pack file - minFileSize = entrySize + crypto.Extension + uint(headerLengthSize) + minFileSize = plainEntrySize + crypto.Extension + uint(headerLengthSize) ) const ( @@ -167,16 +178,11 @@ const ( eagerEntries = 15 ) -// readRecords reads up to max records from the underlying ReaderAt, returning -// the raw header, the total number of records in the header, and any error. 
-// If the header contains fewer than max entries, the header is truncated to +// readRecords reads up to bufsize bytes from the underlying ReaderAt, returning +// the raw header, the total number of bytes in the header, and any error. +// If the header contains fewer than bufsize bytes, the header is truncated to // the appropriate size. -func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) { - var bufsize int - bufsize += max * int(entrySize) - bufsize += crypto.Extension - bufsize += headerLengthSize - +func readRecords(rd io.ReaderAt, size int64, bufsize int) ([]byte, int, error) { if bufsize > int(size) { bufsize = int(size) } @@ -197,8 +203,6 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) { err = InvalidFileError{Message: "header length is zero"} case hlen < crypto.Extension: err = InvalidFileError{Message: "header length is too small"} - case (hlen-crypto.Extension)%uint32(entrySize) != 0: - err = InvalidFileError{Message: "header length is invalid"} case int64(hlen) > size-int64(headerLengthSize): err = InvalidFileError{Message: "header is larger than file"} case int64(hlen) > MaxHeaderSize-int64(headerLengthSize): @@ -208,8 +212,8 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) { return nil, 0, errors.Wrap(err, "readHeader") } - total := (int(hlen) - crypto.Extension) / int(entrySize) - if total < max { + total := int(hlen + headerLengthSize) + if total < bufsize { // truncate to the beginning of the pack header b = b[len(b)-int(hlen):] } @@ -230,11 +234,12 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) { // eagerly download eagerEntries header entries as part of header-length request. 
// only make second request if actual number of entries is greater than eagerEntries - b, c, err := readRecords(rd, size, eagerEntries) + eagerSize := eagerEntries*int(entrySize) + headerSize + b, c, err := readRecords(rd, size, eagerSize) if err != nil { return nil, err } - if c <= eagerEntries { + if c <= eagerSize { // eager read sufficed, return what we got return b, nil } @@ -262,7 +267,7 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr return nil, 0, err } - if len(buf) < k.NonceSize()+k.Overhead() { + if len(buf) < restic.CiphertextLength(0) { return nil, 0, errors.New("invalid header, too small") } @@ -274,11 +279,12 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr return nil, 0, err } - entries = make([]restic.Blob, 0, uint(len(buf))/entrySize) + // might over allocate a bit if all blobs have EntrySize but only by a few percent + entries = make([]restic.Blob, 0, uint(len(buf))/plainEntrySize) pos := uint(0) for len(buf) > 0 { - entry, err := parseHeaderEntry(buf) + entry, headerSize, err := parseHeaderEntry(buf) if err != nil { return nil, 0, err } @@ -286,36 +292,60 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr entries = append(entries, entry) pos += entry.Length - buf = buf[entrySize:] + buf = buf[headerSize:] } return entries, hdrSize, nil } -func parseHeaderEntry(p []byte) (b restic.Blob, err error) { - if uint(len(p)) < entrySize { +func parseHeaderEntry(p []byte) (b restic.Blob, size uint, err error) { + l := uint(len(p)) + size = plainEntrySize + if l < plainEntrySize { err = errors.Errorf("parseHeaderEntry: buffer of size %d too short", len(p)) - return b, err + return b, size, err } - p = p[:entrySize] + tpe := p[0] - switch p[0] { - case 0: + switch tpe { + case 0, 2: b.Type = restic.DataBlob - case 1: + case 1, 3: b.Type = restic.TreeBlob default: - return b, errors.Errorf("invalid type %d", p[0]) + return b, size, errors.Errorf("invalid 
type %d", tpe) } b.Length = uint(binary.LittleEndian.Uint32(p[1:5])) - copy(b.ID[:], p[5:]) + p = p[5:] + if tpe == 2 || tpe == 3 { + size = entrySize + if l < entrySize { + err = errors.Errorf("parseHeaderEntry: buffer of size %d too short", len(p)) + return b, size, err + } + b.UncompressedLength = uint(binary.LittleEndian.Uint32(p[0:4])) + p = p[4:] + } - return b, nil + copy(b.ID[:], p[:]) + + return b, size, nil +} + +func CalculateEntrySize(blob restic.Blob) int { + if blob.UncompressedLength != 0 { + return int(entrySize) + } + return int(plainEntrySize) } func CalculateHeaderSize(blobs []restic.Blob) int { - return headerSize + len(blobs)*int(entrySize) + size := headerSize + for _, blob := range blobs { + size += CalculateEntrySize(blob) + } + return size } // Size returns the size of all packs computed by index information. @@ -333,7 +363,7 @@ func Size(ctx context.Context, mi restic.MasterIndex, onlyHdr bool) map[restic.I if !onlyHdr { size += int64(blob.Length) } - packSize[blob.PackID] = size + int64(entrySize) + packSize[blob.PackID] = size + int64(CalculateEntrySize(blob.Blob)) } return packSize diff --git a/internal/pack/pack_internal_test.go b/internal/pack/pack_internal_test.go index b8078c829..93d04f18e 100644 --- a/internal/pack/pack_internal_test.go +++ b/internal/pack/pack_internal_test.go @@ -23,9 +23,10 @@ func TestParseHeaderEntry(t *testing.T) { buf := new(bytes.Buffer) _ = binary.Write(buf, binary.LittleEndian, &h) - b, err := parseHeaderEntry(buf.Bytes()) + b, size, err := parseHeaderEntry(buf.Bytes()) rtest.OK(t, err) rtest.Equals(t, restic.DataBlob, b.Type) + rtest.Equals(t, plainEntrySize, size) t.Logf("%v %v", h.ID, b.ID) rtest.Assert(t, bytes.Equal(h.ID[:], b.ID[:]), "id mismatch") rtest.Equals(t, uint(h.Length), b.Length) @@ -34,14 +35,14 @@ func TestParseHeaderEntry(t *testing.T) { buf.Reset() _ = binary.Write(buf, binary.LittleEndian, &h) - b, err = parseHeaderEntry(buf.Bytes()) + b, _, err = parseHeaderEntry(buf.Bytes()) 
rtest.Assert(t, err != nil, "no error for invalid type") h.Type = 0 buf.Reset() _ = binary.Write(buf, binary.LittleEndian, &h) - b, err = parseHeaderEntry(buf.Bytes()[:entrySize-1]) + b, _, err = parseHeaderEntry(buf.Bytes()[:plainEntrySize-1]) rtest.Assert(t, err != nil, "no error for short input") } @@ -97,7 +98,8 @@ func TestReadHeaderEagerLoad(t *testing.T) { func TestReadRecords(t *testing.T) { testReadRecords := func(dataSize, entryCount, totalRecords int) { totalHeader := rtest.Random(0, totalRecords*int(entrySize)+crypto.Extension) - off := len(totalHeader) - (entryCount*int(entrySize) + crypto.Extension) + bufSize := entryCount*int(entrySize) + crypto.Extension + off := len(totalHeader) - bufSize if off < 0 { off = 0 } @@ -110,10 +112,10 @@ func TestReadRecords(t *testing.T) { rd := bytes.NewReader(buf.Bytes()) - header, count, err := readRecords(rd, int64(rd.Len()), entryCount) + header, count, err := readRecords(rd, int64(rd.Len()), bufSize+4) rtest.OK(t, err) + rtest.Equals(t, len(totalHeader)+4, count) rtest.Equals(t, expectedHeader, header) - rtest.Equals(t, totalRecords, count) } // basic diff --git a/internal/pack/pack_test.go b/internal/pack/pack_test.go index 2b7ec7fea..6170e807c 100644 --- a/internal/pack/pack_test.go +++ b/internal/pack/pack_test.go @@ -38,7 +38,7 @@ func newPack(t testing.TB, k *crypto.Key, lengths []int) ([]Buf, []byte, uint) { var buf bytes.Buffer p := pack.NewPacker(k, &buf) for _, b := range bufs { - _, err := p.Add(restic.TreeBlob, b.id, b.data) + _, err := p.Add(restic.TreeBlob, b.id, b.data, 2*len(b.data)) rtest.OK(t, err) } diff --git a/internal/repository/index.go b/internal/repository/index.go index 3db19b3b8..5f6fe1997 100644 --- a/internal/repository/index.go +++ b/internal/repository/index.go @@ -75,12 +75,12 @@ const maxuint32 = 1<<32 - 1 func (idx *Index) store(packIndex int, blob restic.Blob) { // assert that offset and length fit into uint32! 
- if blob.Offset > maxuint32 || blob.Length > maxuint32 { + if blob.Offset > maxuint32 || blob.Length > maxuint32 || blob.UncompressedLength > maxuint32 { panic("offset or length does not fit in uint32. You have packs > 4GB!") } m := &idx.byType[blob.Type] - m.add(blob.ID, packIndex, uint32(blob.Offset), uint32(blob.Length)) + m.add(blob.ID, packIndex, uint32(blob.Offset), uint32(blob.Length), uint32(blob.UncompressedLength)) } // Final returns true iff the index is already written to the repository, it is @@ -169,8 +169,9 @@ func (idx *Index) toPackedBlob(e *indexEntry, t restic.BlobType) restic.PackedBl BlobHandle: restic.BlobHandle{ ID: e.id, Type: t}, - Length: uint(e.length), - Offset: uint(e.offset), + Length: uint(e.length), + Offset: uint(e.offset), + UncompressedLength: uint(e.uncompressedLength), }, PackID: idx.packs[e.packIndex], } @@ -225,6 +226,9 @@ func (idx *Index) LookupSize(bh restic.BlobHandle) (plaintextLength uint, found if e == nil { return 0, false } + if e.uncompressedLength != 0 { + return uint(e.uncompressedLength), true + } return uint(restic.PlaintextLength(int(e.length))), true } @@ -357,10 +361,11 @@ type packJSON struct { } type blobJSON struct { - ID restic.ID `json:"id"` - Type restic.BlobType `json:"type"` - Offset uint `json:"offset"` - Length uint `json:"length"` + ID restic.ID `json:"id"` + Type restic.BlobType `json:"type"` + Offset uint `json:"offset"` + Length uint `json:"length"` + UncompressedLength uint `json:"uncompressed_length,omitempty"` } // generatePackList returns a list of packs. 
@@ -391,10 +396,11 @@ func (idx *Index) generatePackList() ([]*packJSON, error) { // add blob p.Blobs = append(p.Blobs, blobJSON{ - ID: e.id, - Type: restic.BlobType(typ), - Offset: uint(e.offset), - Length: uint(e.length), + ID: e.id, + Type: restic.BlobType(typ), + Offset: uint(e.offset), + Length: uint(e.length), + UncompressedLength: uint(e.uncompressedLength), }) return true @@ -553,7 +559,7 @@ func (idx *Index) merge(idx2 *Index) error { m2.foreach(func(e2 *indexEntry) bool { if !hasIdenticalEntry(e2) { // packIndex needs to be changed as idx2.pack was appended to idx.pack, see above - m.add(e2.id, e2.packIndex+packlen, e2.offset, e2.length) + m.add(e2.id, e2.packIndex+packlen, e2.offset, e2.length, e2.uncompressedLength) } return true }) @@ -601,8 +607,9 @@ func DecodeIndex(buf []byte, id restic.ID) (idx *Index, oldFormat bool, err erro BlobHandle: restic.BlobHandle{ Type: blob.Type, ID: blob.ID}, - Offset: blob.Offset, - Length: blob.Length, + Offset: blob.Offset, + Length: blob.Length, + UncompressedLength: blob.UncompressedLength, }) switch blob.Type { @@ -648,6 +655,7 @@ func decodeOldIndex(buf []byte) (idx *Index, err error) { ID: blob.ID}, Offset: blob.Offset, Length: blob.Length, + // no compressed length in the old index format }) switch blob.Type { diff --git a/internal/repository/indexmap.go b/internal/repository/indexmap.go index f713a3304..3d0ed5db4 100644 --- a/internal/repository/indexmap.go +++ b/internal/repository/indexmap.go @@ -32,7 +32,7 @@ const ( // add inserts an indexEntry for the given arguments into the map, // using id as the key. -func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32) { +func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32, uncompressedLength uint32) { switch { case m.numentries == 0: // Lazy initialization. 
m.init() @@ -47,6 +47,7 @@ func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32) { e.packIndex = packIdx e.offset = offset e.length = length + e.uncompressedLength = uncompressedLength m.buckets[h] = e m.numentries++ @@ -152,9 +153,10 @@ func (m *indexMap) newEntry() *indexEntry { } type indexEntry struct { - id restic.ID - next *indexEntry - packIndex int // Position in containing Index's packs field. - offset uint32 - length uint32 + id restic.ID + next *indexEntry + packIndex int // Position in containing Index's packs field. + offset uint32 + length uint32 + uncompressedLength uint32 } diff --git a/internal/repository/indexmap_test.go b/internal/repository/indexmap_test.go index d803bf3c5..6699b3601 100644 --- a/internal/repository/indexmap_test.go +++ b/internal/repository/indexmap_test.go @@ -22,7 +22,7 @@ func TestIndexMapBasic(t *testing.T) { r.Read(id[:]) rtest.Assert(t, m.get(id) == nil, "%v retrieved but not added", id) - m.add(id, 0, 0, 0) + m.add(id, 0, 0, 0, 0) rtest.Assert(t, m.get(id) != nil, "%v added but not retrieved", id) rtest.Equals(t, uint(i), m.len()) } @@ -41,7 +41,7 @@ func TestIndexMapForeach(t *testing.T) { for i := 0; i < N; i++ { var id restic.ID id[0] = byte(i) - m.add(id, i, uint32(i), uint32(i)) + m.add(id, i, uint32(i), uint32(i), uint32(i/2)) } seen := make(map[int]struct{}) @@ -51,6 +51,7 @@ func TestIndexMapForeach(t *testing.T) { rtest.Equals(t, i, e.packIndex) rtest.Equals(t, i, int(e.length)) rtest.Equals(t, i, int(e.offset)) + rtest.Equals(t, i/2, int(e.uncompressedLength)) seen[i] = struct{}{} return true @@ -85,13 +86,13 @@ func TestIndexMapForeachWithID(t *testing.T) { // Test insertion and retrieval of duplicates. 
for i := 0; i < ndups; i++ { - m.add(id, i, 0, 0) + m.add(id, i, 0, 0, 0) } for i := 0; i < 100; i++ { var otherid restic.ID r.Read(otherid[:]) - m.add(otherid, -1, 0, 0) + m.add(otherid, -1, 0, 0, 0) } n = 0 @@ -109,7 +110,7 @@ func TestIndexMapForeachWithID(t *testing.T) { func BenchmarkIndexMapHash(b *testing.B) { var m indexMap - m.add(restic.ID{}, 0, 0, 0) // Trigger lazy initialization. + m.add(restic.ID{}, 0, 0, 0, 0) // Trigger lazy initialization. ids := make([]restic.ID, 128) // 4 KiB. r := rand.New(rand.NewSource(time.Now().UnixNano())) diff --git a/internal/repository/packer_manager_test.go b/internal/repository/packer_manager_test.go index 1a810ab61..c5233ab4e 100644 --- a/internal/repository/packer_manager_test.go +++ b/internal/repository/packer_manager_test.go @@ -70,7 +70,7 @@ func fillPacks(t testing.TB, rnd *rand.Rand, be Saver, pm *packerManager, buf [] // Only change a few bytes so we know we're not benchmarking the RNG. rnd.Read(buf[:min(l, 4)]) - n, err := packer.Add(restic.DataBlob, id, buf) + n, err := packer.Add(restic.DataBlob, id, buf, 0) if err != nil { t.Fatal(err) } diff --git a/internal/repository/repository.go b/internal/repository/repository.go index b00dfa39a..4a340222f 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -235,12 +235,23 @@ func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic. 
continue } + if blob.IsCompressed() { + plaintext, err = r.dec.DecodeAll(plaintext, make([]byte, 0, blob.DataLength())) + if err != nil { + lastError = errors.Errorf("decompressing blob %v failed: %v", id, err) + continue + } + } + // check hash if !restic.Hash(plaintext).Equal(id) { lastError = errors.Errorf("blob %v returned invalid hash", id) continue } + if len(plaintext) > cap(buf) { + return plaintext, nil + } // move decrypted data to the start of the buffer copy(buf, plaintext) return buf[:len(plaintext)], nil @@ -275,6 +286,12 @@ func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, bo func (r *Repository) saveAndEncrypt(ctx context.Context, t restic.BlobType, data []byte, id restic.ID) error { debug.Log("save id %v (%v, %d bytes)", id, t, len(data)) + uncompressedLength := 0 + if r.cfg.Version > 1 { + uncompressedLength = len(data) + data = r.enc.EncodeAll(data, nil) + } + nonce := crypto.NewRandomNonce() ciphertext := make([]byte, 0, restic.CiphertextLength(len(data))) @@ -301,7 +318,7 @@ func (r *Repository) saveAndEncrypt(ctx context.Context, t restic.BlobType, data } // save ciphertext - _, err = packer.Add(t, id, ciphertext) + _, err = packer.Add(t, id, ciphertext, uncompressedLength) if err != nil { return err } @@ -536,6 +553,17 @@ func (r *Repository) LoadIndex(ctx context.Context) error { return err } + if r.cfg.Version < 2 { + // sanity check + ctx, cancel := context.WithCancel(ctx) + defer cancel() + for blob := range r.idx.Each(ctx) { + if blob.IsCompressed() { + return errors.Fatal("index uses feature not supported by repository version 1") + } + } + } + // remove index files from the cache which have been removed in the repo return r.PrepareCache(validIndex) } @@ -834,9 +862,15 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack debug.Log("streaming pack %v (%d to %d bytes), blobs: %v", packID, dataStart, dataEnd, len(blobs)) + dec, err := zstd.NewReader(nil) + if err != nil { + 
panic(dec) + } + defer dec.Close() + ctx, cancel := context.WithCancel(ctx) // stream blobs in pack - err := beLoad(ctx, h, int(dataEnd-dataStart), int64(dataStart), func(rd io.Reader) error { + err = beLoad(ctx, h, int(dataEnd-dataStart), int64(dataStart), func(rd io.Reader) error { // prevent callbacks after cancelation if ctx.Err() != nil { return ctx.Err() @@ -849,6 +883,7 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack bufRd := bufio.NewReaderSize(rd, bufferSize) currentBlobEnd := dataStart var buf []byte + var decode []byte for _, entry := range blobs { skipBytes := int(entry.Offset - currentBlobEnd) if skipBytes < 0 { @@ -888,6 +923,16 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack // decryption errors are likely permanent, give the caller a chance to skip them nonce, ciphertext := buf[:key.NonceSize()], buf[key.NonceSize():] plaintext, err := key.Open(ciphertext[:0], nonce, ciphertext, nil) + if err == nil && entry.IsCompressed() { + if cap(decode) < int(entry.DataLength()) { + decode = make([]byte, 0, entry.DataLength()) + } + decode, err = dec.DecodeAll(plaintext, decode[:0]) + plaintext = decode + if err != nil { + err = errors.Errorf("decompressing blob %v failed: %v", h, err) + } + } if err == nil { id := restic.Hash(plaintext) if !id.Equal(entry.ID) { diff --git a/internal/restic/blob.go b/internal/restic/blob.go index d365bb92e..a4fcdb1ac 100644 --- a/internal/restic/blob.go +++ b/internal/restic/blob.go @@ -9,13 +9,25 @@ import ( // Blob is one part of a file or a tree. 
type Blob struct { BlobHandle - Length uint - Offset uint + Length uint + Offset uint + UncompressedLength uint } func (b Blob) String() string { - return fmt.Sprintf("", - b.Type, b.ID.Str(), b.Offset, b.Length) + return fmt.Sprintf("", + b.Type, b.ID.Str(), b.Offset, b.Length, b.UncompressedLength) +} + +func (b Blob) DataLength() uint { + if b.UncompressedLength != 0 { + return b.UncompressedLength + } + return uint(PlaintextLength(int(b.Length))) +} + +func (b Blob) IsCompressed() bool { + return b.UncompressedLength != 0 } // PackedBlob is a blob stored within a file. diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go index 206703ce3..d255dad15 100644 --- a/internal/restorer/filerestorer.go +++ b/internal/restorer/filerestorer.go @@ -117,7 +117,7 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error { err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob) { if largeFile { packsMap[packID] = append(packsMap[packID], fileBlobInfo{id: blob.ID, offset: fileOffset}) - fileOffset += int64(restic.PlaintextLength(int(blob.Length))) + fileOffset += int64(blob.DataLength()) } pack, ok := packs[packID] if !ok { @@ -195,7 +195,7 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error { if packID.Equal(pack.id) { addBlob(blob, fileOffset) } - fileOffset += int64(restic.PlaintextLength(int(blob.Length))) + fileOffset += int64(blob.DataLength()) }) if err != nil { // restoreFiles should have caught this error before From fd05037e1ad7350d89ae91fdb9f52f954787bc4a Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 13 Feb 2022 17:49:49 +0100 Subject: [PATCH 06/23] repository: recalibrate index batch allocation size --- internal/repository/indexmap.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/repository/indexmap.go b/internal/repository/indexmap.go index 3d0ed5db4..6a8e86aad 100644 --- a/internal/repository/indexmap.go +++ 
b/internal/repository/indexmap.go @@ -131,12 +131,12 @@ func (m *indexMap) len() uint { return m.numentries } func (m *indexMap) newEntry() *indexEntry { // Allocating in batches means that we get closer to optimal space usage, - // as Go's malloc will overallocate for structures of size 56 (indexEntry + // as Go's malloc will overallocate for structures of size 60 (indexEntry // on amd64). // - // 256*56 and 256*48 both have minimal malloc overhead among reasonable sizes. + // 128*60 has low malloc overhead among reasonable sizes. // See src/runtime/sizeclasses.go in the standard library. - const entryAllocBatch = 256 + const entryAllocBatch = 128 if m.free == nil { free := new([entryAllocBatch]indexEntry) From 66f9048bceb9e83ec0dbf6557404590dc44eb61c Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 19 Feb 2022 21:15:31 +0100 Subject: [PATCH 07/23] repository: Alloc zstd encoder/decoder on demand --- internal/repository/repository.go | 46 ++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/internal/repository/repository.go b/internal/repository/repository.go index 4a340222f..f028b57ad 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -42,8 +42,10 @@ type Repository struct { treePM *packerManager dataPM *packerManager - enc *zstd.Encoder - dec *zstd.Decoder + allocEnc sync.Once + allocDec sync.Once + enc *zstd.Encoder + dec *zstd.Decoder } // New returns a new repository with backend be. @@ -55,16 +57,6 @@ func New(be restic.Backend) *Repository { treePM: newPackerManager(be, nil), } - enc, err := zstd.NewWriter(nil) - if err != nil { - panic(err) - } - repo.enc = enc - dec, err := zstd.NewReader(nil) - if err != nil { - panic(err) - } - repo.dec = dec return repo } @@ -236,7 +228,7 @@ func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.
} if blob.IsCompressed() { - plaintext, err = r.dec.DecodeAll(plaintext, make([]byte, 0, blob.DataLength())) + plaintext, err = r.getZstdDecoder().DecodeAll(plaintext, make([]byte, 0, blob.DataLength())) if err != nil { lastError = errors.Errorf("decompressing blob %v failed: %v", id, err) continue @@ -280,6 +272,28 @@ func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, bo return r.idx.LookupSize(restic.BlobHandle{ID: id, Type: tpe}) } +func (r *Repository) getZstdEncoder() *zstd.Encoder { + r.allocEnc.Do(func() { + enc, err := zstd.NewWriter(nil) + if err != nil { + panic(err) + } + r.enc = enc + }) + return r.enc +} + +func (r *Repository) getZstdDecoder() *zstd.Decoder { + r.allocDec.Do(func() { + dec, err := zstd.NewReader(nil) + if err != nil { + panic(err) + } + r.dec = dec + }) + return r.dec +} + // saveAndEncrypt encrypts data and stores it to the backend as type t. If data // is small enough, it will be packed together with other small blobs. // The caller must ensure that the id matches the data. @@ -289,7 +303,7 @@ func (r *Repository) saveAndEncrypt(ctx context.Context, t restic.BlobType, data uncompressedLength := 0 if r.cfg.Version > 1 { uncompressedLength = len(data) - data = r.enc.EncodeAll(data, nil) + data = r.getZstdEncoder().EncodeAll(data, nil) } nonce := crypto.NewRandomNonce() @@ -354,7 +368,7 @@ func (r *Repository) compressUnpacked(p []byte) ([]byte, error) { // version byte out := []byte{2} - out = r.enc.EncodeAll(p, out) + out = r.getZstdEncoder().EncodeAll(p, out) return out, nil } @@ -377,7 +391,7 @@ func (r *Repository) decompressUnpacked(p []byte) ([]byte, error) { return nil, errors.New("not supported encoding format") } - return r.dec.DecodeAll(p[1:], nil) + return r.getZstdDecoder().DecodeAll(p[1:], nil) } // SaveUnpacked encrypts data and stores it in the backend. 
Returned is the From fda7bb0f097d92fd78699bcf8b3dc8c76afcf32c Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 19 Feb 2022 21:59:02 +0100 Subject: [PATCH 08/23] debug: Reduce code duplication --- cmd/restic/cmd_debug.go | 65 +++++++++++++---------------------------- 1 file changed, 21 insertions(+), 44 deletions(-) diff --git a/cmd/restic/cmd_debug.go b/cmd/restic/cmd_debug.go index 7947f789f..d144c751f 100644 --- a/cmd/restic/cmd_debug.go +++ b/cmd/restic/cmd_debug.go @@ -333,44 +333,37 @@ func loadBlobs(ctx context.Context, repo restic.Repository, pack restic.ID, list nonce, plaintext := buf[:key.NonceSize()], buf[key.NonceSize():] plaintext, err = key.Open(plaintext[:0], nonce, plaintext, nil) + outputPrefix := "" + filePrefix := "" if err != nil { Warnf("error decrypting blob: %v\n", err) - var plain []byte if tryRepair || repairByte { - plain = tryRepairWithBitflip(ctx, key, buf, repairByte) + plaintext = tryRepairWithBitflip(ctx, key, buf, repairByte) } - var prefix string - if plain != nil { - id := restic.Hash(plain) - if !id.Equal(blob.ID) { - Printf(" repaired blob (length %v), hash is %v, ID does not match, wanted %v\n", len(plain), id, blob.ID) - prefix = "repaired-wrong-hash-" - } else { - Printf(" successfully repaired blob (length %v), hash is %v, ID matches\n", len(plain), id) - prefix = "repaired-" - } + if plaintext != nil { + outputPrefix = "repaired " + filePrefix = "repaired-" } else { - plain = decryptUnsigned(ctx, key, buf) - prefix = "damaged-" + plaintext = decryptUnsigned(ctx, key, buf) + err = storePlainBlob(blob.ID, "damaged-", plaintext) + if err != nil { + return err + } + continue } - err = storePlainBlob(blob.ID, prefix, plain) - if err != nil { - return err - } - continue } id := restic.Hash(plaintext) var prefix string if !id.Equal(blob.ID) { - Printf(" successfully decrypted blob (length %v), hash is %v, ID does not match, wanted %v\n", len(plaintext), id, blob.ID) + Printf(" successfully %vdecrypted blob (length %v), 
hash is %v, ID does not match, wanted %v\n", outputPrefix, len(plaintext), id, blob.ID) prefix = "wrong-hash-" } else { - Printf(" successfully decrypted blob (length %v), hash is %v, ID matches\n", len(plaintext), id) + Printf(" successfully %vdecrypted blob (length %v), hash is %v, ID matches\n", outputPrefix, len(plaintext), id) prefix = "correct-" } if extractPack { - err = storePlainBlob(id, prefix, plaintext) + err = storePlainBlob(id, filePrefix+prefix, plaintext) if err != nil { return err } @@ -476,27 +469,15 @@ func examinePack(ctx context.Context, repo restic.Repository, id restic.ID) erro blobsLoaded := false // examine all data the indexes have for the pack file - for _, idx := range repo.Index().(*repository.MasterIndex).All() { - idxIDs, err := idx.IDs() - if err != nil { - idxIDs = restic.IDs{} - } - - blobs := idx.ListPack(id) + for b := range repo.Index().ListPacks(ctx, restic.NewIDSet(id)) { + blobs := b.Blobs if len(blobs) == 0 { continue } - Printf(" index %v:\n", idxIDs) + checkPackSize(blobs, fi.Size) - // convert list of blobs to []restic.Blob - var list []restic.Blob - for _, b := range blobs { - list = append(list, b.Blob) - } - checkPackSize(list, fi.Size) - - err = loadBlobs(ctx, repo, id, list) + err = loadBlobs(ctx, repo, id, blobs) if err != nil { Warnf("error: %v\n", err) } else { @@ -532,14 +513,10 @@ func checkPackSize(blobs []restic.Blob, fileSize int64) { if offset != uint64(pb.Offset) { Printf(" hole in file, want offset %v, got %v\n", offset, pb.Offset) } - offset += uint64(pb.Length) + offset = uint64(pb.Offset + pb.Length) size += uint64(pb.Length) } - - // compute header size, per blob: 1 byte type, 4 byte length, 32 byte id - size += uint64(restic.CiphertextLength(len(blobs) * (1 + 4 + 32))) - // length in uint32 little endian - size += 4 + size += uint64(pack.CalculateHeaderSize(blobs)) if uint64(fileSize) != size { Printf(" file sizes do not match: computed %v from index, file size is %v\n", size, fileSize) From 
253552413231e1bb78581ddf5bc6588c1ba3405e Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 19 Feb 2022 21:59:47 +0100 Subject: [PATCH 09/23] debug: Add support for compressed blobs --- cmd/restic/cmd_debug.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/cmd/restic/cmd_debug.go b/cmd/restic/cmd_debug.go index d144c751f..4c856c8ac 100644 --- a/cmd/restic/cmd_debug.go +++ b/cmd/restic/cmd_debug.go @@ -15,6 +15,7 @@ import ( "sort" "time" + "github.com/klauspost/compress/zstd" "github.com/spf13/cobra" "golang.org/x/sync/errgroup" @@ -309,6 +310,10 @@ func decryptUnsigned(ctx context.Context, k *crypto.Key, buf []byte) []byte { } func loadBlobs(ctx context.Context, repo restic.Repository, pack restic.ID, list []restic.Blob) error { + dec, err := zstd.NewReader(nil) + if err != nil { + panic(err) + } be := repo.Backend() h := restic.Handle{ Name: pack.String(), @@ -353,6 +358,16 @@ func loadBlobs(ctx context.Context, repo restic.Repository, pack restic.ID, list } } + if blob.IsCompressed() { + decompressed, err := dec.DecodeAll(plaintext, nil) + if err != nil { + Printf(" failed to decompress blob %v\n", blob.ID) + } + if decompressed != nil { + plaintext = decompressed + } + } + id := restic.Hash(plaintext) var prefix string if !id.Equal(blob.ID) { From 7132df529e061b85a3aabd65e896a994c72bfe80 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 10 Apr 2022 12:20:15 +0200 Subject: [PATCH 10/23] repository: Increase index size for repo version 2 A compressed index is only about one third the size of an uncompressed one. Thus increase the number of entries in an index to avoid cluttering the repository with small indexes. 
--- cmd/restic/integration_test.go | 2 +- internal/repository/index.go | 15 +++++++++++---- internal/repository/master_index.go | 9 +++++++-- internal/repository/repository.go | 3 +++ internal/repository/repository_test.go | 2 +- 5 files changed, 23 insertions(+), 8 deletions(-) diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index 49121bb1d..ebf63e930 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -1470,7 +1470,7 @@ func TestRebuildIndexAlwaysFull(t *testing.T) { defer func() { repository.IndexFull = indexFull }() - repository.IndexFull = func(*repository.Index) bool { return true } + repository.IndexFull = func(*repository.Index, bool) bool { return true } testRebuildIndex(t, nil) } diff --git a/internal/repository/index.go b/internal/repository/index.go index 5f6fe1997..520fcbd8e 100644 --- a/internal/repository/index.go +++ b/internal/repository/index.go @@ -93,12 +93,13 @@ func (idx *Index) Final() bool { } const ( - indexMaxBlobs = 50000 - indexMaxAge = 10 * time.Minute + indexMaxBlobs = 50000 + indexMaxBlobsCompressed = 3 * indexMaxBlobs + indexMaxAge = 10 * time.Minute ) // IndexFull returns true iff the index is "full enough" to be saved as a preliminary index. 
-var IndexFull = func(idx *Index) bool { +var IndexFull = func(idx *Index, compress bool) bool { idx.m.Lock() defer idx.m.Unlock() @@ -109,12 +110,18 @@ var IndexFull = func(idx *Index) bool { blobs += idx.byType[typ].len() } age := time.Since(idx.created) + var maxBlobs uint + if compress { + maxBlobs = indexMaxBlobsCompressed + } else { + maxBlobs = indexMaxBlobs + } switch { case age >= indexMaxAge: debug.Log("index %p is old enough", idx, age) return true - case blobs >= indexMaxBlobs: + case blobs >= maxBlobs: debug.Log("index %p has %d blobs", idx, blobs) return true } diff --git a/internal/repository/master_index.go b/internal/repository/master_index.go index 9056528a2..96462d4a4 100644 --- a/internal/repository/master_index.go +++ b/internal/repository/master_index.go @@ -16,6 +16,7 @@ type MasterIndex struct { idx []*Index pendingBlobs restic.BlobSet idxMutex sync.RWMutex + compress bool } // NewMasterIndex creates a new master index. @@ -28,6 +29,10 @@ func NewMasterIndex() *MasterIndex { return &MasterIndex{idx: idx, pendingBlobs: restic.NewBlobSet()} } +func (mi *MasterIndex) markCompressed() { + mi.compress = true +} + // Lookup queries all known Indexes for the ID and returns all matches. 
func (mi *MasterIndex) Lookup(bh restic.BlobHandle) (pbs []restic.PackedBlob) { mi.idxMutex.RLock() @@ -206,7 +211,7 @@ func (mi *MasterIndex) FinalizeFullIndexes() []*Index { continue } - if IndexFull(idx) { + if IndexFull(idx, mi.compress) { debug.Log("index %p is full", idx) idx.Finalize() list = append(list, idx) @@ -334,7 +339,7 @@ func (mi *MasterIndex) Save(ctx context.Context, repo restic.Repository, packBla for pbs := range idx.EachByPack(ctx, packBlacklist) { newIndex.StorePack(pbs.packID, pbs.blobs) p.Add(1) - if IndexFull(newIndex) { + if IndexFull(newIndex, mi.compress) { select { case ch <- newIndex: case <-ctx.Done(): diff --git a/internal/repository/repository.go b/internal/repository/repository.go index f028b57ad..bbbc1af68 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -698,6 +698,9 @@ func (r *Repository) SearchKey(ctx context.Context, password string, maxKeys int } else if err != nil { return errors.Fatalf("config cannot be loaded: %v", err) } + if r.Config().Version >= 2 { + r.idx.markCompressed() + } return nil } diff --git a/internal/repository/repository_test.go b/internal/repository/repository_test.go index 7cc593e04..6da685ff3 100644 --- a/internal/repository/repository_test.go +++ b/internal/repository/repository_test.go @@ -367,7 +367,7 @@ func TestRepositoryIncrementalIndex(t *testing.T) { repo := r.(*repository.Repository) - repository.IndexFull = func(*repository.Index) bool { return true } + repository.IndexFull = func(*repository.Index, bool) bool { return true } // add 15 packs for j := 0; j < 5; j++ { From ba27d29d586db7161a79c7228ff975a727867ee6 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Mon, 11 Apr 2022 20:59:44 +0200 Subject: [PATCH 11/23] Print repository version when opening a repo --- cmd/restic/global.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/restic/global.go b/cmd/restic/global.go index f4a2df1b5..ca4b036c9 100644 --- a/cmd/restic/global.go 
+++ b/cmd/restic/global.go @@ -471,7 +471,7 @@ func OpenRepository(opts GlobalOptions) (*repository.Repository, error) { id = id[:8] } if !opts.JSON { - Verbosef("repository %v opened successfully, password is correct\n", id) + Verbosef("repository %v opened (repo version %v) successfully, password is correct\n", id, s.Config().Version) } } From f38f457a6404df321d3a04cd665edb584bf0b719 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Mon, 11 Apr 2022 21:28:21 +0200 Subject: [PATCH 12/23] Add basic changelog for compression support --- changelog/unreleased/issue-21 | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 changelog/unreleased/issue-21 diff --git a/changelog/unreleased/issue-21 b/changelog/unreleased/issue-21 new file mode 100644 index 000000000..a38074643 --- /dev/null +++ b/changelog/unreleased/issue-21 @@ -0,0 +1,16 @@ +Enhancement: Add compression support + +We have added compression support to the restic repository format. To create a +repository using the new format run `init --repository-version 2`. Please note +that the repository cannot be read by restic versions prior to 0.14.0. + +The new format version has not received much testing yet. Do not rely on it as +your only backup copy! Please run `check` in regular intervals to detect any +problems. + +Upgrading in place is not yet supported. As a workaround, first create a new +repository using `init --repository-version 2 --copy-chunker-params --repo2 path/to/old/repo`. +Then use the `copy` command to copy all snapshots to the new repository. 
+ +https://github.com/restic/restic/issues/21 +https://github.com/restic/restic/pull/3666 From 8b11b8638349a596592e86e611187be42651a71b Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Wed, 13 Apr 2022 20:34:05 +0200 Subject: [PATCH 13/23] Add option global --compression --- cmd/restic/cmd_init.go | 2 +- cmd/restic/global.go | 4 +- internal/checker/checker_test.go | 2 +- internal/repository/repository.go | 69 +++++++++++++++++++++++++++++-- internal/repository/testing.go | 4 +- 5 files changed, 72 insertions(+), 9 deletions(-) diff --git a/cmd/restic/cmd_init.go b/cmd/restic/cmd_init.go index f5e43bbc9..058f1ed07 100644 --- a/cmd/restic/cmd_init.go +++ b/cmd/restic/cmd_init.go @@ -86,7 +86,7 @@ func runInit(opts InitOptions, gopts GlobalOptions, args []string) error { return errors.Fatalf("create repository at %s failed: %v\n", location.StripPassword(gopts.Repo), err) } - s := repository.New(be) + s := repository.New(be, repository.Options{Compression: gopts.Compression}) err = s.Init(gopts.ctx, version, gopts.password, chunkerPolynomial) if err != nil { diff --git a/cmd/restic/global.go b/cmd/restic/global.go index ca4b036c9..65dbbb6be 100644 --- a/cmd/restic/global.go +++ b/cmd/restic/global.go @@ -64,6 +64,7 @@ type GlobalOptions struct { InsecureTLS bool TLSClientCert string CleanupCache bool + Compression repository.CompressionMode LimitUploadKb int LimitDownloadKb int @@ -120,6 +121,7 @@ func init() { f.StringVar(&globalOptions.TLSClientCert, "tls-client-cert", "", "path to a `file` containing PEM encoded TLS client certificate and private key") f.BoolVar(&globalOptions.InsecureTLS, "insecure-tls", false, "skip TLS certificate verification when connecting to the repo (insecure)") f.BoolVar(&globalOptions.CleanupCache, "cleanup-cache", false, "auto remove old cache directories") + f.Var(&globalOptions.Compression, "compression", "compression mode (only available for repo format version 2), one of (auto|off|max)") f.IntVar(&globalOptions.LimitUploadKb, 
"limit-upload", 0, "limits uploads to a maximum rate in KiB/s. (default: unlimited)") f.IntVar(&globalOptions.LimitDownloadKb, "limit-download", 0, "limits downloads to a maximum rate in KiB/s. (default: unlimited)") f.StringSliceVarP(&globalOptions.Options, "option", "o", []string{}, "set extended option (`key=value`, can be specified multiple times)") @@ -435,7 +437,7 @@ func OpenRepository(opts GlobalOptions) (*repository.Repository, error) { } } - s := repository.New(be) + s := repository.New(be, repository.Options{Compression: opts.Compression}) passwordTriesLeft := 1 if stdinIsTerminal() && opts.password == "" { diff --git a/internal/checker/checker_test.go b/internal/checker/checker_test.go index 1330211eb..2a4384b15 100644 --- a/internal/checker/checker_test.go +++ b/internal/checker/checker_test.go @@ -350,7 +350,7 @@ func TestCheckerModifiedData(t *testing.T) { t.Logf("archived as %v", sn.ID().Str()) beError := &errorBackend{Backend: repo.Backend()} - checkRepo := repository.New(beError) + checkRepo := repository.New(beError, repository.Options{}) test.OK(t, checkRepo.SearchKey(context.TODO(), test.TestPassword, 5, "")) chkr := checker.New(checkRepo, false) diff --git a/internal/repository/repository.go b/internal/repository/repository.go index bbbc1af68..b6c910cc7 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -37,6 +37,8 @@ type Repository struct { idx *MasterIndex Cache *cache.Cache + opts Options + noAutoIndexUpdate bool treePM *packerManager @@ -48,10 +50,58 @@ type Repository struct { dec *zstd.Decoder } +type Options struct { + Compression CompressionMode +} + +// CompressionMode configures if data should be compressed. +type CompressionMode uint + +// Constants for the different compression levels. +const ( + CompressionAuto CompressionMode = 0 + CompressionOff CompressionMode = 1 + CompressionMax CompressionMode = 2 +) + +// Set implements the method needed for pflag command flag parsing. 
+func (c *CompressionMode) Set(s string) error { + switch s { + case "auto": + *c = CompressionAuto + case "off": + *c = CompressionOff + case "max": + *c = CompressionMax + default: + return fmt.Errorf("invalid compression mode %q, must be one of (auto|off|max)", s) + } + + return nil +} + +func (c *CompressionMode) String() string { + switch *c { + case CompressionAuto: + return "auto" + case CompressionOff: + return "off" + case CompressionMax: + return "max" + default: + return "invalid" + } + +} +func (c *CompressionMode) Type() string { + return "mode" +} + // New returns a new repository with backend be. -func New(be restic.Backend) *Repository { +func New(be restic.Backend, opts Options) *Repository { repo := &Repository{ be: be, + opts: opts, idx: NewMasterIndex(), dataPM: newPackerManager(be, nil), treePM: newPackerManager(be, nil), @@ -274,7 +324,12 @@ func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, bo func (r *Repository) getZstdEncoder() *zstd.Encoder { r.allocEnc.Do(func() { - enc, err := zstd.NewWriter(nil) + level := zstd.SpeedDefault + if r.opts.Compression == CompressionMax { + level = zstd.SpeedBestCompression + } + + enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(level)) if err != nil { panic(err) } @@ -302,8 +357,14 @@ func (r *Repository) saveAndEncrypt(ctx context.Context, t restic.BlobType, data uncompressedLength := 0 if r.cfg.Version > 1 { - uncompressedLength = len(data) - data = r.getZstdEncoder().EncodeAll(data, nil) + + // we have a repo v2, so compression is available. if the user opts to + // not compress, we won't compress any data, but everything else is + // compressed. 
+ if r.opts.Compression != CompressionOff || t != restic.DataBlob { + uncompressedLength = len(data) + data = r.getZstdEncoder().EncodeAll(data, nil) + } } nonce := crypto.NewRandomNonce() diff --git a/internal/repository/testing.go b/internal/repository/testing.go index d752e107e..fbe334467 100644 --- a/internal/repository/testing.go +++ b/internal/repository/testing.go @@ -51,7 +51,7 @@ func TestRepositoryWithBackend(t testing.TB, be restic.Backend) (r restic.Reposi be, beCleanup = TestBackend(t) } - repo := New(be) + repo := New(be, Options{}) cfg := restic.TestCreateConfig(t, TestChunkerPol) err := repo.init(context.TODO(), test.TestPassword, cfg) @@ -98,7 +98,7 @@ func TestOpenLocal(t testing.TB, dir string) (r restic.Repository) { t.Fatal(err) } - repo := New(be) + repo := New(be, Options{}) err = repo.SearchKey(context.TODO(), test.TestPassword, 10, "") if err != nil { t.Fatal(err) From 94dc9a0fa799a61d570fa330c201ed88d73ece12 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Wed, 13 Apr 2022 20:38:30 +0200 Subject: [PATCH 14/23] Amend changelog --- changelog/unreleased/issue-21 | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/changelog/unreleased/issue-21 b/changelog/unreleased/issue-21 index a38074643..0a3040add 100644 --- a/changelog/unreleased/issue-21 +++ b/changelog/unreleased/issue-21 @@ -4,6 +4,11 @@ We have added compression support to the restic repository format. To create a repository using the new format run `init --repository-version 2`. Please note that the repository cannot be read by restic versions prior to 0.14.0. +You can configure if data is compressed with the option `--compression`. It can +be set to `auto` (the default, which will compress very fast), `max` (which +will trade backup speed and CPU usage for better compression), or `off` (which +disables compression). Each setting is only applied for the single run of restic. + The new format version has not received much testing yet. 
Do not rely on it as your only backup copy! Please run `check` in regular intervals to detect any problems. From 2f36e044db01135758f89c3d3608bf507fdfad64 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 16 Apr 2022 21:05:15 +0200 Subject: [PATCH 15/23] Cleanup pack header check --- internal/repository/repository.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/repository/repository.go b/internal/repository/repository.go index b6c910cc7..8ca39d50b 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -439,7 +439,7 @@ func (r *Repository) decompressUnpacked(p []byte) ([]byte, error) { return p, nil } - if len(p) < 1 { + if len(p) == 0 { // too short for version header return p, nil } From 5eb05a0afe9e32baad77e6ff4ea24ef7adb4c7bc Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Wed, 20 Apr 2022 20:46:11 +0200 Subject: [PATCH 16/23] Configure zstd encoder/decoder --- internal/repository/repository.go | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/internal/repository/repository.go b/internal/repository/repository.go index 8ca39d50b..ece395f3a 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -329,7 +329,18 @@ func (r *Repository) getZstdEncoder() *zstd.Encoder { level = zstd.SpeedBestCompression } - enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(level)) + opts := []zstd.EOption{ + // Set the compression level configured. + zstd.WithEncoderLevel(level), + // Disable CRC, we have enough checks in place, makes the + // compressed data four bytes shorter. + zstd.WithEncoderCRC(false), + // Set a window of 512kbyte, so we have good lookbehind for usual + // blob sizes. + zstd.WithWindowSize(512 * 1024), + } + + enc, err := zstd.NewWriter(nil, opts...) 
if err != nil { panic(err) } @@ -340,7 +351,15 @@ func (r *Repository) getZstdEncoder() *zstd.Encoder { func (r *Repository) getZstdDecoder() *zstd.Decoder { r.allocDec.Do(func() { - dec, err := zstd.NewReader(nil) + opts := []zstd.DOption{ + // Use all available cores. + zstd.WithDecoderConcurrency(0), + // Limit the maximum decompressed memory. Set to a very high, + // conservative value. + zstd.WithDecoderMaxMemory(16 * 1024 * 1024 * 1024), + } + + dec, err := zstd.NewReader(nil, opts...) if err != nil { panic(err) } From 8776031f960e2bb0acb18c3bde276e68a863eb02 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Wed, 20 Apr 2022 20:55:43 +0200 Subject: [PATCH 17/23] Leave allocating slices to the decompress code --- internal/repository/repository.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/internal/repository/repository.go b/internal/repository/repository.go index ece395f3a..5dcd66e71 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -1021,9 +1021,8 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack nonce, ciphertext := buf[:key.NonceSize()], buf[key.NonceSize():] plaintext, err := key.Open(ciphertext[:0], nonce, ciphertext, nil) if err == nil && entry.IsCompressed() { - if cap(decode) < int(entry.DataLength()) { - decode = make([]byte, 0, entry.DataLength()) - } + // DecodeAll will allocate a slice if it is not large enough since it + // knows the decompressed size (because we're using EncodeAll) decode, err = dec.DecodeAll(plaintext, decode[:0]) plaintext = decode if err != nil { From abe59356930dbe69694272ce80e0c66001ce459d Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Fri, 29 Apr 2022 23:12:43 +0200 Subject: [PATCH 18/23] repository: unify repository version-specific initialization Mark the master index as compressed also when initializing a new repository. This is only relevant for testing. 
--- internal/repository/repository.go | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/internal/repository/repository.go b/internal/repository/repository.go index 5dcd66e71..d58406c7c 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -116,6 +116,14 @@ func (r *Repository) DisableAutoIndexUpdate() { r.noAutoIndexUpdate = true } +// setConfig assigns the given config and updates the repository parameters accordingly +func (r *Repository) setConfig(cfg restic.Config) { + r.cfg = cfg + if r.cfg.Version >= 2 { + r.idx.markCompressed() + } +} + // Config returns the repository configuration. func (r *Repository) Config() restic.Config { return r.cfg @@ -772,15 +780,14 @@ func (r *Repository) SearchKey(ctx context.Context, password string, maxKeys int r.dataPM.key = key.master r.treePM.key = key.master r.keyName = key.Name() - r.cfg, err = restic.LoadConfig(ctx, r) + cfg, err := restic.LoadConfig(ctx, r) if err == crypto.ErrUnauthenticated { return errors.Fatalf("config or key %v is damaged: %v", key.Name(), err) } else if err != nil { return errors.Fatalf("config cannot be loaded: %v", err) } - if r.Config().Version >= 2 { - r.idx.markCompressed() - } + + r.setConfig(cfg) return nil } @@ -826,7 +833,7 @@ func (r *Repository) init(ctx context.Context, password string, cfg restic.Confi r.dataPM.key = key.master r.treePM.key = key.master r.keyName = key.Name() - r.cfg = cfg + r.setConfig(cfg) _, err = r.SaveJSONUnpacked(ctx, restic.ConfigFile, cfg) return err } From 9ffb8920f15e5f741a7af23a780feb99fff43306 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Fri, 29 Apr 2022 23:16:16 +0200 Subject: [PATCH 19/23] repository: run blackbox tests using old and new repo version --- internal/archiver/archiver_test.go | 2 +- internal/repository/master_index_test.go | 10 ++-- internal/repository/repack_test.go | 20 +++++-- internal/repository/repository_test.go | 66 ++++++++++++++++++++---- 
internal/repository/testing.go | 34 ++++++++++-- internal/restic/config.go | 10 +++- 6 files changed, 117 insertions(+), 25 deletions(-) diff --git a/internal/archiver/archiver_test.go b/internal/archiver/archiver_test.go index e18156ceb..0d6295c39 100644 --- a/internal/archiver/archiver_test.go +++ b/internal/archiver/archiver_test.go @@ -1894,7 +1894,7 @@ func TestArchiverContextCanceled(t *testing.T) { defer removeTempdir() // Ensure that the archiver itself reports the canceled context and not just the backend - repo, _ := repository.TestRepositoryWithBackend(t, &noCancelBackend{mem.New()}) + repo, _ := repository.TestRepositoryWithBackend(t, &noCancelBackend{mem.New()}, 0) back := restictest.Chdir(t, tempdir) defer back() diff --git a/internal/repository/master_index_test.go b/internal/repository/master_index_test.go index 2470dadfc..28a2c1251 100644 --- a/internal/repository/master_index_test.go +++ b/internal/repository/master_index_test.go @@ -335,8 +335,8 @@ var ( depth = 3 ) -func createFilledRepo(t testing.TB, snapshots int, dup float32) (restic.Repository, func()) { - repo, cleanup := repository.TestRepository(t) +func createFilledRepo(t testing.TB, snapshots int, dup float32, version uint) (restic.Repository, func()) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) for i := 0; i < 3; i++ { restic.TestCreateSnapshot(t, repo, snapshotTime.Add(time.Duration(i)*time.Second), depth, dup) @@ -346,7 +346,11 @@ func createFilledRepo(t testing.TB, snapshots int, dup float32) (restic.Reposito } func TestIndexSave(t *testing.T) { - repo, cleanup := createFilledRepo(t, 3, 0) + repository.TestAllVersions(t, testIndexSave) +} + +func testIndexSave(t *testing.T, version uint) { + repo, cleanup := createFilledRepo(t, 3, 0, version) defer cleanup() err := repo.LoadIndex(context.TODO()) diff --git a/internal/repository/repack_test.go b/internal/repository/repack_test.go index e40f5f6af..b86c8c95d 100644 --- a/internal/repository/repack_test.go +++ 
b/internal/repository/repack_test.go @@ -212,7 +212,11 @@ func reloadIndex(t *testing.T, repo restic.Repository) { } func TestRepack(t *testing.T) { - repo, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testRepack) +} + +func testRepack(t *testing.T, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() seed := time.Now().UnixNano() @@ -279,9 +283,13 @@ func TestRepack(t *testing.T) { } func TestRepackCopy(t *testing.T) { - repo, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testRepackCopy) +} + +func testRepackCopy(t *testing.T, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() - dstRepo, dstCleanup := repository.TestRepository(t) + dstRepo, dstCleanup := repository.TestRepositoryWithVersion(t, version) defer dstCleanup() seed := time.Now().UnixNano() @@ -318,7 +326,11 @@ func TestRepackCopy(t *testing.T) { } func TestRepackWrongBlob(t *testing.T) { - repo, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testRepackWrongBlob) +} + +func testRepackWrongBlob(t *testing.T, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() seed := time.Now().UnixNano() diff --git a/internal/repository/repository_test.go b/internal/repository/repository_test.go index 6da685ff3..f2daaf194 100644 --- a/internal/repository/repository_test.go +++ b/internal/repository/repository_test.go @@ -28,7 +28,11 @@ var testSizes = []int{5, 23, 2<<18 + 23, 1 << 20} var rnd = rand.New(rand.NewSource(time.Now().UnixNano())) func TestSave(t *testing.T) { - repo, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testSave) +} + +func testSave(t *testing.T, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() for _, size := range testSizes { @@ -63,7 +67,11 @@ func TestSave(t *testing.T) { } func TestSaveFrom(t 
*testing.T) { - repo, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testSaveFrom) +} + +func testSaveFrom(t *testing.T, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() for _, size := range testSizes { @@ -96,7 +104,11 @@ func TestSaveFrom(t *testing.T) { } func BenchmarkSaveAndEncrypt(t *testing.B) { - repo, cleanup := repository.TestRepository(t) + repository.BenchmarkAllVersions(t, benchmarkSaveAndEncrypt) +} + +func benchmarkSaveAndEncrypt(t *testing.B, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() size := 4 << 20 // 4MiB @@ -118,7 +130,11 @@ func BenchmarkSaveAndEncrypt(t *testing.B) { } func TestLoadTree(t *testing.T) { - repo, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testLoadTree) +} + +func testLoadTree(t *testing.T, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() if rtest.BenchArchiveDirectory == "" { @@ -134,7 +150,11 @@ func TestLoadTree(t *testing.T) { } func BenchmarkLoadTree(t *testing.B) { - repo, cleanup := repository.TestRepository(t) + repository.BenchmarkAllVersions(t, benchmarkLoadTree) +} + +func benchmarkLoadTree(t *testing.B, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() if rtest.BenchArchiveDirectory == "" { @@ -154,7 +174,11 @@ func BenchmarkLoadTree(t *testing.B) { } func TestLoadBlob(t *testing.T) { - repo, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testLoadBlob) +} + +func testLoadBlob(t *testing.T, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() length := 1000000 @@ -183,7 +207,11 @@ func TestLoadBlob(t *testing.T) { } func BenchmarkLoadBlob(b *testing.B) { - repo, cleanup := repository.TestRepository(b) + repository.BenchmarkAllVersions(b, benchmarkLoadBlob) +} + +func benchmarkLoadBlob(b 
*testing.B, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(b, version) defer cleanup() length := 1000000 @@ -219,7 +247,11 @@ func BenchmarkLoadBlob(b *testing.B) { } func BenchmarkLoadUnpacked(b *testing.B) { - repo, cleanup := repository.TestRepository(b) + repository.BenchmarkAllVersions(b, benchmarkLoadUnpacked) +} + +func benchmarkLoadUnpacked(b *testing.B, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(b, version) defer cleanup() length := 1000000 @@ -255,7 +287,11 @@ func BenchmarkLoadUnpacked(b *testing.B) { } func TestLoadJSONUnpacked(t *testing.T) { - repo, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testLoadJSONUnpacked) +} + +func testLoadJSONUnpacked(t *testing.T, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() if rtest.BenchArchiveDirectory == "" { @@ -313,9 +349,13 @@ func loadIndex(ctx context.Context, repo restic.Repository, id restic.ID) (*repo } func BenchmarkLoadIndex(b *testing.B) { + repository.BenchmarkAllVersions(b, benchmarkLoadIndex) +} + +func benchmarkLoadIndex(b *testing.B, version uint) { repository.TestUseLowSecurityKDFParameters(b) - repo, cleanup := repository.TestRepository(b) + repo, cleanup := repository.TestRepositoryWithVersion(b, version) defer cleanup() idx := repository.NewIndex() @@ -362,7 +402,11 @@ func saveRandomDataBlobs(t testing.TB, repo restic.Repository, num int, sizeMax } func TestRepositoryIncrementalIndex(t *testing.T) { - r, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testRepositoryIncrementalIndex) +} + +func testRepositoryIncrementalIndex(t *testing.T, version uint) { + r, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() repo := r.(*repository.Repository) diff --git a/internal/repository/testing.go b/internal/repository/testing.go index fbe334467..05dfab64d 100644 --- a/internal/repository/testing.go +++ 
b/internal/repository/testing.go @@ -2,6 +2,7 @@ package repository import ( "context" + "fmt" "os" "testing" @@ -41,7 +42,7 @@ const TestChunkerPol = chunker.Pol(0x3DA3358B4DC173) // TestRepositoryWithBackend returns a repository initialized with a test // password. If be is nil, an in-memory backend is used. A constant polynomial // is used for the chunker and low-security test parameters. -func TestRepositoryWithBackend(t testing.TB, be restic.Backend) (r restic.Repository, cleanup func()) { +func TestRepositoryWithBackend(t testing.TB, be restic.Backend, version uint) (r restic.Repository, cleanup func()) { t.Helper() TestUseLowSecurityKDFParameters(t) restic.TestDisableCheckPolynomial(t) @@ -53,7 +54,7 @@ func TestRepositoryWithBackend(t testing.TB, be restic.Backend) (r restic.Reposi repo := New(be, Options{}) - cfg := restic.TestCreateConfig(t, TestChunkerPol) + cfg := restic.TestCreateConfig(t, TestChunkerPol, version) err := repo.init(context.TODO(), test.TestPassword, cfg) if err != nil { t.Fatalf("TestRepository(): initialize repo failed: %v", err) @@ -71,6 +72,11 @@ func TestRepositoryWithBackend(t testing.TB, be restic.Backend) (r restic.Reposi // a non-existing directory, a local backend is created there and this is used // instead. The directory is not removed, but left there for inspection. 
func TestRepository(t testing.TB) (r restic.Repository, cleanup func()) { + t.Helper() + return TestRepositoryWithVersion(t, 0) +} + +func TestRepositoryWithVersion(t testing.TB, version uint) (r restic.Repository, cleanup func()) { t.Helper() dir := os.Getenv("RESTIC_TEST_REPO") if dir != "" { @@ -80,7 +86,7 @@ func TestRepository(t testing.TB) (r restic.Repository, cleanup func()) { if err != nil { t.Fatalf("error creating local backend at %v: %v", dir, err) } - return TestRepositoryWithBackend(t, be) + return TestRepositoryWithBackend(t, be, version) } if err == nil { @@ -88,7 +94,7 @@ func TestRepository(t testing.TB) (r restic.Repository, cleanup func()) { } } - return TestRepositoryWithBackend(t, nil) + return TestRepositoryWithBackend(t, nil, version) } // TestOpenLocal opens a local repository. @@ -106,3 +112,23 @@ func TestOpenLocal(t testing.TB, dir string) (r restic.Repository) { return repo } + +type VersionedTest func(t *testing.T, version uint) + +func TestAllVersions(t *testing.T, test VersionedTest) { + for version := restic.MinRepoVersion; version <= restic.MaxRepoVersion; version++ { + t.Run(fmt.Sprintf("v%d", version), func(t *testing.T) { + test(t, uint(version)) + }) + } +} + +type VersionedBenchmark func(b *testing.B, version uint) + +func BenchmarkAllVersions(b *testing.B, bench VersionedBenchmark) { + for version := restic.MinRepoVersion; version <= restic.MaxRepoVersion; version++ { + b.Run(fmt.Sprintf("v%d", version), func(b *testing.B) { + bench(b, uint(version)) + }) + } +} diff --git a/internal/restic/config.go b/internal/restic/config.go index 3a6bab746..6df32e2ef 100644 --- a/internal/restic/config.go +++ b/internal/restic/config.go @@ -51,11 +51,17 @@ func CreateConfig(version uint) (Config, error) { } // TestCreateConfig creates a config for use within tests. 
-func TestCreateConfig(t testing.TB, pol chunker.Pol) (cfg Config) { +func TestCreateConfig(t testing.TB, pol chunker.Pol, version uint) (cfg Config) { cfg.ChunkerPolynomial = pol cfg.ID = NewRandomID().String() - cfg.Version = StableRepoVersion + if version == 0 { + version = StableRepoVersion + } + if version < MinRepoVersion || version > MaxRepoVersion { + t.Fatalf("version %d is out of range", version) + } + cfg.Version = version return cfg } From ec2b25565aaa021e15a786519df637df54291374 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Fri, 29 Apr 2022 23:17:01 +0200 Subject: [PATCH 20/23] repository: test uncompressedLength field and index example --- internal/repository/index_test.go | 160 ++++++++++++++++------- internal/repository/master_index_test.go | 23 ++-- 2 files changed, 125 insertions(+), 58 deletions(-) diff --git a/internal/repository/index_test.go b/internal/repository/index_test.go index c4f0179db..6940afe2f 100644 --- a/internal/repository/index_test.go +++ b/internal/repository/index_test.go @@ -23,11 +23,17 @@ func TestIndexSerialize(t *testing.T) { pos := uint(0) for j := 0; j < 20; j++ { length := uint(i*100 + j) + uncompressedLength := uint(0) + if i >= 25 { + // test a mix of compressed and uncompressed packs + uncompressedLength = 2 * length + } pb := restic.PackedBlob{ Blob: restic.Blob{ - BlobHandle: restic.NewRandomBlobHandle(), - Offset: pos, - Length: length, + BlobHandle: restic.NewRandomBlobHandle(), + Offset: pos, + Length: length, + UncompressedLength: uncompressedLength, }, PackID: packID, } @@ -164,7 +170,7 @@ func TestIndexSize(t *testing.T) { } // example index serialization from doc/Design.rst -var docExample = []byte(` +var docExampleV1 = []byte(` { "supersedes": [ "ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452" @@ -177,12 +183,12 @@ var docExample = []byte(` "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce", "type": "data", "offset": 0, - "length": 25 + "length": 38 },{ "id": 
"9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae", "type": "tree", "offset": 38, - "length": 100 + "length": 112 }, { "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66", @@ -196,6 +202,41 @@ var docExample = []byte(` } `) +var docExampleV2 = []byte(` +{ + "supersedes": [ + "ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452" + ], + "packs": [ + { + "id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c", + "blobs": [ + { + "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce", + "type": "data", + "offset": 0, + "length": 38 + }, + { + "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae", + "type": "tree", + "offset": 38, + "length": 112, + "uncompressed_length": 511 + }, + { + "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66", + "type": "data", + "offset": 150, + "length": 123, + "uncompressed_length": 234 + } + ] + } + ] + } +`) + var docOldExample = []byte(` [ { "id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c", @@ -204,12 +245,12 @@ var docOldExample = []byte(` "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce", "type": "data", "offset": 0, - "length": 25 + "length": 38 },{ "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae", "type": "tree", "offset": 38, - "length": 100 + "length": 112 }, { "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66", @@ -222,22 +263,23 @@ var docOldExample = []byte(` `) var exampleTests = []struct { - id, packID restic.ID - tpe restic.BlobType - offset, length uint + id, packID restic.ID + tpe restic.BlobType + offset, length uint + uncompressedLength uint }{ { restic.TestParseID("3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce"), restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"), - restic.DataBlob, 0, 25, + restic.DataBlob, 0, 38, 0, }, { 
restic.TestParseID("9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae"), restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"), - restic.TreeBlob, 38, 100, + restic.TreeBlob, 38, 112, 511, }, { restic.TestParseID("d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66"), restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"), - restic.DataBlob, 150, 123, + restic.DataBlob, 150, 123, 234, }, } @@ -254,41 +296,56 @@ var exampleLookupTest = struct { } func TestIndexUnserialize(t *testing.T) { - oldIdx := restic.IDs{restic.TestParseID("ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452")} + for _, task := range []struct { + idxBytes []byte + version int + }{ + {docExampleV1, 1}, + {docExampleV2, 2}, + } { + oldIdx := restic.IDs{restic.TestParseID("ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452")} - idx, oldFormat, err := repository.DecodeIndex(docExample, restic.NewRandomID()) - rtest.OK(t, err) - rtest.Assert(t, !oldFormat, "new index format recognized as old format") + idx, oldFormat, err := repository.DecodeIndex(task.idxBytes, restic.NewRandomID()) + rtest.OK(t, err) + rtest.Assert(t, !oldFormat, "new index format recognized as old format") - for _, test := range exampleTests { - list := idx.Lookup(restic.BlobHandle{ID: test.id, Type: test.tpe}, nil) - if len(list) != 1 { - t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list) + for _, test := range exampleTests { + list := idx.Lookup(restic.BlobHandle{ID: test.id, Type: test.tpe}, nil) + if len(list) != 1 { + t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list) + } + blob := list[0] + + t.Logf("looking for blob %v/%v, got %v", test.tpe, test.id.Str(), blob) + + rtest.Equals(t, test.packID, blob.PackID) + rtest.Equals(t, test.tpe, blob.Type) + rtest.Equals(t, test.offset, blob.Offset) + rtest.Equals(t, test.length, blob.Length) 
+ if task.version == 1 { + rtest.Equals(t, uint(0), blob.UncompressedLength) + } else if task.version == 2 { + rtest.Equals(t, test.uncompressedLength, blob.UncompressedLength) + } else { + t.Fatal("Invalid index version") + } } - blob := list[0] - t.Logf("looking for blob %v/%v, got %v", test.tpe, test.id.Str(), blob) + rtest.Equals(t, oldIdx, idx.Supersedes()) - rtest.Equals(t, test.packID, blob.PackID) - rtest.Equals(t, test.tpe, blob.Type) - rtest.Equals(t, test.offset, blob.Offset) - rtest.Equals(t, test.length, blob.Length) - } - - rtest.Equals(t, oldIdx, idx.Supersedes()) - - blobs := idx.ListPack(exampleLookupTest.packID) - if len(blobs) != len(exampleLookupTest.blobs) { - t.Fatalf("expected %d blobs in pack, got %d", len(exampleLookupTest.blobs), len(blobs)) - } - - for _, blob := range blobs { - b, ok := exampleLookupTest.blobs[blob.ID] - if !ok { - t.Errorf("unexpected blob %v found", blob.ID.Str()) + blobs := idx.ListPack(exampleLookupTest.packID) + if len(blobs) != len(exampleLookupTest.blobs) { + t.Fatalf("expected %d blobs in pack, got %d", len(exampleLookupTest.blobs), len(blobs)) } - if blob.Type != b { - t.Errorf("unexpected type for blob %v: want %v, got %v", blob.ID.Str(), b, blob.Type) + + for _, blob := range blobs { + b, ok := exampleLookupTest.blobs[blob.ID] + if !ok { + t.Errorf("unexpected blob %v found", blob.ID.Str()) + } + if blob.Type != b { + t.Errorf("unexpected type for blob %v: want %v, got %v", blob.ID.Str(), b, blob.Type) + } } } } @@ -403,8 +460,9 @@ func createRandomIndex(rng *rand.Rand, packfiles int) (idx *repository.Index, lo Type: restic.DataBlob, ID: id, }, - Length: uint(size), - Offset: uint(offset), + Length: uint(size), + UncompressedLength: uint(2 * size), + Offset: uint(offset), }) offset += size @@ -475,11 +533,17 @@ func TestIndexHas(t *testing.T) { pos := uint(0) for j := 0; j < 20; j++ { length := uint(i*100 + j) + uncompressedLength := uint(0) + if i >= 25 { + // test a mix of compressed and uncompressed packs + 
uncompressedLength = 2 * length + } pb := restic.PackedBlob{ Blob: restic.Blob{ - BlobHandle: restic.NewRandomBlobHandle(), - Offset: pos, - Length: length, + BlobHandle: restic.NewRandomBlobHandle(), + Offset: pos, + Length: length, + UncompressedLength: uncompressedLength, }, PackID: packID, } diff --git a/internal/repository/master_index_test.go b/internal/repository/master_index_test.go index 28a2c1251..79932af07 100644 --- a/internal/repository/master_index_test.go +++ b/internal/repository/master_index_test.go @@ -30,9 +30,10 @@ func TestMasterIndex(t *testing.T) { blob2 := restic.PackedBlob{ PackID: restic.NewRandomID(), Blob: restic.Blob{ - BlobHandle: bhInIdx2, - Length: uint(restic.CiphertextLength(100)), - Offset: 10, + BlobHandle: bhInIdx2, + Length: uint(restic.CiphertextLength(100)), + Offset: 10, + UncompressedLength: 200, }, } @@ -48,9 +49,10 @@ func TestMasterIndex(t *testing.T) { blob12b := restic.PackedBlob{ PackID: restic.NewRandomID(), Blob: restic.Blob{ - BlobHandle: bhInIdx12, - Length: uint(restic.CiphertextLength(123)), - Offset: 50, + BlobHandle: bhInIdx12, + Length: uint(restic.CiphertextLength(123)), + Offset: 50, + UncompressedLength: 80, }, } @@ -86,7 +88,7 @@ func TestMasterIndex(t *testing.T) { size, found = mIdx.LookupSize(bhInIdx2) rtest.Equals(t, true, found) - rtest.Equals(t, uint(100), size) + rtest.Equals(t, uint(200), size) // test idInIdx12 found = mIdx.Has(bhInIdx12) @@ -144,9 +146,10 @@ func TestMasterMergeFinalIndexes(t *testing.T) { blob2 := restic.PackedBlob{ PackID: restic.NewRandomID(), Blob: restic.Blob{ - BlobHandle: bhInIdx2, - Length: 100, - Offset: 10, + BlobHandle: bhInIdx2, + Length: 100, + Offset: 10, + UncompressedLength: 200, }, } From bcab5486171e0dd09f8fc4dcf87b58031fc8f13a Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Fri, 29 Apr 2022 23:41:03 +0200 Subject: [PATCH 21/23] pack: slightly expand testing of compressed blobs --- internal/pack/pack.go | 16 ++++++++--- internal/pack/pack_internal_test.go 
| 42 +++++++++++++++++++++++++---- 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/internal/pack/pack.go b/internal/pack/pack.go index 697a14a5d..2d7a5c3fb 100644 --- a/internal/pack/pack.go +++ b/internal/pack/pack.go @@ -54,10 +54,18 @@ var plainEntrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + l // headerEntry describes the format of header entries. It serves only as // documentation. type headerEntry struct { - Type uint8 - Length uint32 - ID restic.ID - CompressedLength uint32 + Type uint8 + Length uint32 + ID restic.ID +} + +// compressedHeaderEntry describes the format of header entries for compressed blobs. +// It serves only as documentation. +type compressedHeaderEntry struct { + Type uint8 + Length uint32 + UncompressedLength uint32 + ID restic.ID } // Finalize writes the header for all added blobs and finalizes the pack. diff --git a/internal/pack/pack_internal_test.go b/internal/pack/pack_internal_test.go index 93d04f18e..c1a4867ea 100644 --- a/internal/pack/pack_internal_test.go +++ b/internal/pack/pack_internal_test.go @@ -13,7 +13,7 @@ import ( func TestParseHeaderEntry(t *testing.T) { h := headerEntry{ - Type: 0, // Blob. 
+ Type: 0, // Blob Length: 100, } for i := range h.ID { @@ -28,21 +28,53 @@ func TestParseHeaderEntry(t *testing.T) { rtest.Equals(t, restic.DataBlob, b.Type) rtest.Equals(t, plainEntrySize, size) t.Logf("%v %v", h.ID, b.ID) - rtest.Assert(t, bytes.Equal(h.ID[:], b.ID[:]), "id mismatch") + rtest.Equals(t, h.ID[:], b.ID[:]) rtest.Equals(t, uint(h.Length), b.Length) + rtest.Equals(t, uint(0), b.UncompressedLength) + + c := compressedHeaderEntry{ + Type: 2, // compressed Blob + Length: 100, + UncompressedLength: 200, + } + for i := range c.ID { + c.ID[i] = byte(i) + } + + buf = new(bytes.Buffer) + _ = binary.Write(buf, binary.LittleEndian, &c) + + b, size, err = parseHeaderEntry(buf.Bytes()) + rtest.OK(t, err) + rtest.Equals(t, restic.DataBlob, b.Type) + rtest.Equals(t, entrySize, size) + t.Logf("%v %v", c.ID, b.ID) + rtest.Equals(t, c.ID[:], b.ID[:]) + rtest.Equals(t, uint(c.Length), b.Length) + rtest.Equals(t, uint(c.UncompressedLength), b.UncompressedLength) +} + +func TestParseHeaderEntryErrors(t *testing.T) { + h := headerEntry{ + Type: 0, // Blob + Length: 100, + } + for i := range h.ID { + h.ID[i] = byte(i) + } h.Type = 0xae - buf.Reset() + buf := new(bytes.Buffer) _ = binary.Write(buf, binary.LittleEndian, &h) - b, _, err = parseHeaderEntry(buf.Bytes()) + _, _, err := parseHeaderEntry(buf.Bytes()) rtest.Assert(t, err != nil, "no error for invalid type") h.Type = 0 buf.Reset() _ = binary.Write(buf, binary.LittleEndian, &h) - b, _, err = parseHeaderEntry(buf.Bytes()[:plainEntrySize-1]) + _, _, err = parseHeaderEntry(buf.Bytes()[:plainEntrySize-1]) rtest.Assert(t, err != nil, "no error for short input") } From 4b01b06f2f8ccb08ed1c93d5964c515e93dba392 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 30 Apr 2022 00:31:55 +0200 Subject: [PATCH 22/23] repository: Test compressed blobs in StreamPack --- internal/repository/repository_test.go | 47 +++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git 
a/internal/repository/repository_test.go b/internal/repository/repository_test.go index f2daaf194..497fd2906 100644 --- a/internal/repository/repository_test.go +++ b/internal/repository/repository_test.go @@ -15,6 +15,7 @@ import ( "time" "github.com/google/go-cmp/cmp" + "github.com/klauspost/compress/zstd" "github.com/restic/restic/internal/archiver" "github.com/restic/restic/internal/crypto" "github.com/restic/restic/internal/repository" @@ -461,10 +462,31 @@ func testRepositoryIncrementalIndex(t *testing.T, version uint) { } // buildPackfileWithoutHeader returns a manually built pack file without a header. -func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key) (blobs []restic.Blob, packfile []byte) { +func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key, compress bool) (blobs []restic.Blob, packfile []byte) { + opts := []zstd.EOption{ + // Set the compression level configured. + zstd.WithEncoderLevel(zstd.SpeedDefault), + // Disable CRC, we have enough checks in place, makes the + // compressed data four bytes shorter. + zstd.WithEncoderCRC(false), + // Set a window of 512kbyte, so we have good lookbehind for usual + // blob sizes. + zstd.WithWindowSize(512 * 1024), + } + enc, err := zstd.NewWriter(nil, opts...) 
+ if err != nil { + panic(err) + } + var offset uint for i, size := range blobSizes { plaintext := test.Random(800+i, size) + id := restic.Hash(plaintext) + uncompressedLength := uint(0) + if compress { + uncompressedLength = uint(len(plaintext)) + plaintext = enc.EncodeAll(plaintext, nil) + } // we use a deterministic nonce here so the whole process is // deterministic, last byte is the blob index @@ -482,11 +504,12 @@ func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key) blobs = append(blobs, restic.Blob{ BlobHandle: restic.BlobHandle{ - ID: restic.Hash(plaintext), Type: restic.DataBlob, + ID: id, }, - Length: uint(ciphertextLength), - Offset: offset, + Length: uint(ciphertextLength), + UncompressedLength: uncompressedLength, + Offset: offset, }) offset = uint(len(packfile)) @@ -496,6 +519,10 @@ func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key) } func TestStreamPack(t *testing.T) { + repository.TestAllVersions(t, testStreamPack) +} + +func testStreamPack(t *testing.T, version uint) { // always use the same key for deterministic output const jsonKey = `{"mac":{"k":"eQenuI8adktfzZMuC8rwdA==","r":"k8cfAly2qQSky48CQK7SBA=="},"encrypt":"MKO9gZnRiQFl8mDUurSDa9NMjiu9MUifUrODTHS05wo="}` @@ -520,7 +547,17 @@ func TestStreamPack(t *testing.T) { 18883, } - packfileBlobs, packfile := buildPackfileWithoutHeader(t, blobSizes, &key) + var compress bool + switch version { + case 1: + compress = false + case 2: + compress = true + default: + t.Fatal("test does not suport repository version", version) + } + + packfileBlobs, packfile := buildPackfileWithoutHeader(t, blobSizes, &key, compress) load := func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error { data := packfile From dc5adef255ea31d37ed339ad9c93edbd0966ffe4 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sat, 30 Apr 2022 10:04:09 +0200 Subject: [PATCH 23/23] Add documentation for --repository-version --- 
doc/030_preparing_a_new_repo.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/030_preparing_a_new_repo.rst b/doc/030_preparing_a_new_repo.rst index d1644cf82..5fe1a86bb 100644 --- a/doc/030_preparing_a_new_repo.rst +++ b/doc/030_preparing_a_new_repo.rst @@ -35,6 +35,13 @@ options exist: * Configuring a program to be called when the password is needed via the option ``--password-command`` or the environment variable ``RESTIC_PASSWORD_COMMAND`` + + * The ``init`` command has an option called ``--repository-version`` which can + be used to explicitly set the version for the new repository. By default, + the current stable version is used. Have a look at the `design documentation + `__ for + details. + Local ***** @@ -692,4 +699,3 @@ On MSYS2, you can install ``winpty`` as follows: $ pacman -S winpty $ winpty restic -r /srv/restic-repo init -