diff --git a/changelog/unreleased/issue-21 b/changelog/unreleased/issue-21 new file mode 100644 index 000000000..0a3040add --- /dev/null +++ b/changelog/unreleased/issue-21 @@ -0,0 +1,21 @@ +Enhancement: Add compression support + +We have added compression support to the restic repository format. To create a +repository using the new format, run `init --repository-version 2`. Please note +that the repository cannot be read by restic versions prior to 0.14.0. + +You can configure whether data is compressed with the option `--compression`. It can +be set to `auto` (the default, which will compress very fast), `max` (which +will trade backup speed and CPU usage for better compression), or `off` (which +disables compression). Each setting is only applied for the single run of restic. + +The new format version has not received much testing yet. Do not rely on it as +your only backup copy! Please run `check` at regular intervals to detect any +problems. + +Upgrading in place is not yet supported. As a workaround, first create a new +repository using `init --repository-version 2 --copy-chunker-params --repo2 path/to/old/repo`. +Then use the `copy` command to copy all snapshots to the new repository. + +https://github.com/restic/restic/issues/21 +https://github.com/restic/restic/pull/3666 diff --git a/cmd/restic/cmd_debug.go b/cmd/restic/cmd_debug.go index 7947f789f..4c856c8ac 100644 --- a/cmd/restic/cmd_debug.go +++ b/cmd/restic/cmd_debug.go @@ -15,6 +15,7 @@ import ( "sort" "time" + "github.com/klauspost/compress/zstd" "github.com/spf13/cobra" "golang.org/x/sync/errgroup" @@ -309,6 +310,10 @@ func decryptUnsigned(ctx context.Context, k *crypto.Key, buf []byte) []byte { } func loadBlobs(ctx context.Context, repo restic.Repository, pack restic.ID, list []restic.Blob) error { + dec, err := zstd.NewReader(nil) + if err != nil { + panic(err) + } be := repo.Backend() h := restic.Handle{ Name: pack.String(), @@ -333,44 +338,47 @@ func loadBlobs(ctx context.Context, repo restic.Repository, pack restic.ID, list nonce, plaintext := buf[:key.NonceSize()], buf[key.NonceSize():] plaintext, err = key.Open(plaintext[:0], nonce, plaintext, nil) + outputPrefix := "" + filePrefix := "" if err != nil { Warnf("error decrypting blob: %v\n", err) - var plain []byte if tryRepair || repairByte { - plain = tryRepairWithBitflip(ctx, key, buf, repairByte) + plaintext = tryRepairWithBitflip(ctx, key, buf, repairByte) } - var prefix string - if plain != nil { - id := restic.Hash(plain) - if !id.Equal(blob.ID) { - Printf(" repaired blob (length %v), hash is %v, ID does not match, wanted %v\n", len(plain), id, blob.ID) - prefix = "repaired-wrong-hash-" - } else { - Printf(" successfully repaired blob (length %v), hash is %v, ID matches\n", len(plain), id) - prefix = "repaired-" - } + if plaintext != nil { + outputPrefix = "repaired " + filePrefix = "repaired-" } else { - plain = decryptUnsigned(ctx, key, buf) - prefix = "damaged-" + plaintext = decryptUnsigned(ctx, key, buf) + err = storePlainBlob(blob.ID, "damaged-", plaintext) + if err != nil { + return err + } + continue } - err = storePlainBlob(blob.ID, prefix, plain) + } + + if blob.IsCompressed() { + decompressed, err := dec.DecodeAll(plaintext, nil) if err != nil { - return err + Printf(" failed to decompress blob %v\n", blob.ID) + } + if decompressed != nil { + plaintext = decompressed } - continue } id := restic.Hash(plaintext) var prefix string if !id.Equal(blob.ID) { - Printf(" successfully decrypted blob (length %v), hash is %v, ID does not match, wanted %v\n", 
len(plaintext), id, blob.ID) + Printf(" successfully %vdecrypted blob (length %v), hash is %v, ID does not match, wanted %v\n", outputPrefix, len(plaintext), id, blob.ID) prefix = "wrong-hash-" } else { - Printf(" successfully decrypted blob (length %v), hash is %v, ID matches\n", len(plaintext), id) + Printf(" successfully %vdecrypted blob (length %v), hash is %v, ID matches\n", outputPrefix, len(plaintext), id) prefix = "correct-" } if extractPack { - err = storePlainBlob(id, prefix, plaintext) + err = storePlainBlob(id, filePrefix+prefix, plaintext) if err != nil { return err } @@ -476,27 +484,15 @@ func examinePack(ctx context.Context, repo restic.Repository, id restic.ID) erro blobsLoaded := false // examine all data the indexes have for the pack file - for _, idx := range repo.Index().(*repository.MasterIndex).All() { - idxIDs, err := idx.IDs() - if err != nil { - idxIDs = restic.IDs{} - } - - blobs := idx.ListPack(id) + for b := range repo.Index().ListPacks(ctx, restic.NewIDSet(id)) { + blobs := b.Blobs if len(blobs) == 0 { continue } - Printf(" index %v:\n", idxIDs) + checkPackSize(blobs, fi.Size) - // convert list of blobs to []restic.Blob - var list []restic.Blob - for _, b := range blobs { - list = append(list, b.Blob) - } - checkPackSize(list, fi.Size) - - err = loadBlobs(ctx, repo, id, list) + err = loadBlobs(ctx, repo, id, blobs) if err != nil { Warnf("error: %v\n", err) } else { @@ -532,14 +528,10 @@ func checkPackSize(blobs []restic.Blob, fileSize int64) { if offset != uint64(pb.Offset) { Printf(" hole in file, want offset %v, got %v\n", offset, pb.Offset) } - offset += uint64(pb.Length) + offset = uint64(pb.Offset + pb.Length) size += uint64(pb.Length) } - - // compute header size, per blob: 1 byte type, 4 byte length, 32 byte id - size += uint64(restic.CiphertextLength(len(blobs) * (1 + 4 + 32))) - // length in uint32 little endian - size += 4 + size += uint64(pack.CalculateHeaderSize(blobs)) if uint64(fileSize) != size { Printf(" file sizes do not match: computed %v from index, file size is %v\n", size, fileSize) diff --git a/cmd/restic/cmd_init.go b/cmd/restic/cmd_init.go index bbab3711d..058f1ed07 100644 --- a/cmd/restic/cmd_init.go +++ b/cmd/restic/cmd_init.go @@ -1,10 +1,13 @@ package main import ( + "strconv" + "github.com/restic/chunker" "github.com/restic/restic/internal/backend/location" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/repository" + "github.com/restic/restic/internal/restic" "github.com/spf13/cobra" ) @@ -30,6 +33,7 @@ Exit status is 0 if the command was successful, and non-zero if there was any er type InitOptions struct { secondaryRepoOptions CopyChunkerParameters bool + RepositoryVersion string } var initOptions InitOptions @@ -40,9 +44,26 @@ func init() { f := cmdInit.Flags() initSecondaryRepoOptions(f, &initOptions.secondaryRepoOptions, "secondary", "to copy chunker parameters from") f.BoolVar(&initOptions.CopyChunkerParameters, "copy-chunker-params", false, "copy chunker parameters from the secondary repository (useful with the copy command)") + f.StringVar(&initOptions.RepositoryVersion, "repository-version", "stable", "repository format version to use, allowed values are a format version, 'latest' and 'stable'") } func runInit(opts InitOptions, gopts GlobalOptions, args []string) error { + var version uint + if opts.RepositoryVersion == "latest" || opts.RepositoryVersion == "" { + version = restic.MaxRepoVersion + } else if opts.RepositoryVersion == "stable" { + version = restic.StableRepoVersion + } else { + 
v, err := strconv.ParseUint(opts.RepositoryVersion, 10, 32) + if err != nil { + return errors.Fatal("invalid repository version") + } + version = uint(v) + } + if version < restic.MinRepoVersion || version > restic.MaxRepoVersion { + return errors.Fatalf("only repository versions between %v and %v are allowed", restic.MinRepoVersion, restic.MaxRepoVersion) + } + chunkerPolynomial, err := maybeReadChunkerPolynomial(opts, gopts) if err != nil { return err @@ -65,9 +86,9 @@ func runInit(opts InitOptions, gopts GlobalOptions, args []string) error { return errors.Fatalf("create repository at %s failed: %v\n", location.StripPassword(gopts.Repo), err) } - s := repository.New(be) + s := repository.New(be, repository.Options{Compression: gopts.Compression}) - err = s.Init(gopts.ctx, gopts.password, chunkerPolynomial) + err = s.Init(gopts.ctx, version, gopts.password, chunkerPolynomial) if err != nil { return errors.Fatalf("create key in repository at %s failed: %v\n", location.StripPassword(gopts.Repo), err) } diff --git a/cmd/restic/global.go b/cmd/restic/global.go index f4a2df1b5..65dbbb6be 100644 --- a/cmd/restic/global.go +++ b/cmd/restic/global.go @@ -64,6 +64,7 @@ type GlobalOptions struct { InsecureTLS bool TLSClientCert string CleanupCache bool + Compression repository.CompressionMode LimitUploadKb int LimitDownloadKb int @@ -120,6 +121,7 @@ func init() { f.StringVar(&globalOptions.TLSClientCert, "tls-client-cert", "", "path to a `file` containing PEM encoded TLS client certificate and private key") f.BoolVar(&globalOptions.InsecureTLS, "insecure-tls", false, "skip TLS certificate verification when connecting to the repo (insecure)") f.BoolVar(&globalOptions.CleanupCache, "cleanup-cache", false, "auto remove old cache directories") + f.Var(&globalOptions.Compression, "compression", "compression mode (only available for repo format version 2), one of (auto|off|max)") f.IntVar(&globalOptions.LimitUploadKb, "limit-upload", 0, "limits uploads to a maximum rate in KiB/s. (default: unlimited)") f.IntVar(&globalOptions.LimitDownloadKb, "limit-download", 0, "limits downloads to a maximum rate in KiB/s. 
(default: unlimited)") f.StringSliceVarP(&globalOptions.Options, "option", "o", []string{}, "set extended option (`key=value`, can be specified multiple times)") @@ -435,7 +437,7 @@ func OpenRepository(opts GlobalOptions) (*repository.Repository, error) { } } - s := repository.New(be) + s := repository.New(be, repository.Options{Compression: opts.Compression}) passwordTriesLeft := 1 if stdinIsTerminal() && opts.password == "" { @@ -471,7 +473,7 @@ func OpenRepository(opts GlobalOptions) (*repository.Repository, error) { id = id[:8] } if !opts.JSON { - Verbosef("repository %v opened successfully, password is correct\n", id) + Verbosef("repository %v opened (repo version %v) successfully, password is correct\n", id, s.Config().Version) } } diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index 49121bb1d..ebf63e930 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -1470,7 +1470,7 @@ func TestRebuildIndexAlwaysFull(t *testing.T) { defer func() { repository.IndexFull = indexFull }() - repository.IndexFull = func(*repository.Index) bool { return true } + repository.IndexFull = func(*repository.Index, bool) bool { return true } testRebuildIndex(t, nil) } diff --git a/doc/030_preparing_a_new_repo.rst b/doc/030_preparing_a_new_repo.rst index d1644cf82..5fe1a86bb 100644 --- a/doc/030_preparing_a_new_repo.rst +++ b/doc/030_preparing_a_new_repo.rst @@ -35,6 +35,13 @@ options exist: * Configuring a program to be called when the password is needed via the option ``--password-command`` or the environment variable ``RESTIC_PASSWORD_COMMAND`` + + * The ``init`` command has an option called ``--repository-version`` which can + be used to explicitely set the version for the new repository. By default, + the current stable version is used. Have a look at the `design documentation + `__ for + details. + Local ***** @@ -692,4 +699,3 @@ On MSYS2, you can install ``winpty`` as follows: $ pacman -S winpty $ winpty restic -r /srv/restic-repo init - diff --git a/doc/design.rst b/doc/design.rst index aad70e1f7..17ab4c1b5 100644 --- a/doc/design.rst +++ b/doc/design.rst @@ -62,28 +62,30 @@ like the following: .. code:: json { - "version": 1, + "version": 2, "id": "5956a3f67a6230d4a92cefb29529f10196c7d92582ec305fd71ff6d331d6271b", "chunker_polynomial": "25b468838dcb75" } After decryption, restic first checks that the version field contains a -version number that it understands, otherwise it aborts. At the moment, -the version is expected to be 1. The field ``id`` holds a unique ID -which consists of 32 random bytes, encoded in hexadecimal. This uniquely -identifies the repository, regardless if it is accessed via SFTP or -locally. The field ``chunker_polynomial`` contains a parameter that is -used for splitting large files into smaller chunks (see below). +version number that it understands, otherwise it aborts. At the moment, the +version is expected to be 1 or 2. The list of changes in the repository +format is contained in the section "Changes" below. + +The field ``id`` holds a unique ID which consists of 32 random bytes, encoded +in hexadecimal. This uniquely identifies the repository, regardless if it is +accessed via a remote storage backend or locally. The field +``chunker_polynomial`` contains a parameter that is used for splitting large +files into smaller chunks (see below). Repository Layout ----------------- The ``local`` and ``sftp`` backends are implemented using files and directories stored in a file system. 
The directory layout is the same -for both backend types. +for both backend types and is also used for all other remote backends. -The basic layout of a repository stored in a ``local`` or ``sftp`` -backend is shown here: +The basic layout of a repository is shown here: :: @@ -109,8 +111,7 @@ backend is shown here: │ └── 22a5af1bdc6e616f8a29579458c49627e01b32210d09adb288d1ecda7c5711ec └── tmp -A local repository can be initialized with the ``restic init`` command, -e.g.: +A local repository can be initialized with the ``restic init`` command, e.g.: .. code-block:: console @@ -186,40 +187,75 @@ After decryption, a Pack's header consists of the following elements: :: - Type_Blob1 || Length(EncryptedBlob1) || Hash(Plaintext_Blob1) || + Type_Blob1 || Data_Blob1 || [...] - Type_BlobN || Length(EncryptedBlobN) || Hash(Plaintext_Blobn) || + Type_BlobN || Data_BlobN || + +The Blob type field is a single byte. What follows it depends on the type. The +following Blob types are defined: + ++-----------+----------------------+-------------------------------------------------------------------------------+ +| Type | Meaning | Data | ++===========+======================+===============================================================================+ +| 0b00 | data blob | ``Length(encrypted_blob) || Hash(plaintext_blob)`` | ++-----------+----------------------+-------------------------------------------------------------------------------+ +| 0b01 | tree blob | ``Length(encrypted_blob) || Hash(plaintext_blob)`` | ++-----------+----------------------+-------------------------------------------------------------------------------+ +| 0b10 | compressed data blob | ``Length(encrypted_blob) || Length(plaintext_blob) || Hash(plaintext_blob)`` | ++-----------+----------------------+-------------------------------------------------------------------------------+ +| 0b11 | compressed tree blob | ``Length(encrypted_blob) || Length(plaintext_blob) || Hash(plaintext_blob)`` | ++-----------+----------------------+-------------------------------------------------------------------------------+ This is enough to calculate the offsets for all the Blobs in the Pack. -Length is the length of a Blob as a four byte integer in little-endian -format. The type field is a one byte field and labels the content of a -blob according to the following table: +The length fields are encoded as four byte integers in little-endian +format. In the Data column, ``Length(plaintext_blob)`` means the length +of the decrypted and uncompressed data a blob consists of. -+--------+-----------+ -| Type | Meaning | -+========+===========+ -| 0 | data | -+--------+-----------+ -| 1 | tree | -+--------+-----------+ +All other types are invalid, more types may be added in the future. The +compressed types are only valid for repository format version 2. Data and +tree blobs may be compressed with the zstandard compression algorithm. -All other types are invalid, more types may be added in the future. +In repository format version 1, data and tree blobs should be stored in +separate pack files. In version 2, they must be stored in separate files. +Compressed and non-compress blobs of the same type may be mixed in a pack +file. For reconstructing the index or parsing a pack without an index, first the last four bytes must be read in order to find the length of the header. Afterwards, the header can be read and parsed, which yields all plaintext hashes, types, offsets and lengths of all included blobs. 
+Unpacked Data Format +==================== + +Individual files for the index, locks or snapshots are encrypted +and authenticated like Data and Tree Blobs, so the outer structure is +``IV || Ciphertext || MAC`` again. In repository format version 1 the +plaintext always consists of a JSON document which must either be an +object or an array. + +Repository format version 2 adds support for compression. The plaintext +now starts with a header to indicate the encoding version, to distinguish +it from plain JSON and to allow for further evolution of the storage format: +``encoding_version || data``. +The ``encoding_version`` field is encoded as one byte. +For backwards compatibility the encoding versions '[' (0x5b) and '{' (0x7b) +are used to mark that the whole plaintext (including the encoding version +byte) should be treated as a JSON document. + +For new data the encoding version is currently always ``2``. For that +version ``data`` contains a JSON document compressed using the zstandard +compression algorithm. + Indexing ======== Index files contain information about Data and Tree Blobs and the Packs they are contained in and store this information in the repository. When the local cached index is not accessible any more, the index files can -be downloaded and used to reconstruct the index. The files are encrypted -and authenticated like Data and Tree Blobs, so the outer structure is -``IV || Ciphertext || MAC`` again. The plaintext consists of a JSON -document like the following: +be downloaded and used to reconstruct the index. The file encoding is +described in the "Unpacked Data Format" section. The plaintext consists +of a JSON document like the following: .. code:: json @@ -235,18 +271,22 @@ document like the following: "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce", "type": "data", "offset": 0, - "length": 25 - },{ + "length": 38 + // no 'uncompressed_length' as blob is not compressed + }, + { "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae", "type": "tree", "offset": 38, - "length": 100 + "length": 112, + "uncompressed_length": 511 }, { "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66", "type": "data", "offset": 150, - "length": 123 + "length": 123, + "uncompressed_length": 234 } ] }, [...] @@ -255,7 +295,11 @@ document like the following: This JSON document lists Packs and the blobs contained therein. In this example, the Pack ``73d04e61`` contains two data Blobs and one Tree -blob, the plaintext hashes are listed afterwards. +blob, the plaintext hashes are listed afterwards. The ``length`` field +corresponds to ``Length(encrypted_blob)`` in the pack file header. +The field ``uncompressed_length`` is only present for compressed blobs and +therefore is never present in version 1. It is set to the value of +``Length(plaintext_blob)``. The field ``supersedes`` lists the storage IDs of index files that have been replaced with the current index file. This happens when index files @@ -350,8 +394,9 @@ Snapshots A snapshot represents a directory with all files and sub-directories at a given point in time. For each backup that is made, a new snapshot is -created. A snapshot is a JSON document that is stored in an encrypted -file below the directory ``snapshots`` in the repository. The filename +created. A snapshot is a JSON document that is stored in a file below +the directory ``snapshots`` in the repository. It uses the file encoding +described in the "Unpacked Data Format" section. The filename 
This string is unique and used within restic to uniquely identify a snapshot. @@ -412,7 +457,7 @@ Blobs of data. The SHA-256 hashes of all Blobs are saved in an ordered list which then represents the content of the file. In order to relate these plaintext hashes to the actual location within -a Pack file , an index is used. If the index is not available, the +a Pack file, an index is used. If the index is not available, the header of all data Blobs can be read. Trees and Data @@ -517,8 +562,8 @@ time there must not be any other locks (exclusive and non-exclusive). There may be multiple non-exclusive locks in parallel. A lock is a file in the subdir ``locks`` whose filename is the storage -ID of the contents. It is encrypted and authenticated the same way as -other files in the repository and contains the following JSON structure: +ID of the contents. It is stored in the file encoding described in the +"Unpacked Data Format" section and contains the following JSON structure: .. code:: json @@ -721,3 +766,11 @@ An adversary who has a leaked (decrypted) key for a repository could: only be done using the ``copy`` command, which moves the data into a new repository with a new master key, or by making a completely new repository and new backup. + +Changes +======= + +Repository Version 2 +-------------------- + + * Support compression for blobs (data/tree) and index / lock / snapshot files diff --git a/go.mod b/go.mod index b5d72874f..a20ae44a1 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( github.com/hashicorp/golang-lru v0.5.4 github.com/json-iterator/go v1.1.12 // indirect github.com/juju/ratelimit v1.0.1 - github.com/klauspost/compress v1.15.1 // indirect + github.com/klauspost/compress v1.15.1 github.com/klauspost/cpuid/v2 v2.0.12 // indirect github.com/kurin/blazer v0.5.4-0.20211030221322-ba894c124ac6 github.com/minio/md5-simd v1.1.2 // indirect diff --git a/internal/archiver/archiver_test.go b/internal/archiver/archiver_test.go index e18156ceb..0d6295c39 100644 --- a/internal/archiver/archiver_test.go +++ b/internal/archiver/archiver_test.go @@ -1894,7 +1894,7 @@ func TestArchiverContextCanceled(t *testing.T) { defer removeTempdir() // Ensure that the archiver itself reports the canceled context and not just the backend - repo, _ := repository.TestRepositoryWithBackend(t, &noCancelBackend{mem.New()}) + repo, _ := repository.TestRepositoryWithBackend(t, &noCancelBackend{mem.New()}, 0) back := restictest.Chdir(t, tempdir) defer back() diff --git a/internal/checker/checker_test.go b/internal/checker/checker_test.go index 1330211eb..2a4384b15 100644 --- a/internal/checker/checker_test.go +++ b/internal/checker/checker_test.go @@ -350,7 +350,7 @@ func TestCheckerModifiedData(t *testing.T) { t.Logf("archived as %v", sn.ID().Str()) beError := &errorBackend{Backend: repo.Backend()} - checkRepo := repository.New(beError) + checkRepo := repository.New(beError, repository.Options{}) test.OK(t, checkRepo.SearchKey(context.TODO(), test.TestPassword, 5, "")) chkr := checker.New(checkRepo, false) diff --git a/internal/pack/pack.go b/internal/pack/pack.go index 9fa209054..2d7a5c3fb 100644 --- a/internal/pack/pack.go +++ b/internal/pack/pack.go @@ -32,7 +32,7 @@ func NewPacker(k *crypto.Key, wr io.Writer) *Packer { // Add saves the data read from rd as a new blob to the packer. Returned is the // number of bytes written to the pack. 
-func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte) (int, error) { +func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte, uncompressedLength int) (int, error) { p.m.Lock() defer p.m.Unlock() @@ -41,13 +41,15 @@ func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte) (int, error) n, err := p.wr.Write(data) c.Length = uint(n) c.Offset = p.bytes + c.UncompressedLength = uint(uncompressedLength) p.bytes += uint(n) p.blobs = append(p.blobs, c) return n, errors.Wrap(err, "Write") } -var entrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{})) +var entrySize = uint(binary.Size(restic.BlobType(0)) + 2*headerLengthSize + len(restic.ID{})) +var plainEntrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{})) // headerEntry describes the format of header entries. It serves only as // documentation. @@ -57,6 +59,15 @@ type headerEntry struct { ID restic.ID } +// compressedHeaderEntry describes the format of header entries for compressed blobs. +// It serves only as documentation. +type compressedHeaderEntry struct { + Type uint8 + Length uint32 + UncompressedLength uint32 + ID restic.ID +} + // Finalize writes the header for all added blobs and finalizes the pack. // Returned are the number of bytes written, including the header. func (p *Packer) Finalize() (uint, error) { @@ -70,7 +81,7 @@ func (p *Packer) Finalize() (uint, error) { return 0, err } - encryptedHeader := make([]byte, 0, len(header)+p.k.Overhead()+p.k.NonceSize()) + encryptedHeader := make([]byte, 0, restic.CiphertextLength(len(header))) nonce := crypto.NewRandomNonce() encryptedHeader = append(encryptedHeader, nonce...) encryptedHeader = p.k.Seal(encryptedHeader, nonce, header, nil) @@ -81,7 +92,7 @@ func (p *Packer) Finalize() (uint, error) { return 0, errors.Wrap(err, "Write") } - hdrBytes := restic.CiphertextLength(len(header)) + hdrBytes := len(encryptedHeader) if n != hdrBytes { return 0, errors.New("wrong number of bytes written") } @@ -104,11 +115,15 @@ func (p *Packer) makeHeader() ([]byte, error) { buf := make([]byte, 0, len(p.blobs)*int(entrySize)) for _, b := range p.blobs { - switch b.Type { - case restic.DataBlob: + switch { + case b.Type == restic.DataBlob && b.UncompressedLength == 0: buf = append(buf, 0) - case restic.TreeBlob: + case b.Type == restic.TreeBlob && b.UncompressedLength == 0: buf = append(buf, 1) + case b.Type == restic.DataBlob && b.UncompressedLength != 0: + buf = append(buf, 2) + case b.Type == restic.TreeBlob && b.UncompressedLength != 0: + buf = append(buf, 3) default: return nil, errors.Errorf("invalid blob type %v", b.Type) } @@ -116,6 +131,10 @@ func (p *Packer) makeHeader() ([]byte, error) { var lenLE [4]byte binary.LittleEndian.PutUint32(lenLE[:], uint32(b.Length)) buf = append(buf, lenLE[:]...) + if b.UncompressedLength != 0 { + binary.LittleEndian.PutUint32(lenLE[:], uint32(b.UncompressedLength)) + buf = append(buf, lenLE[:]...) + } buf = append(buf, b.ID[:]...) } @@ -152,7 +171,7 @@ func (p *Packer) String() string { var ( // we require at least one entry in the header, and one blob for a pack file - minFileSize = entrySize + crypto.Extension + uint(headerLengthSize) + minFileSize = plainEntrySize + crypto.Extension + uint(headerLengthSize) ) const ( @@ -167,16 +186,11 @@ const ( eagerEntries = 15 ) -// readRecords reads up to max records from the underlying ReaderAt, returning -// the raw header, the total number of records in the header, and any error. 
-// If the header contains fewer than max entries, the header is truncated to +// readRecords reads up to bufsize bytes from the underlying ReaderAt, returning +// the raw header, the total number of bytes in the header, and any error. +// If the header contains fewer than bufsize bytes, the header is truncated to // the appropriate size. -func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) { - var bufsize int - bufsize += max * int(entrySize) - bufsize += crypto.Extension - bufsize += headerLengthSize - +func readRecords(rd io.ReaderAt, size int64, bufsize int) ([]byte, int, error) { if bufsize > int(size) { bufsize = int(size) } @@ -197,8 +211,6 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) { err = InvalidFileError{Message: "header length is zero"} case hlen < crypto.Extension: err = InvalidFileError{Message: "header length is too small"} - case (hlen-crypto.Extension)%uint32(entrySize) != 0: - err = InvalidFileError{Message: "header length is invalid"} case int64(hlen) > size-int64(headerLengthSize): err = InvalidFileError{Message: "header is larger than file"} case int64(hlen) > MaxHeaderSize-int64(headerLengthSize): @@ -208,8 +220,8 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) { return nil, 0, errors.Wrap(err, "readHeader") } - total := (int(hlen) - crypto.Extension) / int(entrySize) - if total < max { + total := int(hlen + headerLengthSize) + if total < bufsize { // truncate to the beginning of the pack header b = b[len(b)-int(hlen):] } @@ -230,11 +242,12 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) { // eagerly download eagerEntries header entries as part of header-length request. // only make second request if actual number of entries is greater than eagerEntries - b, c, err := readRecords(rd, size, eagerEntries) + eagerSize := eagerEntries*int(entrySize) + headerSize + b, c, err := readRecords(rd, size, eagerSize) if err != nil { return nil, err } - if c <= eagerEntries { + if c <= eagerSize { // eager read sufficed, return what we got return b, nil } @@ -262,7 +275,7 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr return nil, 0, err } - if len(buf) < k.NonceSize()+k.Overhead() { + if len(buf) < restic.CiphertextLength(0) { return nil, 0, errors.New("invalid header, too small") } @@ -274,11 +287,12 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr return nil, 0, err } - entries = make([]restic.Blob, 0, uint(len(buf))/entrySize) + // might over allocate a bit if all blobs have EntrySize but only by a few percent + entries = make([]restic.Blob, 0, uint(len(buf))/plainEntrySize) pos := uint(0) for len(buf) > 0 { - entry, err := parseHeaderEntry(buf) + entry, headerSize, err := parseHeaderEntry(buf) if err != nil { return nil, 0, err } @@ -286,36 +300,60 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr entries = append(entries, entry) pos += entry.Length - buf = buf[entrySize:] + buf = buf[headerSize:] } return entries, hdrSize, nil } -func parseHeaderEntry(p []byte) (b restic.Blob, err error) { - if uint(len(p)) < entrySize { +func parseHeaderEntry(p []byte) (b restic.Blob, size uint, err error) { + l := uint(len(p)) + size = plainEntrySize + if l < plainEntrySize { err = errors.Errorf("parseHeaderEntry: buffer of size %d too short", len(p)) - return b, err + return b, size, err } - p = p[:entrySize] + tpe := p[0] - switch p[0] { - case 0: + switch tpe { + case 0, 2: b.Type = 
restic.DataBlob - case 1: + case 1, 3: b.Type = restic.TreeBlob default: - return b, errors.Errorf("invalid type %d", p[0]) + return b, size, errors.Errorf("invalid type %d", tpe) } b.Length = uint(binary.LittleEndian.Uint32(p[1:5])) - copy(b.ID[:], p[5:]) + p = p[5:] + if tpe == 2 || tpe == 3 { + size = entrySize + if l < entrySize { + err = errors.Errorf("parseHeaderEntry: buffer of size %d too short", len(p)) + return b, size, err + } + b.UncompressedLength = uint(binary.LittleEndian.Uint32(p[0:4])) + p = p[4:] + } - return b, nil + copy(b.ID[:], p[:]) + + return b, size, nil +} + +func CalculateEntrySize(blob restic.Blob) int { + if blob.UncompressedLength != 0 { + return int(entrySize) + } + return int(plainEntrySize) } func CalculateHeaderSize(blobs []restic.Blob) int { - return headerSize + len(blobs)*int(entrySize) + size := headerSize + for _, blob := range blobs { + size += CalculateEntrySize(blob) + } + return size } // Size returns the size of all packs computed by index information. @@ -333,7 +371,7 @@ func Size(ctx context.Context, mi restic.MasterIndex, onlyHdr bool) map[restic.I if !onlyHdr { size += int64(blob.Length) } - packSize[blob.PackID] = size + int64(entrySize) + packSize[blob.PackID] = size + int64(CalculateEntrySize(blob.Blob)) } return packSize diff --git a/internal/pack/pack_internal_test.go b/internal/pack/pack_internal_test.go index b8078c829..c1a4867ea 100644 --- a/internal/pack/pack_internal_test.go +++ b/internal/pack/pack_internal_test.go @@ -13,7 +13,7 @@ import ( func TestParseHeaderEntry(t *testing.T) { h := headerEntry{ - Type: 0, // Blob. + Type: 0, // Blob Length: 100, } for i := range h.ID { @@ -23,25 +23,58 @@ func TestParseHeaderEntry(t *testing.T) { buf := new(bytes.Buffer) _ = binary.Write(buf, binary.LittleEndian, &h) - b, err := parseHeaderEntry(buf.Bytes()) + b, size, err := parseHeaderEntry(buf.Bytes()) rtest.OK(t, err) rtest.Equals(t, restic.DataBlob, b.Type) + rtest.Equals(t, plainEntrySize, size) t.Logf("%v %v", h.ID, b.ID) - rtest.Assert(t, bytes.Equal(h.ID[:], b.ID[:]), "id mismatch") + rtest.Equals(t, h.ID[:], b.ID[:]) rtest.Equals(t, uint(h.Length), b.Length) + rtest.Equals(t, uint(0), b.UncompressedLength) + + c := compressedHeaderEntry{ + Type: 2, // compressed Blob + Length: 100, + UncompressedLength: 200, + } + for i := range c.ID { + c.ID[i] = byte(i) + } + + buf = new(bytes.Buffer) + _ = binary.Write(buf, binary.LittleEndian, &c) + + b, size, err = parseHeaderEntry(buf.Bytes()) + rtest.OK(t, err) + rtest.Equals(t, restic.DataBlob, b.Type) + rtest.Equals(t, entrySize, size) + t.Logf("%v %v", c.ID, b.ID) + rtest.Equals(t, c.ID[:], b.ID[:]) + rtest.Equals(t, uint(c.Length), b.Length) + rtest.Equals(t, uint(c.UncompressedLength), b.UncompressedLength) +} + +func TestParseHeaderEntryErrors(t *testing.T) { + h := headerEntry{ + Type: 0, // Blob + Length: 100, + } + for i := range h.ID { + h.ID[i] = byte(i) + } h.Type = 0xae - buf.Reset() + buf := new(bytes.Buffer) _ = binary.Write(buf, binary.LittleEndian, &h) - b, err = parseHeaderEntry(buf.Bytes()) + _, _, err := parseHeaderEntry(buf.Bytes()) rtest.Assert(t, err != nil, "no error for invalid type") h.Type = 0 buf.Reset() _ = binary.Write(buf, binary.LittleEndian, &h) - b, err = parseHeaderEntry(buf.Bytes()[:entrySize-1]) + _, _, err = parseHeaderEntry(buf.Bytes()[:plainEntrySize-1]) rtest.Assert(t, err != nil, "no error for short input") } @@ -97,7 +130,8 @@ func TestReadHeaderEagerLoad(t *testing.T) { func TestReadRecords(t *testing.T) { testReadRecords := func(dataSize, 
entryCount, totalRecords int) { totalHeader := rtest.Random(0, totalRecords*int(entrySize)+crypto.Extension) - off := len(totalHeader) - (entryCount*int(entrySize) + crypto.Extension) + bufSize := entryCount*int(entrySize) + crypto.Extension + off := len(totalHeader) - bufSize if off < 0 { off = 0 } @@ -110,10 +144,10 @@ func TestReadRecords(t *testing.T) { rd := bytes.NewReader(buf.Bytes()) - header, count, err := readRecords(rd, int64(rd.Len()), entryCount) + header, count, err := readRecords(rd, int64(rd.Len()), bufSize+4) rtest.OK(t, err) + rtest.Equals(t, len(totalHeader)+4, count) rtest.Equals(t, expectedHeader, header) - rtest.Equals(t, totalRecords, count) } // basic diff --git a/internal/pack/pack_test.go b/internal/pack/pack_test.go index 2b7ec7fea..6170e807c 100644 --- a/internal/pack/pack_test.go +++ b/internal/pack/pack_test.go @@ -38,7 +38,7 @@ func newPack(t testing.TB, k *crypto.Key, lengths []int) ([]Buf, []byte, uint) { var buf bytes.Buffer p := pack.NewPacker(k, &buf) for _, b := range bufs { - _, err := p.Add(restic.TreeBlob, b.id, b.data) + _, err := p.Add(restic.TreeBlob, b.id, b.data, 2*len(b.data)) rtest.OK(t, err) } diff --git a/internal/repository/index.go b/internal/repository/index.go index 3db19b3b8..520fcbd8e 100644 --- a/internal/repository/index.go +++ b/internal/repository/index.go @@ -75,12 +75,12 @@ const maxuint32 = 1<<32 - 1 func (idx *Index) store(packIndex int, blob restic.Blob) { // assert that offset and length fit into uint32! - if blob.Offset > maxuint32 || blob.Length > maxuint32 { + if blob.Offset > maxuint32 || blob.Length > maxuint32 || blob.UncompressedLength > maxuint32 { panic("offset or length does not fit in uint32. You have packs > 4GB!") } m := &idx.byType[blob.Type] - m.add(blob.ID, packIndex, uint32(blob.Offset), uint32(blob.Length)) + m.add(blob.ID, packIndex, uint32(blob.Offset), uint32(blob.Length), uint32(blob.UncompressedLength)) } // Final returns true iff the index is already written to the repository, it is @@ -93,12 +93,13 @@ func (idx *Index) Final() bool { } const ( - indexMaxBlobs = 50000 - indexMaxAge = 10 * time.Minute + indexMaxBlobs = 50000 + indexMaxBlobsCompressed = 3 * indexMaxBlobs + indexMaxAge = 10 * time.Minute ) // IndexFull returns true iff the index is "full enough" to be saved as a preliminary index. 
-var IndexFull = func(idx *Index) bool { +var IndexFull = func(idx *Index, compress bool) bool { idx.m.Lock() defer idx.m.Unlock() @@ -109,12 +110,18 @@ var IndexFull = func(idx *Index) bool { blobs += idx.byType[typ].len() } age := time.Since(idx.created) + var maxBlobs uint + if compress { + maxBlobs = indexMaxBlobsCompressed + } else { + maxBlobs = indexMaxBlobs + } switch { case age >= indexMaxAge: debug.Log("index %p is old enough", idx, age) return true - case blobs >= indexMaxBlobs: + case blobs >= maxBlobs: debug.Log("index %p has %d blobs", idx, blobs) return true } @@ -169,8 +176,9 @@ func (idx *Index) toPackedBlob(e *indexEntry, t restic.BlobType) restic.PackedBl BlobHandle: restic.BlobHandle{ ID: e.id, Type: t}, - Length: uint(e.length), - Offset: uint(e.offset), + Length: uint(e.length), + Offset: uint(e.offset), + UncompressedLength: uint(e.uncompressedLength), }, PackID: idx.packs[e.packIndex], } @@ -225,6 +233,9 @@ func (idx *Index) LookupSize(bh restic.BlobHandle) (plaintextLength uint, found if e == nil { return 0, false } + if e.uncompressedLength != 0 { + return uint(e.uncompressedLength), true + } return uint(restic.PlaintextLength(int(e.length))), true } @@ -357,10 +368,11 @@ type packJSON struct { } type blobJSON struct { - ID restic.ID `json:"id"` - Type restic.BlobType `json:"type"` - Offset uint `json:"offset"` - Length uint `json:"length"` + ID restic.ID `json:"id"` + Type restic.BlobType `json:"type"` + Offset uint `json:"offset"` + Length uint `json:"length"` + UncompressedLength uint `json:"uncompressed_length,omitempty"` } // generatePackList returns a list of packs. @@ -391,10 +403,11 @@ func (idx *Index) generatePackList() ([]*packJSON, error) { // add blob p.Blobs = append(p.Blobs, blobJSON{ - ID: e.id, - Type: restic.BlobType(typ), - Offset: uint(e.offset), - Length: uint(e.length), + ID: e.id, + Type: restic.BlobType(typ), + Offset: uint(e.offset), + Length: uint(e.length), + UncompressedLength: uint(e.uncompressedLength), }) return true @@ -553,7 +566,7 @@ func (idx *Index) merge(idx2 *Index) error { m2.foreach(func(e2 *indexEntry) bool { if !hasIdenticalEntry(e2) { // packIndex needs to be changed as idx2.pack was appended to idx.pack, see above - m.add(e2.id, e2.packIndex+packlen, e2.offset, e2.length) + m.add(e2.id, e2.packIndex+packlen, e2.offset, e2.length, e2.uncompressedLength) } return true }) @@ -601,8 +614,9 @@ func DecodeIndex(buf []byte, id restic.ID) (idx *Index, oldFormat bool, err erro BlobHandle: restic.BlobHandle{ Type: blob.Type, ID: blob.ID}, - Offset: blob.Offset, - Length: blob.Length, + Offset: blob.Offset, + Length: blob.Length, + UncompressedLength: blob.UncompressedLength, }) switch blob.Type { @@ -648,6 +662,7 @@ func decodeOldIndex(buf []byte) (idx *Index, err error) { ID: blob.ID}, Offset: blob.Offset, Length: blob.Length, + // no compressed length in the old index format }) switch blob.Type { diff --git a/internal/repository/index_test.go b/internal/repository/index_test.go index c4f0179db..6940afe2f 100644 --- a/internal/repository/index_test.go +++ b/internal/repository/index_test.go @@ -23,11 +23,17 @@ func TestIndexSerialize(t *testing.T) { pos := uint(0) for j := 0; j < 20; j++ { length := uint(i*100 + j) + uncompressedLength := uint(0) + if i >= 25 { + // test a mix of compressed and uncompressed packs + uncompressedLength = 2 * length + } pb := restic.PackedBlob{ Blob: restic.Blob{ - BlobHandle: restic.NewRandomBlobHandle(), - Offset: pos, - Length: length, + BlobHandle: restic.NewRandomBlobHandle(), + Offset: pos, + 
Length: length, + UncompressedLength: uncompressedLength, }, PackID: packID, } @@ -164,7 +170,7 @@ func TestIndexSize(t *testing.T) { } // example index serialization from doc/Design.rst -var docExample = []byte(` +var docExampleV1 = []byte(` { "supersedes": [ "ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452" @@ -177,12 +183,12 @@ var docExample = []byte(` "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce", "type": "data", "offset": 0, - "length": 25 + "length": 38 },{ "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae", "type": "tree", "offset": 38, - "length": 100 + "length": 112 }, { "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66", @@ -196,6 +202,41 @@ var docExample = []byte(` } `) +var docExampleV2 = []byte(` +{ + "supersedes": [ + "ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452" + ], + "packs": [ + { + "id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c", + "blobs": [ + { + "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce", + "type": "data", + "offset": 0, + "length": 38 + }, + { + "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae", + "type": "tree", + "offset": 38, + "length": 112, + "uncompressed_length": 511 + }, + { + "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66", + "type": "data", + "offset": 150, + "length": 123, + "uncompressed_length": 234 + } + ] + } + ] + } +`) + var docOldExample = []byte(` [ { "id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c", @@ -204,12 +245,12 @@ var docOldExample = []byte(` "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce", "type": "data", "offset": 0, - "length": 25 + "length": 38 },{ "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae", "type": "tree", "offset": 38, - "length": 100 + "length": 112 }, { "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66", @@ -222,22 +263,23 @@ var docOldExample = []byte(` `) var exampleTests = []struct { - id, packID restic.ID - tpe restic.BlobType - offset, length uint + id, packID restic.ID + tpe restic.BlobType + offset, length uint + uncompressedLength uint }{ { restic.TestParseID("3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce"), restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"), - restic.DataBlob, 0, 25, + restic.DataBlob, 0, 38, 0, }, { restic.TestParseID("9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae"), restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"), - restic.TreeBlob, 38, 100, + restic.TreeBlob, 38, 112, 511, }, { restic.TestParseID("d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66"), restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"), - restic.DataBlob, 150, 123, + restic.DataBlob, 150, 123, 234, }, } @@ -254,41 +296,56 @@ var exampleLookupTest = struct { } func TestIndexUnserialize(t *testing.T) { - oldIdx := restic.IDs{restic.TestParseID("ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452")} + for _, task := range []struct { + idxBytes []byte + version int + }{ + {docExampleV1, 1}, + {docExampleV2, 2}, + } { + oldIdx := restic.IDs{restic.TestParseID("ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452")} - idx, oldFormat, err := repository.DecodeIndex(docExample, restic.NewRandomID()) - rtest.OK(t, err) - rtest.Assert(t, !oldFormat, "new 
index format recognized as old format") + idx, oldFormat, err := repository.DecodeIndex(task.idxBytes, restic.NewRandomID()) + rtest.OK(t, err) + rtest.Assert(t, !oldFormat, "new index format recognized as old format") - for _, test := range exampleTests { - list := idx.Lookup(restic.BlobHandle{ID: test.id, Type: test.tpe}, nil) - if len(list) != 1 { - t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list) + for _, test := range exampleTests { + list := idx.Lookup(restic.BlobHandle{ID: test.id, Type: test.tpe}, nil) + if len(list) != 1 { + t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list) + } + blob := list[0] + + t.Logf("looking for blob %v/%v, got %v", test.tpe, test.id.Str(), blob) + + rtest.Equals(t, test.packID, blob.PackID) + rtest.Equals(t, test.tpe, blob.Type) + rtest.Equals(t, test.offset, blob.Offset) + rtest.Equals(t, test.length, blob.Length) + if task.version == 1 { + rtest.Equals(t, uint(0), blob.UncompressedLength) + } else if task.version == 2 { + rtest.Equals(t, test.uncompressedLength, blob.UncompressedLength) + } else { + t.Fatal("Invalid index version") + } } - blob := list[0] - t.Logf("looking for blob %v/%v, got %v", test.tpe, test.id.Str(), blob) + rtest.Equals(t, oldIdx, idx.Supersedes()) - rtest.Equals(t, test.packID, blob.PackID) - rtest.Equals(t, test.tpe, blob.Type) - rtest.Equals(t, test.offset, blob.Offset) - rtest.Equals(t, test.length, blob.Length) - } - - rtest.Equals(t, oldIdx, idx.Supersedes()) - - blobs := idx.ListPack(exampleLookupTest.packID) - if len(blobs) != len(exampleLookupTest.blobs) { - t.Fatalf("expected %d blobs in pack, got %d", len(exampleLookupTest.blobs), len(blobs)) - } - - for _, blob := range blobs { - b, ok := exampleLookupTest.blobs[blob.ID] - if !ok { - t.Errorf("unexpected blob %v found", blob.ID.Str()) + blobs := idx.ListPack(exampleLookupTest.packID) + if len(blobs) != len(exampleLookupTest.blobs) { + t.Fatalf("expected %d blobs in pack, got %d", len(exampleLookupTest.blobs), len(blobs)) } - if blob.Type != b { - t.Errorf("unexpected type for blob %v: want %v, got %v", blob.ID.Str(), b, blob.Type) + + for _, blob := range blobs { + b, ok := exampleLookupTest.blobs[blob.ID] + if !ok { + t.Errorf("unexpected blob %v found", blob.ID.Str()) + } + if blob.Type != b { + t.Errorf("unexpected type for blob %v: want %v, got %v", blob.ID.Str(), b, blob.Type) + } } } } @@ -403,8 +460,9 @@ func createRandomIndex(rng *rand.Rand, packfiles int) (idx *repository.Index, lo Type: restic.DataBlob, ID: id, }, - Length: uint(size), - Offset: uint(offset), + Length: uint(size), + UncompressedLength: uint(2 * size), + Offset: uint(offset), }) offset += size @@ -475,11 +533,17 @@ func TestIndexHas(t *testing.T) { pos := uint(0) for j := 0; j < 20; j++ { length := uint(i*100 + j) + uncompressedLength := uint(0) + if i >= 25 { + // test a mix of compressed and uncompressed packs + uncompressedLength = 2 * length + } pb := restic.PackedBlob{ Blob: restic.Blob{ - BlobHandle: restic.NewRandomBlobHandle(), - Offset: pos, - Length: length, + BlobHandle: restic.NewRandomBlobHandle(), + Offset: pos, + Length: length, + UncompressedLength: uncompressedLength, }, PackID: packID, } diff --git a/internal/repository/indexmap.go b/internal/repository/indexmap.go index f713a3304..6a8e86aad 100644 --- a/internal/repository/indexmap.go +++ b/internal/repository/indexmap.go @@ -32,7 +32,7 @@ const ( // add inserts an indexEntry for the given arguments into the map, // using id as the key. 
-func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32) { +func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32, uncompressedLength uint32) { switch { case m.numentries == 0: // Lazy initialization. m.init() @@ -47,6 +47,7 @@ func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32) { e.packIndex = packIdx e.offset = offset e.length = length + e.uncompressedLength = uncompressedLength m.buckets[h] = e m.numentries++ @@ -130,12 +131,12 @@ func (m *indexMap) len() uint { return m.numentries } func (m *indexMap) newEntry() *indexEntry { // Allocating in batches means that we get closer to optimal space usage, - // as Go's malloc will overallocate for structures of size 56 (indexEntry + // as Go's malloc will overallocate for structures of size 60 (indexEntry // on amd64). // - // 256*56 and 256*48 both have minimal malloc overhead among reasonable sizes. + // 128*60 and 128*60 both have low malloc overhead among reasonable sizes. // See src/runtime/sizeclasses.go in the standard library. - const entryAllocBatch = 256 + const entryAllocBatch = 128 if m.free == nil { free := new([entryAllocBatch]indexEntry) @@ -152,9 +153,10 @@ func (m *indexMap) newEntry() *indexEntry { } type indexEntry struct { - id restic.ID - next *indexEntry - packIndex int // Position in containing Index's packs field. - offset uint32 - length uint32 + id restic.ID + next *indexEntry + packIndex int // Position in containing Index's packs field. + offset uint32 + length uint32 + uncompressedLength uint32 } diff --git a/internal/repository/indexmap_test.go b/internal/repository/indexmap_test.go index d803bf3c5..6699b3601 100644 --- a/internal/repository/indexmap_test.go +++ b/internal/repository/indexmap_test.go @@ -22,7 +22,7 @@ func TestIndexMapBasic(t *testing.T) { r.Read(id[:]) rtest.Assert(t, m.get(id) == nil, "%v retrieved but not added", id) - m.add(id, 0, 0, 0) + m.add(id, 0, 0, 0, 0) rtest.Assert(t, m.get(id) != nil, "%v added but not retrieved", id) rtest.Equals(t, uint(i), m.len()) } @@ -41,7 +41,7 @@ func TestIndexMapForeach(t *testing.T) { for i := 0; i < N; i++ { var id restic.ID id[0] = byte(i) - m.add(id, i, uint32(i), uint32(i)) + m.add(id, i, uint32(i), uint32(i), uint32(i/2)) } seen := make(map[int]struct{}) @@ -51,6 +51,7 @@ func TestIndexMapForeach(t *testing.T) { rtest.Equals(t, i, e.packIndex) rtest.Equals(t, i, int(e.length)) rtest.Equals(t, i, int(e.offset)) + rtest.Equals(t, i/2, int(e.uncompressedLength)) seen[i] = struct{}{} return true @@ -85,13 +86,13 @@ func TestIndexMapForeachWithID(t *testing.T) { // Test insertion and retrieval of duplicates. for i := 0; i < ndups; i++ { - m.add(id, i, 0, 0) + m.add(id, i, 0, 0, 0) } for i := 0; i < 100; i++ { var otherid restic.ID r.Read(otherid[:]) - m.add(otherid, -1, 0, 0) + m.add(otherid, -1, 0, 0, 0) } n = 0 @@ -109,7 +110,7 @@ func TestIndexMapForeachWithID(t *testing.T) { func BenchmarkIndexMapHash(b *testing.B) { var m indexMap - m.add(restic.ID{}, 0, 0, 0) // Trigger lazy initialization. + m.add(restic.ID{}, 0, 0, 0, 0) // Trigger lazy initialization. ids := make([]restic.ID, 128) // 4 KiB. 
r := rand.New(rand.NewSource(time.Now().UnixNano())) diff --git a/internal/repository/master_index.go b/internal/repository/master_index.go index 9056528a2..96462d4a4 100644 --- a/internal/repository/master_index.go +++ b/internal/repository/master_index.go @@ -16,6 +16,7 @@ type MasterIndex struct { idx []*Index pendingBlobs restic.BlobSet idxMutex sync.RWMutex + compress bool } // NewMasterIndex creates a new master index. @@ -28,6 +29,10 @@ func NewMasterIndex() *MasterIndex { return &MasterIndex{idx: idx, pendingBlobs: restic.NewBlobSet()} } +func (mi *MasterIndex) markCompressed() { + mi.compress = true +} + // Lookup queries all known Indexes for the ID and returns all matches. func (mi *MasterIndex) Lookup(bh restic.BlobHandle) (pbs []restic.PackedBlob) { mi.idxMutex.RLock() @@ -206,7 +211,7 @@ func (mi *MasterIndex) FinalizeFullIndexes() []*Index { continue } - if IndexFull(idx) { + if IndexFull(idx, mi.compress) { debug.Log("index %p is full", idx) idx.Finalize() list = append(list, idx) @@ -334,7 +339,7 @@ func (mi *MasterIndex) Save(ctx context.Context, repo restic.Repository, packBla for pbs := range idx.EachByPack(ctx, packBlacklist) { newIndex.StorePack(pbs.packID, pbs.blobs) p.Add(1) - if IndexFull(newIndex) { + if IndexFull(newIndex, mi.compress) { select { case ch <- newIndex: case <-ctx.Done(): diff --git a/internal/repository/master_index_test.go b/internal/repository/master_index_test.go index 2470dadfc..79932af07 100644 --- a/internal/repository/master_index_test.go +++ b/internal/repository/master_index_test.go @@ -30,9 +30,10 @@ func TestMasterIndex(t *testing.T) { blob2 := restic.PackedBlob{ PackID: restic.NewRandomID(), Blob: restic.Blob{ - BlobHandle: bhInIdx2, - Length: uint(restic.CiphertextLength(100)), - Offset: 10, + BlobHandle: bhInIdx2, + Length: uint(restic.CiphertextLength(100)), + Offset: 10, + UncompressedLength: 200, }, } @@ -48,9 +49,10 @@ func TestMasterIndex(t *testing.T) { blob12b := restic.PackedBlob{ PackID: restic.NewRandomID(), Blob: restic.Blob{ - BlobHandle: bhInIdx12, - Length: uint(restic.CiphertextLength(123)), - Offset: 50, + BlobHandle: bhInIdx12, + Length: uint(restic.CiphertextLength(123)), + Offset: 50, + UncompressedLength: 80, }, } @@ -86,7 +88,7 @@ func TestMasterIndex(t *testing.T) { size, found = mIdx.LookupSize(bhInIdx2) rtest.Equals(t, true, found) - rtest.Equals(t, uint(100), size) + rtest.Equals(t, uint(200), size) // test idInIdx12 found = mIdx.Has(bhInIdx12) @@ -144,9 +146,10 @@ func TestMasterMergeFinalIndexes(t *testing.T) { blob2 := restic.PackedBlob{ PackID: restic.NewRandomID(), Blob: restic.Blob{ - BlobHandle: bhInIdx2, - Length: 100, - Offset: 10, + BlobHandle: bhInIdx2, + Length: 100, + Offset: 10, + UncompressedLength: 200, }, } @@ -335,8 +338,8 @@ var ( depth = 3 ) -func createFilledRepo(t testing.TB, snapshots int, dup float32) (restic.Repository, func()) { - repo, cleanup := repository.TestRepository(t) +func createFilledRepo(t testing.TB, snapshots int, dup float32, version uint) (restic.Repository, func()) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) for i := 0; i < 3; i++ { restic.TestCreateSnapshot(t, repo, snapshotTime.Add(time.Duration(i)*time.Second), depth, dup) @@ -346,7 +349,11 @@ func createFilledRepo(t testing.TB, snapshots int, dup float32) (restic.Reposito } func TestIndexSave(t *testing.T) { - repo, cleanup := createFilledRepo(t, 3, 0) + repository.TestAllVersions(t, testIndexSave) +} + +func testIndexSave(t *testing.T, version uint) { + repo, cleanup := 
createFilledRepo(t, 3, 0, version) defer cleanup() err := repo.LoadIndex(context.TODO()) diff --git a/internal/repository/packer_manager_test.go b/internal/repository/packer_manager_test.go index 1a810ab61..c5233ab4e 100644 --- a/internal/repository/packer_manager_test.go +++ b/internal/repository/packer_manager_test.go @@ -70,7 +70,7 @@ func fillPacks(t testing.TB, rnd *rand.Rand, be Saver, pm *packerManager, buf [] // Only change a few bytes so we know we're not benchmarking the RNG. rnd.Read(buf[:min(l, 4)]) - n, err := packer.Add(restic.DataBlob, id, buf) + n, err := packer.Add(restic.DataBlob, id, buf, 0) if err != nil { t.Fatal(err) } diff --git a/internal/repository/repack_test.go b/internal/repository/repack_test.go index e40f5f6af..b86c8c95d 100644 --- a/internal/repository/repack_test.go +++ b/internal/repository/repack_test.go @@ -212,7 +212,11 @@ func reloadIndex(t *testing.T, repo restic.Repository) { } func TestRepack(t *testing.T) { - repo, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testRepack) +} + +func testRepack(t *testing.T, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() seed := time.Now().UnixNano() @@ -279,9 +283,13 @@ func TestRepack(t *testing.T) { } func TestRepackCopy(t *testing.T) { - repo, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testRepackCopy) +} + +func testRepackCopy(t *testing.T, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() - dstRepo, dstCleanup := repository.TestRepository(t) + dstRepo, dstCleanup := repository.TestRepositoryWithVersion(t, version) defer dstCleanup() seed := time.Now().UnixNano() @@ -318,7 +326,11 @@ func TestRepackCopy(t *testing.T) { } func TestRepackWrongBlob(t *testing.T) { - repo, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testRepackWrongBlob) +} + +func testRepackWrongBlob(t *testing.T, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() seed := time.Now().UnixNano() diff --git a/internal/repository/repository.go b/internal/repository/repository.go index d74868895..d58406c7c 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -12,6 +12,7 @@ import ( "sync" "github.com/cenkalti/backoff/v4" + "github.com/klauspost/compress/zstd" "github.com/restic/chunker" "github.com/restic/restic/internal/backend/dryrun" "github.com/restic/restic/internal/cache" @@ -36,16 +37,71 @@ type Repository struct { idx *MasterIndex Cache *cache.Cache + opts Options + noAutoIndexUpdate bool treePM *packerManager dataPM *packerManager + + allocEnc sync.Once + allocDec sync.Once + enc *zstd.Encoder + dec *zstd.Decoder +} + +type Options struct { + Compression CompressionMode +} + +// CompressionMode configures if data should be compressed. +type CompressionMode uint + +// Constants for the different compression levels. +const ( + CompressionAuto CompressionMode = 0 + CompressionOff CompressionMode = 1 + CompressionMax CompressionMode = 2 +) + +// Set implements the method needed for pflag command flag parsing. 
+func (c *CompressionMode) Set(s string) error { + switch s { + case "auto": + *c = CompressionAuto + case "off": + *c = CompressionOff + case "max": + *c = CompressionMax + default: + return fmt.Errorf("invalid compression mode %q, must be one of (auto|off|max)", s) + } + + return nil +} + +func (c *CompressionMode) String() string { + switch *c { + case CompressionAuto: + return "auto" + case CompressionOff: + return "off" + case CompressionMax: + return "max" + default: + return "invalid" + } + +} +func (c *CompressionMode) Type() string { + return "mode" } // New returns a new repository with backend be. -func New(be restic.Backend) *Repository { +func New(be restic.Backend, opts Options) *Repository { repo := &Repository{ be: be, + opts: opts, idx: NewMasterIndex(), dataPM: newPackerManager(be, nil), treePM: newPackerManager(be, nil), @@ -60,6 +116,14 @@ func (r *Repository) DisableAutoIndexUpdate() { r.noAutoIndexUpdate = true } +// setConfig assigns the given config and updates the repository parameters accordingly +func (r *Repository) setConfig(cfg restic.Config) { + r.cfg = cfg + if r.cfg.Version >= 2 { + r.idx.markCompressed() + } +} + // Config returns the repository configuration. func (r *Repository) Config() restic.Config { return r.cfg @@ -125,6 +189,9 @@ func (r *Repository) LoadUnpacked(ctx context.Context, buf []byte, t restic.File if err != nil { return nil, err } + if t != restic.ConfigFile { + return r.decompressUnpacked(plaintext) + } return plaintext, nil } @@ -218,12 +285,23 @@ func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic. continue } + if blob.IsCompressed() { + plaintext, err = r.getZstdDecoder().DecodeAll(plaintext, make([]byte, 0, blob.DataLength())) + if err != nil { + lastError = errors.Errorf("decompressing blob %v failed: %v", id, err) + continue + } + } + // check hash if !restic.Hash(plaintext).Equal(id) { lastError = errors.Errorf("blob %v returned invalid hash", id) continue } + if len(plaintext) > cap(buf) { + return plaintext, nil + } // move decrypted data to the start of the buffer copy(buf, plaintext) return buf[:len(plaintext)], nil @@ -252,12 +330,70 @@ func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, bo return r.idx.LookupSize(restic.BlobHandle{ID: id, Type: tpe}) } +func (r *Repository) getZstdEncoder() *zstd.Encoder { + r.allocEnc.Do(func() { + level := zstd.SpeedDefault + if r.opts.Compression == CompressionMax { + level = zstd.SpeedBestCompression + } + + opts := []zstd.EOption{ + // Set the compression level configured. + zstd.WithEncoderLevel(level), + // Disable CRC, we have enough checks in place, makes the + // compressed data four bytes shorter. + zstd.WithEncoderCRC(false), + // Set a window of 512kbyte, so we have good lookbehind for usual + // blob sizes. + zstd.WithWindowSize(512 * 1024), + } + + enc, err := zstd.NewWriter(nil, opts...) + if err != nil { + panic(err) + } + r.enc = enc + }) + return r.enc +} + +func (r *Repository) getZstdDecoder() *zstd.Decoder { + r.allocDec.Do(func() { + opts := []zstd.DOption{ + // Use all available cores. + zstd.WithDecoderConcurrency(0), + // Limit the maximum decompressed memory. Set to a very high, + // conservative value. + zstd.WithDecoderMaxMemory(16 * 1024 * 1024 * 1024), + } + + dec, err := zstd.NewReader(nil, opts...) + if err != nil { + panic(err) + } + r.dec = dec + }) + return r.dec +} + // saveAndEncrypt encrypts data and stores it to the backend as type t. 
If data // is small enough, it will be packed together with other small blobs. // The caller must ensure that the id matches the data. func (r *Repository) saveAndEncrypt(ctx context.Context, t restic.BlobType, data []byte, id restic.ID) error { debug.Log("save id %v (%v, %d bytes)", id, t, len(data)) + uncompressedLength := 0 + if r.cfg.Version > 1 { + + // we have a repo v2, so compression is available. if the user opts to + // not compress, we won't compress any data, but everything else is + // compressed. + if r.opts.Compression != CompressionOff || t != restic.DataBlob { + uncompressedLength = len(data) + data = r.getZstdEncoder().EncodeAll(data, nil) + } + } + nonce := crypto.NewRandomNonce() ciphertext := make([]byte, 0, restic.CiphertextLength(len(data))) @@ -284,7 +420,7 @@ func (r *Repository) saveAndEncrypt(ctx context.Context, t restic.BlobType, data } // save ciphertext - _, err = packer.Add(t, id, ciphertext) + _, err = packer.Add(t, id, ciphertext, uncompressedLength) if err != nil { return err } @@ -312,9 +448,50 @@ func (r *Repository) SaveJSONUnpacked(ctx context.Context, t restic.FileType, it return r.SaveUnpacked(ctx, t, plaintext) } +func (r *Repository) compressUnpacked(p []byte) ([]byte, error) { + // compression is only available starting from version 2 + if r.cfg.Version < 2 { + return p, nil + } + + // version byte + out := []byte{2} + out = r.getZstdEncoder().EncodeAll(p, out) + return out, nil +} + +func (r *Repository) decompressUnpacked(p []byte) ([]byte, error) { + // compression is only available starting from version 2 + if r.cfg.Version < 2 { + return p, nil + } + + if len(p) == 0 { + // too short for version header + return p, nil + } + if p[0] == '[' || p[0] == '{' { + // probably raw JSON + return p, nil + } + // version + if p[0] != 2 { + return nil, errors.New("not supported encoding format") + } + + return r.getZstdDecoder().DecodeAll(p[1:], nil) +} + // SaveUnpacked encrypts data and stores it in the backend. Returned is the // storage hash. func (r *Repository) SaveUnpacked(ctx context.Context, t restic.FileType, p []byte) (id restic.ID, err error) { + if t != restic.ConfigFile { + p, err = r.compressUnpacked(p) + if err != nil { + return restic.ID{}, err + } + } + ciphertext := restic.NewBlobBuffer(len(p)) ciphertext = ciphertext[:0] nonce := crypto.NewRandomNonce() @@ -478,6 +655,17 @@ func (r *Repository) LoadIndex(ctx context.Context) error { return err } + if r.cfg.Version < 2 { + // sanity check + ctx, cancel := context.WithCancel(ctx) + defer cancel() + for blob := range r.idx.Each(ctx) { + if blob.IsCompressed() { + return errors.Fatal("index uses feature not supported by repository version 1") + } + } + } + // remove index files from the cache which have been removed in the repo return r.PrepareCache(validIndex) } @@ -592,18 +780,28 @@ func (r *Repository) SearchKey(ctx context.Context, password string, maxKeys int r.dataPM.key = key.master r.treePM.key = key.master r.keyName = key.Name() - r.cfg, err = restic.LoadConfig(ctx, r) + cfg, err := restic.LoadConfig(ctx, r) if err == crypto.ErrUnauthenticated { return errors.Fatalf("config or key %v is damaged: %v", key.Name(), err) } else if err != nil { return errors.Fatalf("config cannot be loaded: %v", err) } + + r.setConfig(cfg) return nil } // Init creates a new master key with the supplied password, initializes and // saves the repository config. 
-func (r *Repository) Init(ctx context.Context, password string, chunkerPolynomial *chunker.Pol) error { +func (r *Repository) Init(ctx context.Context, version uint, password string, chunkerPolynomial *chunker.Pol) error { + if version > restic.MaxRepoVersion { + return fmt.Errorf("repo version %v too high", version) + } + + if version < restic.MinRepoVersion { + return fmt.Errorf("repo version %v too low", version) + } + has, err := r.be.Test(ctx, restic.Handle{Type: restic.ConfigFile}) if err != nil { return err @@ -612,7 +810,7 @@ func (r *Repository) Init(ctx context.Context, password string, chunkerPolynomia return errors.New("repository master key and config already initialized") } - cfg, err := restic.CreateConfig() + cfg, err := restic.CreateConfig(version) if err != nil { return err } @@ -635,7 +833,7 @@ func (r *Repository) init(ctx context.Context, password string, cfg restic.Confi r.dataPM.key = key.master r.treePM.key = key.master r.keyName = key.Name() - r.cfg = cfg + r.setConfig(cfg) _, err = r.SaveJSONUnpacked(ctx, restic.ConfigFile, cfg) return err } @@ -768,9 +966,15 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack debug.Log("streaming pack %v (%d to %d bytes), blobs: %v", packID, dataStart, dataEnd, len(blobs)) + dec, err := zstd.NewReader(nil) + if err != nil { + panic(err) + } + defer dec.Close() + ctx, cancel := context.WithCancel(ctx) // stream blobs in pack - err := beLoad(ctx, h, int(dataEnd-dataStart), int64(dataStart), func(rd io.Reader) error { + err = beLoad(ctx, h, int(dataEnd-dataStart), int64(dataStart), func(rd io.Reader) error { // prevent callbacks after cancelation if ctx.Err() != nil { return ctx.Err() @@ -783,6 +987,7 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack bufRd := bufio.NewReaderSize(rd, bufferSize) currentBlobEnd := dataStart var buf []byte + var decode []byte for _, entry := range blobs { skipBytes := int(entry.Offset - currentBlobEnd) if skipBytes < 0 { @@ -822,6 +1027,15 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack // decryption errors are likely permanent, give the caller a chance to skip them nonce, ciphertext := buf[:key.NonceSize()], buf[key.NonceSize():] plaintext, err := key.Open(ciphertext[:0], nonce, ciphertext, nil) + if err == nil && entry.IsCompressed() { + // DecodeAll will allocate a slice if it is not large enough since it + // knows the decompressed size (because we're using EncodeAll) + decode, err = dec.DecodeAll(plaintext, decode[:0]) + plaintext = decode + if err != nil { + err = errors.Errorf("decompressing blob %v failed: %v", h, err) + } + } if err == nil { id := restic.Hash(plaintext) if !id.Equal(entry.ID) { diff --git a/internal/repository/repository_test.go b/internal/repository/repository_test.go index 7cc593e04..497fd2906 100644 --- a/internal/repository/repository_test.go +++ b/internal/repository/repository_test.go @@ -15,6 +15,7 @@ import ( "time" "github.com/google/go-cmp/cmp" + "github.com/klauspost/compress/zstd" "github.com/restic/restic/internal/archiver" "github.com/restic/restic/internal/crypto" "github.com/restic/restic/internal/repository" @@ -28,7 +29,11 @@ var testSizes = []int{5, 23, 2<<18 + 23, 1 << 20} var rnd = rand.New(rand.NewSource(time.Now().UnixNano())) func TestSave(t *testing.T) { - repo, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testSave) +} + +func testSave(t *testing.T, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t,
version) defer cleanup() for _, size := range testSizes { @@ -63,7 +68,11 @@ func TestSave(t *testing.T) { } func TestSaveFrom(t *testing.T) { - repo, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testSaveFrom) +} + +func testSaveFrom(t *testing.T, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() for _, size := range testSizes { @@ -96,7 +105,11 @@ func TestSaveFrom(t *testing.T) { } func BenchmarkSaveAndEncrypt(t *testing.B) { - repo, cleanup := repository.TestRepository(t) + repository.BenchmarkAllVersions(t, benchmarkSaveAndEncrypt) +} + +func benchmarkSaveAndEncrypt(t *testing.B, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() size := 4 << 20 // 4MiB @@ -118,7 +131,11 @@ func BenchmarkSaveAndEncrypt(t *testing.B) { } func TestLoadTree(t *testing.T) { - repo, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testLoadTree) +} + +func testLoadTree(t *testing.T, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() if rtest.BenchArchiveDirectory == "" { @@ -134,7 +151,11 @@ func TestLoadTree(t *testing.T) { } func BenchmarkLoadTree(t *testing.B) { - repo, cleanup := repository.TestRepository(t) + repository.BenchmarkAllVersions(t, benchmarkLoadTree) +} + +func benchmarkLoadTree(t *testing.B, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() if rtest.BenchArchiveDirectory == "" { @@ -154,7 +175,11 @@ func BenchmarkLoadTree(t *testing.B) { } func TestLoadBlob(t *testing.T) { - repo, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testLoadBlob) +} + +func testLoadBlob(t *testing.T, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() length := 1000000 @@ -183,7 +208,11 @@ func TestLoadBlob(t *testing.T) { } func BenchmarkLoadBlob(b *testing.B) { - repo, cleanup := repository.TestRepository(b) + repository.BenchmarkAllVersions(b, benchmarkLoadBlob) +} + +func benchmarkLoadBlob(b *testing.B, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(b, version) defer cleanup() length := 1000000 @@ -219,7 +248,11 @@ func BenchmarkLoadBlob(b *testing.B) { } func BenchmarkLoadUnpacked(b *testing.B) { - repo, cleanup := repository.TestRepository(b) + repository.BenchmarkAllVersions(b, benchmarkLoadUnpacked) +} + +func benchmarkLoadUnpacked(b *testing.B, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(b, version) defer cleanup() length := 1000000 @@ -255,7 +288,11 @@ func BenchmarkLoadUnpacked(b *testing.B) { } func TestLoadJSONUnpacked(t *testing.T) { - repo, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testLoadJSONUnpacked) +} + +func testLoadJSONUnpacked(t *testing.T, version uint) { + repo, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() if rtest.BenchArchiveDirectory == "" { @@ -313,9 +350,13 @@ func loadIndex(ctx context.Context, repo restic.Repository, id restic.ID) (*repo } func BenchmarkLoadIndex(b *testing.B) { + repository.BenchmarkAllVersions(b, benchmarkLoadIndex) +} + +func benchmarkLoadIndex(b *testing.B, version uint) { repository.TestUseLowSecurityKDFParameters(b) - repo, cleanup := repository.TestRepository(b) + repo, cleanup := repository.TestRepositoryWithVersion(b, version) defer cleanup() idx := repository.NewIndex() @@ -362,12 +403,16 @@ func saveRandomDataBlobs(t 
testing.TB, repo restic.Repository, num int, sizeMax } func TestRepositoryIncrementalIndex(t *testing.T) { - r, cleanup := repository.TestRepository(t) + repository.TestAllVersions(t, testRepositoryIncrementalIndex) +} + +func testRepositoryIncrementalIndex(t *testing.T, version uint) { + r, cleanup := repository.TestRepositoryWithVersion(t, version) defer cleanup() repo := r.(*repository.Repository) - repository.IndexFull = func(*repository.Index) bool { return true } + repository.IndexFull = func(*repository.Index, bool) bool { return true } // add 15 packs for j := 0; j < 5; j++ { @@ -417,10 +462,31 @@ func TestRepositoryIncrementalIndex(t *testing.T) { } // buildPackfileWithoutHeader returns a manually built pack file without a header. -func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key) (blobs []restic.Blob, packfile []byte) { +func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key, compress bool) (blobs []restic.Blob, packfile []byte) { + opts := []zstd.EOption{ + // Set the compression level configured. + zstd.WithEncoderLevel(zstd.SpeedDefault), + // Disable CRC, we have enough checks in place, makes the + // compressed data four bytes shorter. + zstd.WithEncoderCRC(false), + // Set a window of 512kbyte, so we have good lookbehind for usual + // blob sizes. + zstd.WithWindowSize(512 * 1024), + } + enc, err := zstd.NewWriter(nil, opts...) + if err != nil { + panic(err) + } + var offset uint for i, size := range blobSizes { plaintext := test.Random(800+i, size) + id := restic.Hash(plaintext) + uncompressedLength := uint(0) + if compress { + uncompressedLength = uint(len(plaintext)) + plaintext = enc.EncodeAll(plaintext, nil) + } // we use a deterministic nonce here so the whole process is // deterministic, last byte is the blob index @@ -438,11 +504,12 @@ func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key) blobs = append(blobs, restic.Blob{ BlobHandle: restic.BlobHandle{ - ID: restic.Hash(plaintext), Type: restic.DataBlob, + ID: id, }, - Length: uint(ciphertextLength), - Offset: offset, + Length: uint(ciphertextLength), + UncompressedLength: uncompressedLength, + Offset: offset, }) offset = uint(len(packfile)) @@ -452,6 +519,10 @@ func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key) } func TestStreamPack(t *testing.T) { + repository.TestAllVersions(t, testStreamPack) +} + +func testStreamPack(t *testing.T, version uint) { // always use the same key for deterministic output const jsonKey = `{"mac":{"k":"eQenuI8adktfzZMuC8rwdA==","r":"k8cfAly2qQSky48CQK7SBA=="},"encrypt":"MKO9gZnRiQFl8mDUurSDa9NMjiu9MUifUrODTHS05wo="}` @@ -476,7 +547,17 @@ func TestStreamPack(t *testing.T) { 18883, } - packfileBlobs, packfile := buildPackfileWithoutHeader(t, blobSizes, &key) + var compress bool + switch version { + case 1: + compress = false + case 2: + compress = true + default: + t.Fatal("test does not support repository version", version) + } + + packfileBlobs, packfile := buildPackfileWithoutHeader(t, blobSizes, &key, compress) load := func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error { data := packfile diff --git a/internal/repository/testing.go b/internal/repository/testing.go index d752e107e..05dfab64d 100644 --- a/internal/repository/testing.go +++ b/internal/repository/testing.go @@ -2,6 +2,7 @@ package repository import ( "context" + "fmt" "os" "testing" @@ -41,7 +42,7 @@ const TestChunkerPol = chunker.Pol(0x3DA3358B4DC173) // 
TestRepositoryWithBackend returns a repository initialized with a test // password. If be is nil, an in-memory backend is used. A constant polynomial // is used for the chunker and low-security test parameters. -func TestRepositoryWithBackend(t testing.TB, be restic.Backend) (r restic.Repository, cleanup func()) { +func TestRepositoryWithBackend(t testing.TB, be restic.Backend, version uint) (r restic.Repository, cleanup func()) { t.Helper() TestUseLowSecurityKDFParameters(t) restic.TestDisableCheckPolynomial(t) @@ -51,9 +52,9 @@ func TestRepositoryWithBackend(t testing.TB, be restic.Backend) (r restic.Reposi be, beCleanup = TestBackend(t) } - repo := New(be) + repo := New(be, Options{}) - cfg := restic.TestCreateConfig(t, TestChunkerPol) + cfg := restic.TestCreateConfig(t, TestChunkerPol, version) err := repo.init(context.TODO(), test.TestPassword, cfg) if err != nil { t.Fatalf("TestRepository(): initialize repo failed: %v", err) @@ -71,6 +72,11 @@ func TestRepositoryWithBackend(t testing.TB, be restic.Backend) (r restic.Reposi // a non-existing directory, a local backend is created there and this is used // instead. The directory is not removed, but left there for inspection. func TestRepository(t testing.TB) (r restic.Repository, cleanup func()) { + t.Helper() + return TestRepositoryWithVersion(t, 0) +} + +func TestRepositoryWithVersion(t testing.TB, version uint) (r restic.Repository, cleanup func()) { t.Helper() dir := os.Getenv("RESTIC_TEST_REPO") if dir != "" { @@ -80,7 +86,7 @@ func TestRepository(t testing.TB) (r restic.Repository, cleanup func()) { if err != nil { t.Fatalf("error creating local backend at %v: %v", dir, err) } - return TestRepositoryWithBackend(t, be) + return TestRepositoryWithBackend(t, be, version) } if err == nil { @@ -88,7 +94,7 @@ func TestRepository(t testing.TB) (r restic.Repository, cleanup func()) { } } - return TestRepositoryWithBackend(t, nil) + return TestRepositoryWithBackend(t, nil, version) } // TestOpenLocal opens a local repository. @@ -98,7 +104,7 @@ func TestOpenLocal(t testing.TB, dir string) (r restic.Repository) { t.Fatal(err) } - repo := New(be) + repo := New(be, Options{}) err = repo.SearchKey(context.TODO(), test.TestPassword, 10, "") if err != nil { t.Fatal(err) @@ -106,3 +112,23 @@ func TestOpenLocal(t testing.TB, dir string) (r restic.Repository) { return repo } + +type VersionedTest func(t *testing.T, version uint) + +func TestAllVersions(t *testing.T, test VersionedTest) { + for version := restic.MinRepoVersion; version <= restic.MaxRepoVersion; version++ { + t.Run(fmt.Sprintf("v%d", version), func(t *testing.T) { + test(t, uint(version)) + }) + } +} + +type VersionedBenchmark func(b *testing.B, version uint) + +func BenchmarkAllVersions(b *testing.B, bench VersionedBenchmark) { + for version := restic.MinRepoVersion; version <= restic.MaxRepoVersion; version++ { + b.Run(fmt.Sprintf("v%d", version), func(b *testing.B) { + bench(b, uint(version)) + }) + } +} diff --git a/internal/restic/blob.go b/internal/restic/blob.go index d365bb92e..a4fcdb1ac 100644 --- a/internal/restic/blob.go +++ b/internal/restic/blob.go @@ -9,13 +9,25 @@ import ( // Blob is one part of a file or a tree. 
type Blob struct { BlobHandle - Length uint - Offset uint + Length uint + Offset uint + UncompressedLength uint } func (b Blob) String() string { - return fmt.Sprintf("<Blob (%v) %v, offset %v, length %v>", - b.Type, b.ID.Str(), b.Offset, b.Length) + return fmt.Sprintf("<Blob (%v) %v, offset %v, length %v, uncompressed length %v>", + b.Type, b.ID.Str(), b.Offset, b.Length, b.UncompressedLength) +} + +func (b Blob) DataLength() uint { + if b.UncompressedLength != 0 { + return b.UncompressedLength + } + return uint(PlaintextLength(int(b.Length))) +} + +func (b Blob) IsCompressed() bool { + return b.UncompressedLength != 0 } // PackedBlob is a blob stored within a file. diff --git a/internal/restic/config.go b/internal/restic/config.go index ded98ac1b..6df32e2ef 100644 --- a/internal/restic/config.go +++ b/internal/restic/config.go @@ -18,9 +18,12 @@ type Config struct { ChunkerPolynomial chunker.Pol `json:"chunker_polynomial"` } -// RepoVersion is the version that is written to the config when a repository +const MinRepoVersion = 1 +const MaxRepoVersion = 2 + +// StableRepoVersion is the version that is written to the config when a repository // is newly created with Init(). -const RepoVersion = 1 +const StableRepoVersion = 1 // JSONUnpackedLoader loads unpacked JSON. type JSONUnpackedLoader interface { @@ -29,7 +32,7 @@ type JSONUnpackedLoader interface { // CreateConfig creates a config file with a randomly selected polynomial and // ID. -func CreateConfig() (Config, error) { +func CreateConfig(version uint) (Config, error) { var ( err error cfg Config @@ -41,18 +44,24 @@ func CreateConfig() (Config, error) { } cfg.ID = NewRandomID().String() - cfg.Version = RepoVersion + cfg.Version = version debug.Log("New config: %#v", cfg) return cfg, nil } // TestCreateConfig creates a config for use within tests. -func TestCreateConfig(t testing.TB, pol chunker.Pol) (cfg Config) { +func TestCreateConfig(t testing.TB, pol chunker.Pol, version uint) (cfg Config) { cfg.ChunkerPolynomial = pol cfg.ID = NewRandomID().String() - cfg.Version = RepoVersion + if version == 0 { + version = StableRepoVersion + } + if version < MinRepoVersion || version > MaxRepoVersion { + t.Fatalf("version %d is out of range", version) + } + cfg.Version = version return cfg } @@ -77,7 +86,7 @@ func LoadConfig(ctx context.Context, r JSONUnpackedLoader) (Config, error) { return Config{}, err } - if cfg.Version != RepoVersion { + if cfg.Version < MinRepoVersion || cfg.Version > MaxRepoVersion { return Config{}, errors.Errorf("unsupported repository version %v", cfg.Version) } diff --git a/internal/restic/config_test.go b/internal/restic/config_test.go index 506381965..fd8e4aeed 100644 --- a/internal/restic/config_test.go +++ b/internal/restic/config_test.go @@ -32,7 +32,7 @@ func TestConfig(t *testing.T) { return restic.ID{}, nil } - cfg1, err := restic.CreateConfig() + cfg1, err := restic.CreateConfig(restic.MaxRepoVersion) rtest.OK(t, err) _, err = saver(save).SaveJSONUnpacked(restic.ConfigFile, cfg1) diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go index 206703ce3..d255dad15 100644 --- a/internal/restorer/filerestorer.go +++ b/internal/restorer/filerestorer.go @@ -117,7 +117,7 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error { err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob) { if largeFile { packsMap[packID] = append(packsMap[packID], fileBlobInfo{id: blob.ID, offset: fileOffset}) - fileOffset += int64(restic.PlaintextLength(int(blob.Length))) + fileOffset += int64(blob.DataLength()) } pack, ok := packs[packID] if !ok { @@ -195,7 +195,7 @@ func (r 
*fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error { if packID.Equal(pack.id) { addBlob(blob, fileOffset) } - fileOffset += int64(restic.PlaintextLength(int(blob.Length))) + fileOffset += int64(blob.DataLength()) }) if err != nil { // restoreFiles should have caught this error before
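The compressUnpacked/decompressUnpacked pair in the repository.go hunks above defines the framing used for compressed unpacked files: a single version byte (2) followed by a zstd stream, while legacy files still begin with raw JSON ('{' or '['). The following standalone Go sketch illustrates that framing with github.com/klauspost/compress/zstd; the helper names (encodeUnpacked, decodeUnpacked) are illustrative only and do not appear in the patch.

package main

import (
	"errors"
	"fmt"

	"github.com/klauspost/compress/zstd"
)

// encodeUnpacked prepends the version byte 2 and appends the zstd-compressed
// payload, mirroring the framing used by compressUnpacked above.
func encodeUnpacked(enc *zstd.Encoder, p []byte) []byte {
	return enc.EncodeAll(p, []byte{2})
}

// decodeUnpacked accepts both legacy plaintext JSON and version-2 framed data.
func decodeUnpacked(dec *zstd.Decoder, p []byte) ([]byte, error) {
	if len(p) == 0 || p[0] == '{' || p[0] == '[' {
		// legacy format: raw JSON, no compression
		return p, nil
	}
	if p[0] != 2 {
		return nil, errors.New("unsupported encoding version")
	}
	return dec.DecodeAll(p[1:], nil)
}

func main() {
	enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedDefault))
	if err != nil {
		panic(err)
	}
	dec, err := zstd.NewReader(nil)
	if err != nil {
		panic(err)
	}
	framed := encodeUnpacked(enc, []byte(`{"version":2}`))
	plain, err := decodeUnpacked(dec, framed)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(plain)) // prints {"version":2}
}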