From 6fb408d90e6e808642212c7b3b60e11aab2728d5 Mon Sep 17 00:00:00 2001
From: Michael Eischer <michael.eischer@fau.de>
Date: Sun, 13 Feb 2022 17:24:09 +0100
Subject: [PATCH] repository: implement pack compression
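
Blobs can now be stored in compressed form. For repository format
version 2, saveAndEncrypt compresses the plaintext with zstd before
encrypting it, and the pack header records the uncompressed length
next to the in-pack length. Two new header entry type bytes mark
compressed blobs; the entry layouts are (all lengths little-endian
uint32, blob IDs are 32-byte SHA-256 hashes):

    type 0: plain data blob        Type(1) | Length(4) | ID(32)
    type 1: plain tree blob        Type(1) | Length(4) | ID(32)
    type 2: compressed data blob   Type(1) | Length(4) | UncompressedLength(4) | ID(32)
    type 3: compressed tree blob   Type(1) | Length(4) | UncompressedLength(4) | ID(32)

An entry is therefore either 37 bytes (plainEntrySize) or 41 bytes
(entrySize), so readRecords and readHeader now operate on byte counts
instead of entry counts. The index JSON gains an optional
"uncompressed_length" field, and LoadIndex refuses to load an index
that references compressed blobs in a version 1 repository.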
---
 internal/pack/pack.go                      | 114 +++++++++++++--------
 internal/pack/pack_internal_test.go        |  14 +--
 internal/pack/pack_test.go                 |   2 +-
 internal/repository/index.go               |  38 ++++---
 internal/repository/indexmap.go            |  14 +--
 internal/repository/indexmap_test.go       |  11 +-
 internal/repository/packer_manager_test.go |   2 +-
 internal/repository/repository.go          |  49 ++++++++-
 internal/restic/blob.go                    |  20 +++-
 internal/restorer/filerestorer.go          |   4 +-
 10 files changed, 184 insertions(+), 84 deletions(-)

diff --git a/internal/pack/pack.go b/internal/pack/pack.go
index 9fa209054..697a14a5d 100644
--- a/internal/pack/pack.go
+++ b/internal/pack/pack.go
@@ -32,7 +32,7 @@ func NewPacker(k *crypto.Key, wr io.Writer) *Packer {
 
 // Add saves the data read from rd as a new blob to the packer. Returned is the
 // number of bytes written to the pack.
-func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte) (int, error) {
+func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte, uncompressedLength int) (int, error) {
     p.m.Lock()
     defer p.m.Unlock()
 
@@ -41,20 +41,23 @@ func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte) (int, error)
     n, err := p.wr.Write(data)
     c.Length = uint(n)
     c.Offset = p.bytes
+    c.UncompressedLength = uint(uncompressedLength)
     p.bytes += uint(n)
     p.blobs = append(p.blobs, c)
 
     return n, errors.Wrap(err, "Write")
 }
 
-var entrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{}))
+var entrySize = uint(binary.Size(restic.BlobType(0)) + 2*headerLengthSize + len(restic.ID{}))
+var plainEntrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{}))
 
 // headerEntry describes the format of header entries. It serves only as
 // documentation.
 type headerEntry struct {
-    Type   uint8
-    Length uint32
-    ID     restic.ID
+    Type               uint8
+    Length             uint32
+    ID                 restic.ID
+    UncompressedLength uint32 // stored before ID for compressed entries (types 2/3)
 }
 
 // Finalize writes the header for all added blobs and finalizes the pack.
@@ -70,7 +73,7 @@ func (p *Packer) Finalize() (uint, error) {
         return 0, err
     }
 
-    encryptedHeader := make([]byte, 0, len(header)+p.k.Overhead()+p.k.NonceSize())
+    encryptedHeader := make([]byte, 0, restic.CiphertextLength(len(header)))
     nonce := crypto.NewRandomNonce()
     encryptedHeader = append(encryptedHeader, nonce...)
     encryptedHeader = p.k.Seal(encryptedHeader, nonce, header, nil)
@@ -81,7 +84,7 @@ func (p *Packer) Finalize() (uint, error) {
         return 0, errors.Wrap(err, "Write")
     }
 
-    hdrBytes := restic.CiphertextLength(len(header))
+    hdrBytes := len(encryptedHeader)
     if n != hdrBytes {
         return 0, errors.New("wrong number of bytes written")
     }
@@ -104,11 +107,15 @@ func (p *Packer) makeHeader() ([]byte, error) {
     buf := make([]byte, 0, len(p.blobs)*int(entrySize))
 
     for _, b := range p.blobs {
-        switch b.Type {
-        case restic.DataBlob:
+        switch {
+        case b.Type == restic.DataBlob && b.UncompressedLength == 0:
             buf = append(buf, 0)
-        case restic.TreeBlob:
+        case b.Type == restic.TreeBlob && b.UncompressedLength == 0:
             buf = append(buf, 1)
+        case b.Type == restic.DataBlob && b.UncompressedLength != 0:
+            buf = append(buf, 2)
+        case b.Type == restic.TreeBlob && b.UncompressedLength != 0:
+            buf = append(buf, 3)
         default:
             return nil, errors.Errorf("invalid blob type %v", b.Type)
         }
@@ -116,6 +123,10 @@ func (p *Packer) makeHeader() ([]byte, error) {
         var lenLE [4]byte
         binary.LittleEndian.PutUint32(lenLE[:], uint32(b.Length))
         buf = append(buf, lenLE[:]...)
+        if b.UncompressedLength != 0 {
+            binary.LittleEndian.PutUint32(lenLE[:], uint32(b.UncompressedLength))
+            buf = append(buf, lenLE[:]...)
+        }
         buf = append(buf, b.ID[:]...)
     }
 
@@ -152,7 +163,7 @@ func (p *Packer) String() string {
 
 var (
     // we require at least one entry in the header, and one blob for a pack file
-    minFileSize = entrySize + crypto.Extension + uint(headerLengthSize)
+    minFileSize = plainEntrySize + crypto.Extension + uint(headerLengthSize)
 )
 
 const (
@@ -167,16 +178,11 @@ const (
     eagerEntries = 15
 )
 
-// readRecords reads up to max records from the underlying ReaderAt, returning
-// the raw header, the total number of records in the header, and any error.
-// If the header contains fewer than max entries, the header is truncated to
+// readRecords reads up to bufsize bytes from the underlying ReaderAt, returning
+// the raw header, the total number of bytes in the header, and any error.
+// If the header contains fewer than bufsize bytes, the header is truncated to
 // the appropriate size.
-func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
-    var bufsize int
-    bufsize += max * int(entrySize)
-    bufsize += crypto.Extension
-    bufsize += headerLengthSize
-
+func readRecords(rd io.ReaderAt, size int64, bufsize int) ([]byte, int, error) {
     if bufsize > int(size) {
         bufsize = int(size)
     }
@@ -197,8 +203,6 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
         err = InvalidFileError{Message: "header length is zero"}
     case hlen < crypto.Extension:
         err = InvalidFileError{Message: "header length is too small"}
-    case (hlen-crypto.Extension)%uint32(entrySize) != 0:
-        err = InvalidFileError{Message: "header length is invalid"}
     case int64(hlen) > size-int64(headerLengthSize):
         err = InvalidFileError{Message: "header is larger than file"}
     case int64(hlen) > MaxHeaderSize-int64(headerLengthSize):
@@ -208,8 +212,8 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
         return nil, 0, errors.Wrap(err, "readHeader")
     }
 
-    total := (int(hlen) - crypto.Extension) / int(entrySize)
-    if total < max {
+    total := int(hlen + headerLengthSize)
+    if total < bufsize {
         // truncate to the beginning of the pack header
         b = b[len(b)-int(hlen):]
     }
@@ -230,11 +234,12 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
     // eagerly download eagerEntries header entries as part of header-length request.
     // only make second request if actual number of entries is greater than eagerEntries
-    b, c, err := readRecords(rd, size, eagerEntries)
+    eagerSize := eagerEntries*int(entrySize) + headerSize
+    b, c, err := readRecords(rd, size, eagerSize)
     if err != nil {
         return nil, err
     }
 
-    if c <= eagerEntries {
+    if c <= eagerSize {
         // eager read sufficed, return what we got
         return b, nil
     }
@@ -262,7 +267,7 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr
         return nil, 0, err
     }
 
-    if len(buf) < k.NonceSize()+k.Overhead() {
+    if len(buf) < restic.CiphertextLength(0) {
         return nil, 0, errors.New("invalid header, too small")
     }
 
@@ -274,11 +279,12 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr
         return nil, 0, err
     }
 
-    entries = make([]restic.Blob, 0, uint(len(buf))/entrySize)
+    // might over-allocate a bit if all blobs have the full entrySize, but only by a few percent
+    entries = make([]restic.Blob, 0, uint(len(buf))/plainEntrySize)
 
     pos := uint(0)
     for len(buf) > 0 {
-        entry, err := parseHeaderEntry(buf)
+        entry, headerSize, err := parseHeaderEntry(buf)
         if err != nil {
             return nil, 0, err
         }
@@ -286,36 +292,60 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr
         entries = append(entries, entry)
 
         pos += entry.Length
-        buf = buf[entrySize:]
+        buf = buf[headerSize:]
     }
 
     return entries, hdrSize, nil
 }
 
-func parseHeaderEntry(p []byte) (b restic.Blob, err error) {
-    if uint(len(p)) < entrySize {
+func parseHeaderEntry(p []byte) (b restic.Blob, size uint, err error) {
+    l := uint(len(p))
+    size = plainEntrySize
+
+    if l < plainEntrySize {
         err = errors.Errorf("parseHeaderEntry: buffer of size %d too short", len(p))
-        return b, err
+        return b, size, err
     }
-    p = p[:entrySize]
 
-    switch p[0] {
-    case 0:
+    tpe := p[0]
+    switch tpe {
+    case 0, 2:
         b.Type = restic.DataBlob
-    case 1:
+    case 1, 3:
         b.Type = restic.TreeBlob
     default:
-        return b, errors.Errorf("invalid type %d", p[0])
+        return b, size, errors.Errorf("invalid type %d", tpe)
     }
 
     b.Length = uint(binary.LittleEndian.Uint32(p[1:5]))
-    copy(b.ID[:], p[5:])
+    p = p[5:]
+    if tpe == 2 || tpe == 3 {
+        size = entrySize
+        if l < entrySize {
+            err = errors.Errorf("parseHeaderEntry: buffer of size %d too short", l)
+            return b, size, err
+        }
+        b.UncompressedLength = uint(binary.LittleEndian.Uint32(p[0:4]))
+        p = p[4:]
+    }
 
-    return b, nil
+    copy(b.ID[:], p[:])
+
+    return b, size, nil
+}
+
+func CalculateEntrySize(blob restic.Blob) int {
+    if blob.UncompressedLength != 0 {
+        return int(entrySize)
+    }
+    return int(plainEntrySize)
 }
 
 func CalculateHeaderSize(blobs []restic.Blob) int {
-    return headerSize + len(blobs)*int(entrySize)
+    size := headerSize
+    for _, blob := range blobs {
+        size += CalculateEntrySize(blob)
+    }
+    return size
 }
 
 // Size returns the size of all packs computed by index information.
@@ -333,7 +363,7 @@ func Size(ctx context.Context, mi restic.MasterIndex, onlyHdr bool) map[restic.I
         if !onlyHdr {
             size += int64(blob.Length)
         }
-        packSize[blob.PackID] = size + int64(entrySize)
+        packSize[blob.PackID] = size + int64(CalculateEntrySize(blob.Blob))
     }
 
     return packSize
diff --git a/internal/pack/pack_internal_test.go b/internal/pack/pack_internal_test.go
index b8078c829..93d04f18e 100644
--- a/internal/pack/pack_internal_test.go
+++ b/internal/pack/pack_internal_test.go
@@ -23,9 +23,10 @@ func TestParseHeaderEntry(t *testing.T) {
     buf := new(bytes.Buffer)
     _ = binary.Write(buf, binary.LittleEndian, &h)
 
-    b, err := parseHeaderEntry(buf.Bytes())
+    b, size, err := parseHeaderEntry(buf.Bytes())
     rtest.OK(t, err)
     rtest.Equals(t, restic.DataBlob, b.Type)
+    rtest.Equals(t, plainEntrySize, size)
 
     t.Logf("%v %v", h.ID, b.ID)
     rtest.Assert(t, bytes.Equal(h.ID[:], b.ID[:]), "id mismatch")
     rtest.Equals(t, uint(h.Length), b.Length)
@@ -34,14 +35,14 @@ func TestParseHeaderEntry(t *testing.T) {
     buf.Reset()
     _ = binary.Write(buf, binary.LittleEndian, &h)
 
-    b, err = parseHeaderEntry(buf.Bytes())
+    b, _, err = parseHeaderEntry(buf.Bytes())
     rtest.Assert(t, err != nil, "no error for invalid type")
 
     h.Type = 0
     buf.Reset()
     _ = binary.Write(buf, binary.LittleEndian, &h)
 
-    b, err = parseHeaderEntry(buf.Bytes()[:entrySize-1])
+    b, _, err = parseHeaderEntry(buf.Bytes()[:plainEntrySize-1])
     rtest.Assert(t, err != nil, "no error for short input")
 }
 
@@ -97,7 +98,8 @@ func TestReadHeaderEagerLoad(t *testing.T) {
 func TestReadRecords(t *testing.T) {
     testReadRecords := func(dataSize, entryCount, totalRecords int) {
         totalHeader := rtest.Random(0, totalRecords*int(entrySize)+crypto.Extension)
-        off := len(totalHeader) - (entryCount*int(entrySize) + crypto.Extension)
+        bufSize := entryCount*int(entrySize) + crypto.Extension
+        off := len(totalHeader) - bufSize
         if off < 0 {
             off = 0
         }
@@ -110,10 +112,10 @@ func TestReadRecords(t *testing.T) {
 
         rd := bytes.NewReader(buf.Bytes())
 
-        header, count, err := readRecords(rd, int64(rd.Len()), entryCount)
+        header, count, err := readRecords(rd, int64(rd.Len()), bufSize+4)
         rtest.OK(t, err)
+        rtest.Equals(t, len(totalHeader)+4, count)
         rtest.Equals(t, expectedHeader, header)
-        rtest.Equals(t, totalRecords, count)
     }
 
     // basic
diff --git a/internal/pack/pack_test.go b/internal/pack/pack_test.go
index 2b7ec7fea..6170e807c 100644
--- a/internal/pack/pack_test.go
+++ b/internal/pack/pack_test.go
@@ -38,7 +38,7 @@ func newPack(t testing.TB, k *crypto.Key, lengths []int) ([]Buf, []byte, uint) {
     var buf bytes.Buffer
     p := pack.NewPacker(k, &buf)
     for _, b := range bufs {
-        _, err := p.Add(restic.TreeBlob, b.id, b.data)
+        _, err := p.Add(restic.TreeBlob, b.id, b.data, 2*len(b.data))
         rtest.OK(t, err)
     }
 
diff --git a/internal/repository/index.go b/internal/repository/index.go
index 3db19b3b8..5f6fe1997 100644
--- a/internal/repository/index.go
+++ b/internal/repository/index.go
@@ -75,12 +75,12 @@ const maxuint32 = 1<<32 - 1
 
 func (idx *Index) store(packIndex int, blob restic.Blob) {
     // assert that offset and length fit into uint32!
-    if blob.Offset > maxuint32 || blob.Length > maxuint32 {
+    if blob.Offset > maxuint32 || blob.Length > maxuint32 || blob.UncompressedLength > maxuint32 {
         panic("offset or length does not fit in uint32. You have packs > 4GB!")
     }
 
     m := &idx.byType[blob.Type]
-    m.add(blob.ID, packIndex, uint32(blob.Offset), uint32(blob.Length))
+    m.add(blob.ID, packIndex, uint32(blob.Offset), uint32(blob.Length), uint32(blob.UncompressedLength))
 }
 
 // Final returns true iff the index is already written to the repository, it is
@@ -169,8 +169,9 @@ func (idx *Index) toPackedBlob(e *indexEntry, t restic.BlobType) restic.PackedBl
             BlobHandle: restic.BlobHandle{
                 ID:   e.id,
                 Type: t},
-            Length: uint(e.length),
-            Offset: uint(e.offset),
+            Length:             uint(e.length),
+            Offset:             uint(e.offset),
+            UncompressedLength: uint(e.uncompressedLength),
         },
         PackID: idx.packs[e.packIndex],
     }
@@ -225,6 +226,9 @@ func (idx *Index) LookupSize(bh restic.BlobHandle) (plaintextLength uint, found
     if e == nil {
         return 0, false
     }
+    if e.uncompressedLength != 0 {
+        return uint(e.uncompressedLength), true
+    }
     return uint(restic.PlaintextLength(int(e.length))), true
 }
 
@@ -357,10 +361,11 @@ type packJSON struct {
 }
 
 type blobJSON struct {
-    ID     restic.ID       `json:"id"`
-    Type   restic.BlobType `json:"type"`
-    Offset uint            `json:"offset"`
-    Length uint            `json:"length"`
+    ID                 restic.ID       `json:"id"`
+    Type               restic.BlobType `json:"type"`
+    Offset             uint            `json:"offset"`
+    Length             uint            `json:"length"`
+    UncompressedLength uint            `json:"uncompressed_length,omitempty"`
 }
 
 // generatePackList returns a list of packs.
@@ -391,10 +396,11 @@ func (idx *Index) generatePackList() ([]*packJSON, error) {
 
             // add blob
             p.Blobs = append(p.Blobs, blobJSON{
-                ID:     e.id,
-                Type:   restic.BlobType(typ),
-                Offset: uint(e.offset),
-                Length: uint(e.length),
+                ID:                 e.id,
+                Type:               restic.BlobType(typ),
+                Offset:             uint(e.offset),
+                Length:             uint(e.length),
+                UncompressedLength: uint(e.uncompressedLength),
             })
 
             return true
@@ -553,7 +559,7 @@ func (idx *Index) merge(idx2 *Index) error {
         m2.foreach(func(e2 *indexEntry) bool {
             if !hasIdenticalEntry(e2) {
                 // packIndex needs to be changed as idx2.pack was appended to idx.pack, see above
-                m.add(e2.id, e2.packIndex+packlen, e2.offset, e2.length)
+                m.add(e2.id, e2.packIndex+packlen, e2.offset, e2.length, e2.uncompressedLength)
             }
             return true
         })
@@ -601,8 +607,9 @@ func DecodeIndex(buf []byte, id restic.ID) (idx *Index, oldFormat bool, err erro
                 BlobHandle: restic.BlobHandle{
                     Type: blob.Type,
                     ID:   blob.ID},
-                Offset: blob.Offset,
-                Length: blob.Length,
+                Offset:             blob.Offset,
+                Length:             blob.Length,
+                UncompressedLength: blob.UncompressedLength,
             })
 
             switch blob.Type {
@@ -648,6 +655,7 @@ func decodeOldIndex(buf []byte) (idx *Index, err error) {
                     ID:   blob.ID},
                 Offset: blob.Offset,
                 Length: blob.Length,
+                // no compressed length in the old index format
             })
 
             switch blob.Type {
diff --git a/internal/repository/indexmap.go b/internal/repository/indexmap.go
index f713a3304..3d0ed5db4 100644
--- a/internal/repository/indexmap.go
+++ b/internal/repository/indexmap.go
@@ -32,7 +32,7 @@ const (
 
 // add inserts an indexEntry for the given arguments into the map,
 // using id as the key.
-func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32) {
+func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32, uncompressedLength uint32) {
     switch {
     case m.numentries == 0: // Lazy initialization.
         m.init()
@@ -47,6 +47,7 @@ func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32) {
     e.packIndex = packIdx
     e.offset = offset
     e.length = length
+    e.uncompressedLength = uncompressedLength
     m.buckets[h] = e
     m.numentries++
 
@@ -152,9 +153,10 @@ func (m *indexMap) newEntry() *indexEntry {
 }
 
 type indexEntry struct {
-    id        restic.ID
-    next      *indexEntry
-    packIndex int // Position in containing Index's packs field.
-    offset    uint32
-    length    uint32
+    id                 restic.ID
+    next               *indexEntry
+    packIndex          int // Position in containing Index's packs field.
+    offset             uint32
+    length             uint32
+    uncompressedLength uint32
 }
diff --git a/internal/repository/indexmap_test.go b/internal/repository/indexmap_test.go
index d803bf3c5..6699b3601 100644
--- a/internal/repository/indexmap_test.go
+++ b/internal/repository/indexmap_test.go
@@ -22,7 +22,7 @@ func TestIndexMapBasic(t *testing.T) {
         r.Read(id[:])
         rtest.Assert(t, m.get(id) == nil, "%v retrieved but not added", id)
 
-        m.add(id, 0, 0, 0)
+        m.add(id, 0, 0, 0, 0)
         rtest.Assert(t, m.get(id) != nil, "%v added but not retrieved", id)
         rtest.Equals(t, uint(i), m.len())
     }
@@ -41,7 +41,7 @@ func TestIndexMapForeach(t *testing.T) {
     for i := 0; i < N; i++ {
         var id restic.ID
         id[0] = byte(i)
-        m.add(id, i, uint32(i), uint32(i))
+        m.add(id, i, uint32(i), uint32(i), uint32(i/2))
     }
 
     seen := make(map[int]struct{})
@@ -51,6 +51,7 @@ func TestIndexMapForeach(t *testing.T) {
         rtest.Equals(t, i, e.packIndex)
         rtest.Equals(t, i, int(e.length))
         rtest.Equals(t, i, int(e.offset))
+        rtest.Equals(t, i/2, int(e.uncompressedLength))
 
         seen[i] = struct{}{}
         return true
@@ -85,13 +86,13 @@ func TestIndexMapForeachWithID(t *testing.T) {
 
     // Test insertion and retrieval of duplicates.
     for i := 0; i < ndups; i++ {
-        m.add(id, i, 0, 0)
+        m.add(id, i, 0, 0, 0)
     }
 
     for i := 0; i < 100; i++ {
         var otherid restic.ID
         r.Read(otherid[:])
-        m.add(otherid, -1, 0, 0)
+        m.add(otherid, -1, 0, 0, 0)
     }
 
     n = 0
@@ -109,7 +110,7 @@ func TestIndexMapForeachWithID(t *testing.T) {
 
 func BenchmarkIndexMapHash(b *testing.B) {
     var m indexMap
-    m.add(restic.ID{}, 0, 0, 0) // Trigger lazy initialization.
+    m.add(restic.ID{}, 0, 0, 0, 0) // Trigger lazy initialization.
 
     ids := make([]restic.ID, 128) // 4 KiB.
     r := rand.New(rand.NewSource(time.Now().UnixNano()))
diff --git a/internal/repository/packer_manager_test.go b/internal/repository/packer_manager_test.go
index 1a810ab61..c5233ab4e 100644
--- a/internal/repository/packer_manager_test.go
+++ b/internal/repository/packer_manager_test.go
@@ -70,7 +70,7 @@ func fillPacks(t testing.TB, rnd *rand.Rand, be Saver, pm *packerManager, buf []
         // Only change a few bytes so we know we're not benchmarking the RNG.
         rnd.Read(buf[:min(l, 4)])
 
-        n, err := packer.Add(restic.DataBlob, id, buf)
+        n, err := packer.Add(restic.DataBlob, id, buf, 0)
         if err != nil {
             t.Fatal(err)
         }
diff --git a/internal/repository/repository.go b/internal/repository/repository.go
index b00dfa39a..4a340222f 100644
--- a/internal/repository/repository.go
+++ b/internal/repository/repository.go
@@ -235,12 +235,23 @@ func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.
             continue
         }
 
+        if blob.IsCompressed() {
+            plaintext, err = r.dec.DecodeAll(plaintext, make([]byte, 0, blob.DataLength()))
+            if err != nil {
+                lastError = errors.Errorf("decompressing blob %v failed: %v", id, err)
+                continue
+            }
+        }
+
         // check hash
         if !restic.Hash(plaintext).Equal(id) {
             lastError = errors.Errorf("blob %v returned invalid hash", id)
             continue
         }
 
+        if len(plaintext) > cap(buf) {
+            return plaintext, nil
+        }
         // move decrypted data to the start of the buffer
         copy(buf, plaintext)
         return buf[:len(plaintext)], nil
@@ -275,6 +286,12 @@ func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, bo
 func (r *Repository) saveAndEncrypt(ctx context.Context, t restic.BlobType, data []byte, id restic.ID) error {
     debug.Log("save id %v (%v, %d bytes)", id, t, len(data))
 
+    uncompressedLength := 0
+    if r.cfg.Version > 1 {
+        uncompressedLength = len(data)
+        data = r.enc.EncodeAll(data, nil)
+    }
+
     nonce := crypto.NewRandomNonce()
     ciphertext := make([]byte, 0, restic.CiphertextLength(len(data)))
 
@@ -301,7 +318,7 @@ func (r *Repository) saveAndEncrypt(ctx context.Context, t restic.BlobType, data
     }
 
     // save ciphertext
-    _, err = packer.Add(t, id, ciphertext)
+    _, err = packer.Add(t, id, ciphertext, uncompressedLength)
     if err != nil {
         return err
     }
@@ -536,6 +553,17 @@ func (r *Repository) LoadIndex(ctx context.Context) error {
         return err
     }
 
+    if r.cfg.Version < 2 {
+        // sanity check
+        ctx, cancel := context.WithCancel(ctx)
+        defer cancel()
+        for blob := range r.idx.Each(ctx) {
+            if blob.IsCompressed() {
+                return errors.Fatal("index uses feature not supported by repository version 1")
+            }
+        }
+    }
+
     // remove index files from the cache which have been removed in the repo
     return r.PrepareCache(validIndex)
 }
@@ -834,9 +862,15 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack
 
     debug.Log("streaming pack %v (%d to %d bytes), blobs: %v", packID, dataStart, dataEnd, len(blobs))
 
+    dec, err := zstd.NewReader(nil)
+    if err != nil {
+        panic(err)
+    }
+    defer dec.Close()
+
     ctx, cancel := context.WithCancel(ctx)
     // stream blobs in pack
-    err := beLoad(ctx, h, int(dataEnd-dataStart), int64(dataStart), func(rd io.Reader) error {
+    err = beLoad(ctx, h, int(dataEnd-dataStart), int64(dataStart), func(rd io.Reader) error {
         // prevent callbacks after cancelation
         if ctx.Err() != nil {
             return ctx.Err()
@@ -849,6 +883,7 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack
         bufRd := bufio.NewReaderSize(rd, bufferSize)
         currentBlobEnd := dataStart
         var buf []byte
+        var decode []byte
         for _, entry := range blobs {
             skipBytes := int(entry.Offset - currentBlobEnd)
             if skipBytes < 0 {
@@ -888,6 +923,16 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack
             // decryption errors are likely permanent, give the caller a chance to skip them
             nonce, ciphertext := buf[:key.NonceSize()], buf[key.NonceSize():]
             plaintext, err := key.Open(ciphertext[:0], nonce, ciphertext, nil)
+            if err == nil && entry.IsCompressed() {
+                if cap(decode) < int(entry.DataLength()) {
+                    decode = make([]byte, 0, entry.DataLength())
+                }
+                decode, err = dec.DecodeAll(plaintext, decode[:0])
+                plaintext = decode
+                if err != nil {
+                    err = errors.Errorf("decompressing blob %v failed: %v", h, err)
+                }
+            }
             if err == nil {
                 id := restic.Hash(plaintext)
                 if !id.Equal(entry.ID) {
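
The compression calls above (r.enc.EncodeAll, dec.DecodeAll) match the stateless
API of github.com/klauspost/compress/zstd, the zstd implementation the patch
imports. The following is a minimal, self-contained sketch of that round trip;
it is not restic code, and the buffer sizes are only illustrative:

    package main

    import (
        "bytes"
        "fmt"

        "github.com/klauspost/compress/zstd"
    )

    func main() {
        // An Encoder/Decoder created once can be shared: EncodeAll and
        // DecodeAll are safe for concurrent use. That is why StreamPack
        // creates a single decoder and reuses it for every blob in a pack.
        enc, err := zstd.NewWriter(nil)
        if err != nil {
            panic(err)
        }
        dec, err := zstd.NewReader(nil)
        if err != nil {
            panic(err)
        }
        defer dec.Close()

        plain := bytes.Repeat([]byte("restic pack compression "), 100)

        // Compress into a fresh buffer, as saveAndEncrypt does with
        // r.enc.EncodeAll(data, nil).
        compressed := enc.EncodeAll(plain, nil)

        // Decompress into a reusable buffer; the capacity can come from the
        // uncompressed length stored in the pack header (entry.DataLength()).
        out, err := dec.DecodeAll(compressed, make([]byte, 0, len(plain)))
        if err != nil {
            panic(err)
        }
        fmt.Println(len(plain), "->", len(compressed), bytes.Equal(plain, out))
    }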
diff --git a/internal/restic/blob.go b/internal/restic/blob.go
index d365bb92e..a4fcdb1ac 100644
--- a/internal/restic/blob.go
+++ b/internal/restic/blob.go
@@ -9,13 +9,25 @@ import (
 
 // Blob is one part of a file or a tree.
 type Blob struct {
     BlobHandle
-    Length uint
-    Offset uint
+    Length             uint
+    Offset             uint
+    UncompressedLength uint
 }
 
 func (b Blob) String() string {
-    return fmt.Sprintf("<Blob (%v) %v, offset %v, length %v>",
-        b.Type, b.ID.Str(), b.Offset, b.Length)
+    return fmt.Sprintf("<Blob (%v) %v, offset %v, length %v, uncompressed length %v>",
+        b.Type, b.ID.Str(), b.Offset, b.Length, b.UncompressedLength)
+}
+
+func (b Blob) DataLength() uint {
+    if b.UncompressedLength != 0 {
+        return b.UncompressedLength
+    }
+    return uint(PlaintextLength(int(b.Length)))
+}
+
+func (b Blob) IsCompressed() bool {
+    return b.UncompressedLength != 0
 }
 
 // PackedBlob is a blob stored within a file.
diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go
index 206703ce3..d255dad15 100644
--- a/internal/restorer/filerestorer.go
+++ b/internal/restorer/filerestorer.go
@@ -117,7 +117,7 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
         err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob) {
             if largeFile {
                 packsMap[packID] = append(packsMap[packID], fileBlobInfo{id: blob.ID, offset: fileOffset})
-                fileOffset += int64(restic.PlaintextLength(int(blob.Length)))
+                fileOffset += int64(blob.DataLength())
             }
             pack, ok := packs[packID]
             if !ok {
@@ -195,7 +195,7 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
             if packID.Equal(pack.id) {
                 addBlob(blob, fileOffset)
             }
-            fileOffset += int64(restic.PlaintextLength(int(blob.Length)))
+            fileOffset += int64(blob.DataLength())
         })
         if err != nil {
             // restoreFiles should have caught this error before
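
To make the new header entry layout concrete, here is a standalone sketch of
the parsing that parseHeaderEntry implements above. The entry type and
parseEntry helper are hypothetical stand-ins for restic.Blob and the real
parser, and the sizes assume restic's 32-byte blob IDs:

    package main

    import (
        "encoding/binary"
        "fmt"
    )

    const (
        plainEntrySize = 1 + 4 + 32     // type byte, length, ID
        entrySize      = 1 + 4 + 4 + 32 // type byte, length, uncompressed length, ID
    )

    type entry struct {
        tree               bool
        length             uint32
        uncompressedLength uint32 // zero means the blob is stored uncompressed
        id                 [32]byte
    }

    // parseEntry decodes one header entry and reports how many bytes it consumed.
    func parseEntry(p []byte) (e entry, size int, err error) {
        if len(p) < plainEntrySize {
            return e, 0, fmt.Errorf("buffer of size %d too short", len(p))
        }
        tpe := p[0]
        if tpe > 3 {
            return e, 0, fmt.Errorf("invalid type %d", tpe)
        }
        e.tree = tpe == 1 || tpe == 3
        e.length = binary.LittleEndian.Uint32(p[1:5])
        p, size = p[5:], plainEntrySize
        if tpe == 2 || tpe == 3 { // compressed entries carry an extra length field
            if len(p) < 4+32 {
                return e, 0, fmt.Errorf("truncated compressed entry")
            }
            e.uncompressedLength = binary.LittleEndian.Uint32(p[:4])
            p, size = p[4:], entrySize
        }
        copy(e.id[:], p)
        return e, size, nil
    }

    func main() {
        // A compressed data blob entry: type 2, length 1000, uncompressed 4096.
        buf := []byte{2}
        buf = binary.LittleEndian.AppendUint32(buf, 1000)
        buf = binary.LittleEndian.AppendUint32(buf, 4096)
        buf = append(buf, make([]byte, 32)...) // all-zero ID, for the example
        e, n, _ := parseEntry(buf)
        fmt.Println(e.length, e.uncompressedLength, n) // 1000 4096 41
    }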