azure: check upload using md5 content hash

For files below 256MB this uses the md5 hash calculated while assembling
the pack file. For larger files the hash for each 100MB part is
calculated on the fly. That hash is also reused as temporary filename.
As restic only uploads encrypted data which includes among others a
random initialization vector, the file hash shouldn't be susceptible to
md5 collision attacks (even though the algorithm is broken).
This commit is contained in:
Michael Eischer 2020-12-19 12:40:50 +01:00
parent 9aa2eff384
commit 1d3e99f475

View file

@ -2,6 +2,7 @@ package azure
import (
"context"
"crypto/md5"
"encoding/base64"
"hash"
"io"
@ -115,7 +116,7 @@ func (be *Backend) Location() string {
// Hasher may return a hash function for calculating a content hash for the backend
func (be *Backend) Hasher() hash.Hash {
return nil
return md5.New()
}
// Path returns the path in the bucket that is used for this backend.
@ -154,7 +155,9 @@ func (be *Backend) Save(ctx context.Context, h restic.Handle, rd restic.RewindRe
dataReader := azureAdapter{rd}
// if it's smaller than 256miB, then just create the file directly from the reader
err = be.container.GetBlobReference(objName).CreateBlockBlobFromReader(dataReader, nil)
ref := be.container.GetBlobReference(objName)
ref.Properties.ContentMD5 = base64.StdEncoding.EncodeToString(rd.Hash())
err = ref.CreateBlockBlobFromReader(dataReader, nil)
} else {
// otherwise use the more complicated method
err = be.saveLarge(ctx, objName, rd)
@ -198,10 +201,10 @@ func (be *Backend) saveLarge(ctx context.Context, objName string, rd restic.Rewi
uploadedBytes += n
// upload it as a new "block", use the base64 hash for the ID
h := restic.Hash(buf)
h := md5.Sum(buf)
id := base64.StdEncoding.EncodeToString(h[:])
debug.Log("PutBlock %v with %d bytes", id, len(buf))
err = file.PutBlock(id, buf, nil)
err = file.PutBlock(id, buf, &storage.PutBlockOptions{ContentMD5: id})
if err != nil {
return errors.Wrap(err, "PutBlock")
}