dropbox: implement dropbox hasher #1302

This commit is contained in:
Nick Craig-Wood 2017-05-26 14:19:18 +01:00
parent 8e214e838e
commit 8916455e4f
2 changed files with 215 additions and 0 deletions

127
dropbox/dbhash/dbhash.go Normal file
View file

@ -0,0 +1,127 @@
// Package dbhash implements the dropbox hash as described in
//
// https://www.dropbox.com/developers/reference/content-hash
package dbhash
import (
"crypto/sha256"
"hash"
)
const (
// BlockSize of the checksum in bytes.
BlockSize = sha256.BlockSize
// Size of the checksum in bytes.
Size = sha256.BlockSize
bytesPerBlock = 4 * 1024 * 1024
hashReturnedError = "hash function returned error"
)
type digest struct {
n int // bytes written into blockHash so far
blockHash hash.Hash
totalHash hash.Hash
sumCalled bool
writtenMore bool
}
// New returns a new hash.Hash computing the Dropbox checksum.
func New() hash.Hash {
d := &digest{}
d.Reset()
return d
}
// writeBlockHash writes the current block hash into the total hash
func (d *digest) writeBlockHash() {
blockHash := d.blockHash.Sum(nil)
_, err := d.totalHash.Write(blockHash)
if err != nil {
panic(hashReturnedError)
}
// reset counters for blockhash
d.n = 0
d.blockHash.Reset()
}
// Write writes len(p) bytes from p to the underlying data stream. It returns
// the number of bytes written from p (0 <= n <= len(p)) and any error
// encountered that caused the write to stop early. Write must return a non-nil
// error if it returns n < len(p). Write must not modify the slice data, even
// temporarily.
//
// Implementations must not retain p.
func (d *digest) Write(p []byte) (n int, err error) {
n = len(p)
for len(p) > 0 {
d.writtenMore = true
toWrite := bytesPerBlock - d.n
if toWrite > len(p) {
toWrite = len(p)
}
_, err = d.blockHash.Write(p[:toWrite])
if err != nil {
panic(hashReturnedError)
}
d.n += toWrite
p = p[toWrite:]
// Accumulate the total hash
if d.n == bytesPerBlock {
d.writeBlockHash()
}
}
return n, nil
}
// Sum appends the current hash to b and returns the resulting slice.
// It does not change the underlying hash state.
//
// TODO(ncw) Sum() can only be called once for this type of hash.
// If you call Sum(), then Write() then Sum() it will result in
// a panic. Calling Write() then Sum(), then Sum() is OK.
func (d *digest) Sum(b []byte) []byte {
if d.sumCalled && d.writtenMore {
panic("digest.Sum() called more than once")
}
d.sumCalled = true
d.writtenMore = false
if d.n != 0 {
d.writeBlockHash()
}
return d.totalHash.Sum(b)
}
// Reset resets the Hash to its initial state.
func (d *digest) Reset() {
d.n = 0
d.totalHash = sha256.New()
d.blockHash = sha256.New()
d.sumCalled = false
d.writtenMore = false
}
// Size returns the number of bytes Sum will return.
func (d *digest) Size() int {
return d.totalHash.Size()
}
// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all writes
// are a multiple of the block size.
func (d *digest) BlockSize() int {
return d.totalHash.BlockSize()
}
// Sum returns the Dropbox checksum of the data.
func Sum(data []byte) [Size]byte {
var d digest
d.Reset()
d.Write(data)
var out [Size]byte
d.Sum(out[:0])
return out
}
// must implement this interface
var _ hash.Hash = (*digest)(nil)

View file

@ -0,0 +1,88 @@
package dbhash_test
import (
"encoding/hex"
"fmt"
"testing"
"github.com/ncw/rclone/dropbox/dbhash"
"github.com/stretchr/testify/assert"
)
func testChunk(t *testing.T, chunk int) {
data := make([]byte, chunk)
for i := 0; i < chunk; i++ {
data[i] = 'A'
}
for _, test := range []struct {
n int
want string
}{
{0, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"},
{1, "1cd6ef71e6e0ff46ad2609d403dc3fee244417089aa4461245a4e4fe23a55e42"},
{2, "01e0655fb754d10418a73760f57515f4903b298e6d67dda6bf0987fa79c22c88"},
{4096, "8620913d33852befe09f16fff8fd75f77a83160d29f76f07e0276e9690903035"},
{4194303, "647c8627d70f7a7d13ce96b1e7710a771a55d41a62c3da490d92e56044d311fa"},
{4194304, "d4d63bac5b866c71620185392a8a6218ac1092454a2d16f820363b69852befa3"},
{4194305, "8f553da8d00d0bf509d8470e242888be33019c20c0544811f5b2b89e98360b92"},
{8388607, "83b30cf4fb5195b04a937727ae379cf3d06673bf8f77947f6a92858536e8369c"},
{8388608, "e08b3ba1f538804075c5f939accdeaa9efc7b5c01865c94a41e78ca6550a88e7"},
{8388609, "02c8a4aefc2bfc9036f89a7098001865885938ca580e5c9e5db672385edd303c"},
} {
d := dbhash.New()
var toWrite int
for toWrite = test.n; toWrite >= chunk; toWrite -= chunk {
n, err := d.Write(data)
assert.Nil(t, err)
assert.Equal(t, chunk, n)
}
n, err := d.Write(data[:toWrite])
assert.Nil(t, err)
assert.Equal(t, toWrite, n)
got := hex.EncodeToString(d.Sum(nil))
assert.Equal(t, test.want, got, fmt.Sprintf("when testing length %d", n))
}
}
func TestHashChunk16M(t *testing.T) { testChunk(t, 16*1024*1024) }
func TestHashChunk8M(t *testing.T) { testChunk(t, 8*1024*1024) }
func TestHashChunk4M(t *testing.T) { testChunk(t, 4*1024*1024) }
func TestHashChunk2M(t *testing.T) { testChunk(t, 2*1024*1024) }
func TestHashChunk1M(t *testing.T) { testChunk(t, 1*1024*1024) }
func TestHashChunk64k(t *testing.T) { testChunk(t, 64*1024) }
func TestHashChunk32k(t *testing.T) { testChunk(t, 32*1024) }
func TestHashChunk2048(t *testing.T) { testChunk(t, 2048) }
func TestHashChunk2047(t *testing.T) { testChunk(t, 2047) }
func TestSumCalledTwice(t *testing.T) {
d := dbhash.New()
assert.NotPanics(t, func() { d.Sum(nil) })
d.Reset()
assert.NotPanics(t, func() { d.Sum(nil) })
assert.NotPanics(t, func() { d.Sum(nil) })
_, _ = d.Write([]byte{1})
assert.Panics(t, func() { d.Sum(nil) })
}
func TestSize(t *testing.T) {
d := dbhash.New()
assert.Equal(t, 32, d.Size())
}
func TestBlockSize(t *testing.T) {
d := dbhash.New()
assert.Equal(t, 64, d.BlockSize())
}
func TestSum(t *testing.T) {
assert.Equal(t,
[64]byte{
0x1c, 0xd6, 0xef, 0x71, 0xe6, 0xe0, 0xff, 0x46,
0xad, 0x26, 0x09, 0xd4, 0x03, 0xdc, 0x3f, 0xee,
0x24, 0x44, 0x17, 0x08, 0x9a, 0xa4, 0x46, 0x12,
0x45, 0xa4, 0xe4, 0xfe, 0x23, 0xa5, 0x5e, 0x42,
},
dbhash.Sum([]byte{'A'}),
)
}