Add mailru hash (mrhash)
This commit is contained in:
parent
94eb9a4014
commit
bdcd0b4c64
4 changed files with 229 additions and 1 deletions
134
backend/mailru/mrhash/mrhash.go
Normal file
134
backend/mailru/mrhash/mrhash.go
Normal file
|
@ -0,0 +1,134 @@
|
||||||
|
// Package mrhash implements the mailru hash, which is a modified SHA1.
|
||||||
|
// If file size is less than or equal to the SHA1 digest size (20 bytes),
|
||||||
|
// its hash is simply its data right-padded with zero bytes.
|
||||||
|
// Hash sum of a larger file is computed as a SHA1 sum of the file data
|
||||||
|
// bytes, prefixed with the string "mrCloud" and followed by the decimal representation of the data length.
|
||||||
|
package mrhash
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/sha1"
|
||||||
|
"encoding"
|
||||||
|
"encoding/hex"
|
||||||
|
"errors"
|
||||||
|
"hash"
|
||||||
|
"strconv"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
	// BlockSize of the checksum in bytes.
	BlockSize = sha1.BlockSize
	// Size of the checksum in bytes.
	Size = sha1.Size

	// startString is fed into the underlying SHA1 ahead of the content.
	startString = "mrCloud"
	// hashError is the panic message used when the underlying SHA1
	// unexpectedly reports a failure.
	hashError = "hash function returned error"
)

// Global errors
var (
	// ErrorInvalidHash reports a string that is not a valid hex encoded
	// hash of exactly Size bytes.
	ErrorInvalidHash = errors.New("invalid hash")

	// errNoBinaryState is returned by cloneSHA1 when a hash cannot save or
	// restore its internal state.
	errNoBinaryState = errors.New("hash does not support binary state marshaling")
)

// digest is the running state of a Mailru hash computation.
type digest struct {
	// total is an int64 so that the byte count (which ends up in the hash
	// as a decimal suffix) stays correct past 2 GiB on 32-bit platforms.
	total int64     // bytes written into hash so far
	sha   hash.Hash // underlying SHA1, seeded with startString by Reset
	small []byte    // content seen so far, maintained only while total <= Size
}

// New returns a new hash.Hash computing the Mailru checksum.
func New() hash.Hash {
	d := &digest{}
	d.Reset()
	return d
}

// Write adds the bytes of p to the running hash.
//
// The data is always fed to the underlying SHA1. While the total amount
// written does not exceed Size it is also remembered verbatim, because the
// checksum of such short content is the content itself. Write never returns
// an error and does not retain p.
func (d *digest) Write(p []byte) (n int, err error) {
	n, err = d.sha.Write(p)
	if err != nil {
		// hash.Hash documents that Write never returns an error.
		panic(hashError)
	}
	d.total += int64(n)
	if d.total <= Size {
		d.small = append(d.small, p[:n]...)
	}
	return n, nil
}

// Sum appends the current hash to b and returns the resulting slice.
// It does not change the underlying hash state.
func (d *digest) Sum(b []byte) []byte {
	// If content is small, return it padded with zero bytes to Size.
	if d.total <= Size {
		var padded [Size]byte
		copy(padded[:], d.small)
		return append(b, padded[:]...)
	}

	// Finish the hash on a clone so that further writes to d continue from
	// the state before the length suffix was added.
	clone, err := cloneSHA1(d.sha)
	if err == nil {
		_, err = clone.Write(strconv.AppendInt(nil, d.total, 10))
	}
	if err != nil {
		panic(hashError)
	}
	return clone.Sum(b)
}

// cloneSHA1 clones state of SHA1 hash.
//
// The state is transferred through the binary (un)marshaling interfaces.
// An error is returned if orig cannot marshal its state or if restoring the
// state into a fresh SHA1 fails.
func cloneSHA1(orig hash.Hash) (clone hash.Hash, err error) {
	marshaler, ok := orig.(encoding.BinaryMarshaler)
	if !ok {
		return nil, errNoBinaryState
	}
	state, err := marshaler.MarshalBinary()
	if err != nil {
		return nil, err
	}
	fresh := sha1.New()
	unmarshaler, ok := fresh.(encoding.BinaryUnmarshaler)
	if !ok {
		return nil, errNoBinaryState
	}
	if err = unmarshaler.UnmarshalBinary(state); err != nil {
		return nil, err
	}
	return fresh, nil
}

// Reset resets the Hash to its initial state.
func (d *digest) Reset() {
	d.sha = sha1.New()
	// hash.Hash documents that Write never returns an error.
	_, _ = d.sha.Write([]byte(startString))
	d.total = 0
	// Forget remembered content as well, otherwise the next short input
	// would be reported together with bytes from before the reset.
	d.small = d.small[:0]
}

// Size returns the number of bytes Sum will return.
func (d *digest) Size() int {
	return Size
}

// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all writes
// are a multiple of the block size.
func (d *digest) BlockSize() int {
	return BlockSize
}
|
||||||
|
|
||||||
|
// Sum returns the Mailru checksum of the data.
|
||||||
|
func Sum(data []byte) []byte {
|
||||||
|
var d digest
|
||||||
|
d.Reset()
|
||||||
|
_, _ = d.Write(data)
|
||||||
|
return d.Sum(nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// DecodeString converts a string to the Mailru hash
|
||||||
|
func DecodeString(s string) ([]byte, error) {
|
||||||
|
b, err := hex.DecodeString(s)
|
||||||
|
if err != nil || len(b) != Size {
|
||||||
|
return nil, ErrorInvalidHash
|
||||||
|
}
|
||||||
|
return b, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// must implement this interface
|
||||||
|
var (
|
||||||
|
_ hash.Hash = (*digest)(nil)
|
||||||
|
)
|
81
backend/mailru/mrhash/mrhash_test.go
Normal file
81
backend/mailru/mrhash/mrhash_test.go
Normal file
|
@ -0,0 +1,81 @@
|
||||||
|
package mrhash_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/hex"
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/rclone/rclone/backend/mailru/mrhash"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func testChunk(t *testing.T, chunk int) {
|
||||||
|
data := make([]byte, chunk)
|
||||||
|
for i := 0; i < chunk; i++ {
|
||||||
|
data[i] = 'A'
|
||||||
|
}
|
||||||
|
for _, test := range []struct {
|
||||||
|
n int
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{0, "0000000000000000000000000000000000000000"},
|
||||||
|
{1, "4100000000000000000000000000000000000000"},
|
||||||
|
{2, "4141000000000000000000000000000000000000"},
|
||||||
|
{19, "4141414141414141414141414141414141414100"},
|
||||||
|
{20, "4141414141414141414141414141414141414141"},
|
||||||
|
{21, "eb1d05e78a18691a5aa196a6c2b60cd40b5faafb"},
|
||||||
|
{22, "037e6d960601118a0639afbeff30fe716c66ed2d"},
|
||||||
|
{4096, "45a16aa192502b010280fb5b44274c601a91fd9f"},
|
||||||
|
{4194303, "fa019d5bd26498cf6abe35e0d61801bf19bf704b"},
|
||||||
|
{4194304, "5ed0e07aa6ea5c1beb9402b4d807258f27d40773"},
|
||||||
|
{4194305, "67bd0b9247db92e0e7d7e29a0947a50fedcb5452"},
|
||||||
|
{8388607, "41a8e2eb044c2e242971b5445d7be2a13fc0dd84"},
|
||||||
|
{8388608, "267a970917c624c11fe624276ec60233a66dc2c0"},
|
||||||
|
{8388609, "37b60b308d553d2732aefb62b3ea88f74acfa13f"},
|
||||||
|
} {
|
||||||
|
d := mrhash.New()
|
||||||
|
var toWrite int
|
||||||
|
for toWrite = test.n; toWrite >= chunk; toWrite -= chunk {
|
||||||
|
n, err := d.Write(data)
|
||||||
|
assert.Nil(t, err)
|
||||||
|
assert.Equal(t, chunk, n)
|
||||||
|
}
|
||||||
|
n, err := d.Write(data[:toWrite])
|
||||||
|
assert.Nil(t, err)
|
||||||
|
assert.Equal(t, toWrite, n)
|
||||||
|
got1 := hex.EncodeToString(d.Sum(nil))
|
||||||
|
assert.Equal(t, test.want, got1, fmt.Sprintf("when testing length %d", n))
|
||||||
|
got2 := hex.EncodeToString(d.Sum(nil))
|
||||||
|
assert.Equal(t, test.want, got2, fmt.Sprintf("when testing length %d (2nd sum)", n))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHashChunk16M(t *testing.T) { testChunk(t, 16*1024*1024) }
|
||||||
|
func TestHashChunk8M(t *testing.T) { testChunk(t, 8*1024*1024) }
|
||||||
|
func TestHashChunk4M(t *testing.T) { testChunk(t, 4*1024*1024) }
|
||||||
|
func TestHashChunk2M(t *testing.T) { testChunk(t, 2*1024*1024) }
|
||||||
|
func TestHashChunk1M(t *testing.T) { testChunk(t, 1*1024*1024) }
|
||||||
|
func TestHashChunk64k(t *testing.T) { testChunk(t, 64*1024) }
|
||||||
|
func TestHashChunk32k(t *testing.T) { testChunk(t, 32*1024) }
|
||||||
|
func TestHashChunk2048(t *testing.T) { testChunk(t, 2048) }
|
||||||
|
func TestHashChunk2047(t *testing.T) { testChunk(t, 2047) }
|
||||||
|
|
||||||
|
func TestSumCalledTwice(t *testing.T) {
|
||||||
|
d := mrhash.New()
|
||||||
|
assert.NotPanics(t, func() { d.Sum(nil) })
|
||||||
|
d.Reset()
|
||||||
|
assert.NotPanics(t, func() { d.Sum(nil) })
|
||||||
|
assert.NotPanics(t, func() { d.Sum(nil) })
|
||||||
|
_, _ = d.Write([]byte{1})
|
||||||
|
assert.NotPanics(t, func() { d.Sum(nil) })
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSize(t *testing.T) {
|
||||||
|
d := mrhash.New()
|
||||||
|
assert.Equal(t, 20, d.Size())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBlockSize(t *testing.T) {
|
||||||
|
d := mrhash.New()
|
||||||
|
assert.Equal(t, 64, d.BlockSize())
|
||||||
|
}
|
|
@ -13,6 +13,7 @@ import (
|
||||||
"github.com/jzelinskie/whirlpool"
|
"github.com/jzelinskie/whirlpool"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
"github.com/rclone/rclone/backend/dropbox/dbhash"
|
"github.com/rclone/rclone/backend/dropbox/dbhash"
|
||||||
|
"github.com/rclone/rclone/backend/mailru/mrhash"
|
||||||
"github.com/rclone/rclone/backend/onedrive/quickxorhash"
|
"github.com/rclone/rclone/backend/onedrive/quickxorhash"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -44,13 +45,16 @@ const (
|
||||||
// CRC32 indicates CRC-32 support
|
// CRC32 indicates CRC-32 support
|
||||||
CRC32
|
CRC32
|
||||||
|
|
||||||
|
// Mailru indicates Mailru special hash
|
||||||
|
Mailru
|
||||||
|
|
||||||
// None indicates no hashes are supported
|
// None indicates no hashes are supported
|
||||||
None Type = 0
|
None Type = 0
|
||||||
)
|
)
|
||||||
|
|
||||||
// Supported returns a set of all the supported hashes by
|
// Supported returns a set of all the supported hashes by
|
||||||
// HashStream and MultiHasher.
|
// HashStream and MultiHasher.
|
||||||
var Supported = NewHashSet(MD5, SHA1, Dropbox, QuickXorHash, Whirlpool, CRC32)
|
var Supported = NewHashSet(MD5, SHA1, Dropbox, QuickXorHash, Whirlpool, CRC32, Mailru)
|
||||||
|
|
||||||
// Width returns the width in characters for any HashType
|
// Width returns the width in characters for any HashType
|
||||||
var Width = map[Type]int{
|
var Width = map[Type]int{
|
||||||
|
@ -60,6 +64,7 @@ var Width = map[Type]int{
|
||||||
QuickXorHash: 40,
|
QuickXorHash: 40,
|
||||||
Whirlpool: 128,
|
Whirlpool: 128,
|
||||||
CRC32: 8,
|
CRC32: 8,
|
||||||
|
Mailru: 40,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stream will calculate hashes of all supported hash types.
|
// Stream will calculate hashes of all supported hash types.
|
||||||
|
@ -103,6 +108,8 @@ func (h Type) String() string {
|
||||||
return "Whirlpool"
|
return "Whirlpool"
|
||||||
case CRC32:
|
case CRC32:
|
||||||
return "CRC-32"
|
return "CRC-32"
|
||||||
|
case Mailru:
|
||||||
|
return "MailruHash"
|
||||||
default:
|
default:
|
||||||
err := fmt.Sprintf("internal error: unknown hash type: 0x%x", int(h))
|
err := fmt.Sprintf("internal error: unknown hash type: 0x%x", int(h))
|
||||||
panic(err)
|
panic(err)
|
||||||
|
@ -126,6 +133,8 @@ func (h *Type) Set(s string) error {
|
||||||
*h = Whirlpool
|
*h = Whirlpool
|
||||||
case "CRC-32":
|
case "CRC-32":
|
||||||
*h = CRC32
|
*h = CRC32
|
||||||
|
case "MailruHash":
|
||||||
|
*h = Mailru
|
||||||
default:
|
default:
|
||||||
return errors.Errorf("Unknown hash type %q", s)
|
return errors.Errorf("Unknown hash type %q", s)
|
||||||
}
|
}
|
||||||
|
@ -160,6 +169,8 @@ func fromTypes(set Set) (map[Type]hash.Hash, error) {
|
||||||
hashers[t] = whirlpool.New()
|
hashers[t] = whirlpool.New()
|
||||||
case CRC32:
|
case CRC32:
|
||||||
hashers[t] = crc32.NewIEEE()
|
hashers[t] = crc32.NewIEEE()
|
||||||
|
case Mailru:
|
||||||
|
hashers[t] = mrhash.New()
|
||||||
default:
|
default:
|
||||||
err := fmt.Sprintf("internal error: Unsupported hash type %v", t)
|
err := fmt.Sprintf("internal error: Unsupported hash type %v", t)
|
||||||
panic(err)
|
panic(err)
|
||||||
|
|
|
@ -75,6 +75,7 @@ var hashTestSet = []hashTest{
|
||||||
hash.QuickXorHash: "0110c000085000031c0001095ec00218d0000700",
|
hash.QuickXorHash: "0110c000085000031c0001095ec00218d0000700",
|
||||||
hash.Whirlpool: "eddf52133d4566d763f716e853d6e4efbabd29e2c2e63f56747b1596172851d34c2df9944beb6640dbdbe3d9b4eb61180720a79e3d15baff31c91e43d63869a4",
|
hash.Whirlpool: "eddf52133d4566d763f716e853d6e4efbabd29e2c2e63f56747b1596172851d34c2df9944beb6640dbdbe3d9b4eb61180720a79e3d15baff31c91e43d63869a4",
|
||||||
hash.CRC32: "a6041d7e",
|
hash.CRC32: "a6041d7e",
|
||||||
|
hash.Mailru: "0102030405060708090a0b0c0d0e000000000000",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
// Empty data set
|
// Empty data set
|
||||||
|
@ -87,6 +88,7 @@ var hashTestSet = []hashTest{
|
||||||
hash.QuickXorHash: "0000000000000000000000000000000000000000",
|
hash.QuickXorHash: "0000000000000000000000000000000000000000",
|
||||||
hash.Whirlpool: "19fa61d75522a4669b44e39c1d2e1726c530232130d407f89afee0964997f7a73e83be698b288febcf88e3e03c4f0757ea8964e59b63d93708b138cc42a66eb3",
|
hash.Whirlpool: "19fa61d75522a4669b44e39c1d2e1726c530232130d407f89afee0964997f7a73e83be698b288febcf88e3e03c4f0757ea8964e59b63d93708b138cc42a66eb3",
|
||||||
hash.CRC32: "00000000",
|
hash.CRC32: "00000000",
|
||||||
|
hash.Mailru: "0000000000000000000000000000000000000000",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue