Merge pull request #5 from nspcc-dev/feat/sl2avx2
Make use of AVX2 in Sum() by default. Add benchmarks regarding AVX vs. AVX2 performance.
This commit is contained in:
commit
500c652dcc
5 changed files with 193 additions and 12 deletions
|
@ -26,6 +26,15 @@ are concatable: hash sum of data can be calculated based on hashes of chunks.
|
|||
|
||||
The example of how it works can be seen in tests.
|
||||
|
||||
# Benchmarks
|
||||
|
||||
## AVX vs AVX2 version
|
||||
|
||||
```
|
||||
BenchmarkAVX-8 300 3566248 ns/op 64 B/op 4 allocs/op
|
||||
BenchmarkAVX2-8 500 2857174 ns/op 64 B/op 2 allocs/op
|
||||
```
|
||||
|
||||
# Contributing
|
||||
|
||||
At this moment, we do not accept contributions. Follow us.
|
||||
|
|
26
tz/hash.go
26
tz/hash.go
|
@ -21,7 +21,7 @@ type digest struct {
|
|||
}
|
||||
|
||||
// type assertion
|
||||
var _ hash.Hash = new(digest)
|
||||
var _ hash.Hash = (*digest)(nil)
|
||||
|
||||
var (
|
||||
minmax = [2]gf127.GF127{{0, 0}, {math.MaxUint64, math.MaxUint64}}
|
||||
|
@ -47,11 +47,10 @@ func (d *digest) checkSum() [hashSize]byte {
|
|||
}
|
||||
|
||||
func (d *digest) byteArray() (b [hashSize]byte) {
|
||||
var t []byte
|
||||
for i := 0; i < 4; i++ {
|
||||
t = d.x[i].ByteArray()
|
||||
copy(b[i*16:], t)
|
||||
}
|
||||
copy(b[:], d.x[0].ByteArray())
|
||||
copy(b[16:], d.x[1].ByteArray())
|
||||
copy(b[32:], d.x[2].ByteArray())
|
||||
copy(b[48:], d.x[3].ByteArray())
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -85,11 +84,20 @@ func (d *digest) BlockSize() int {
|
|||
return hashBlockSize
|
||||
}
|
||||
|
||||
// Sum returns Tillich-Zémor checksum of data
|
||||
func Sum(data []byte) [hashSize]byte {
|
||||
// SumAVX returns Tillich-Zémor checksum of data.
|
||||
// It uses only AVX instructions (no AVX2).
|
||||
func SumAVX(data []byte) [hashSize]byte {
|
||||
d := new(digest)
|
||||
d.Reset()
|
||||
d.Write(data)
|
||||
_, _ = d.Write(data) // no errors
|
||||
return d.checkSum()
|
||||
}
|
||||
|
||||
// Sum returns Tillich-Zémor checksum of data.
|
||||
func Sum(data []byte) [hashSize]byte {
|
||||
d := new(digest2)
|
||||
d.Reset()
|
||||
_, _ = d.Write(data) // no errors
|
||||
return d.checkSum()
|
||||
}
|
||||
|
||||
|
|
55
tz/hash_avx2.go
Normal file
55
tz/hash_avx2.go
Normal file
|
@ -0,0 +1,55 @@
|
|||
package tz
|
||||
|
||||
import (
|
||||
"hash"
|
||||
|
||||
"github.com/nspcc-dev/tzhash/gf127"
|
||||
)
|
||||
|
||||
// digest2 is the hash state that is advanced by the AVX2 routine
// mulBitRightx2.
type digest2 struct {
|
||||
// x holds the state as two packed GF127x2 values. The matrix is stored
// transposed relative to digest; checkSum reorders it on output.
x [2]gf127.GF127x2
|
||||
}
|
||||
|
||||
var _ hash.Hash = (*digest2)(nil)
|
||||
|
||||
func (d *digest2) Write(data []byte) (n int, err error) {
|
||||
n = len(data)
|
||||
for _, b := range data {
|
||||
mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>7)&1])
|
||||
mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>6)&1])
|
||||
mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>5)&1])
|
||||
mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>4)&1])
|
||||
mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>3)&1])
|
||||
mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>2)&1])
|
||||
mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>1)&1])
|
||||
mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>0)&1])
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (d *digest2) Sum(in []byte) []byte {
|
||||
// Make a copy of d so that caller can keep writing and summing.
|
||||
d0 := *d
|
||||
h := d0.checkSum()
|
||||
return append(in, h[:]...)
|
||||
}
|
||||
func (d *digest2) Reset() {
|
||||
d.x[0] = gf127.GF127x2{1, 0, 0, 0}
|
||||
d.x[1] = gf127.GF127x2{0, 0, 1, 0}
|
||||
}
|
||||
// Size returns hashSize, the number of bytes in a checksum.
func (d *digest2) Size() int { return hashSize }
|
||||
// BlockSize returns hashBlockSize.
func (d *digest2) BlockSize() int { return hashBlockSize }
|
||||
func (d *digest2) checkSum() (b [hashSize]byte) {
|
||||
// Matrix is stored transposed,
|
||||
// but we need to use order consistent with digest.
|
||||
h := d.x[0].ByteArray()
|
||||
copy(b[:], h[:16])
|
||||
copy(b[32:], h[16:])
|
||||
|
||||
h = d.x[1].ByteArray()
|
||||
copy(b[16:], h[:16])
|
||||
copy(b[48:], h[16:])
|
||||
return
|
||||
}
|
||||
|
||||
// mulBitRightx2 updates the two packed values c00c10 and c01c11 in place,
// using e as a bit mask. It has no Go body: the implementation is the
// assembly routine of the same name, which relies on AVX2 instructions.
func mulBitRightx2(c00c10 *gf127.GF127x2, c01c11 *gf127.GF127x2, e *gf127.GF127)
|
|
@ -2,13 +2,95 @@ package tz
|
|||
|
||||
import (
|
||||
"encoding/hex"
|
||||
"io"
|
||||
"math/rand"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
const benchDataSize = 100000
|
||||
|
||||
var testCases = []struct {
|
||||
input []byte
|
||||
hash string
|
||||
}{
|
||||
{
|
||||
[]byte{},
|
||||
"00000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001",
|
||||
},
|
||||
{
|
||||
[]byte{0, 1, 2, 3, 4, 5, 6, 7, 8},
|
||||
"00000000000001e4a545e5b90fb6882b00000000000000c849cd88f79307f67100000000000000cd0c898cb68356e624000000000000007cbcdc7c5e89b16e4b",
|
||||
},
|
||||
{
|
||||
[]byte{4, 8, 15, 16, 23, 42, 255, 0, 127, 65, 32, 123, 42, 45, 201, 210, 213, 244},
|
||||
"4db8a8e253903c70ab0efb65fe6de05a36d1dc9f567a147152d0148a86817b2062908d9b026a506007c1118e86901b672a39317c55ee3c10ac8efafa79efe8ee",
|
||||
},
|
||||
}
|
||||
|
||||
func TestHash(t *testing.T) {
|
||||
t.Run("test AVX digest", func(t *testing.T) {
|
||||
d := new(digest)
|
||||
for _, tc := range testCases {
|
||||
d.Reset()
|
||||
_, _ = d.Write(tc.input)
|
||||
sum := d.checkSum()
|
||||
|
||||
require.Equal(t, tc.hash, hex.EncodeToString(sum[:]))
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("test AVX2 digest", func(t *testing.T) {
|
||||
d := new(digest2)
|
||||
for _, tc := range testCases {
|
||||
d.Reset()
|
||||
_, _ = d.Write(tc.input)
|
||||
sum := d.checkSum()
|
||||
|
||||
require.Equal(t, tc.hash, hex.EncodeToString(sum[:]))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func newBuffer() (data []byte) {
|
||||
data = make([]byte, benchDataSize)
|
||||
|
||||
r := rand.New(rand.NewSource(0))
|
||||
_, err := io.ReadFull(r, data)
|
||||
if err != nil {
|
||||
panic("cant initialize buffer")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func BenchmarkAVX(b *testing.B) {
|
||||
data := newBuffer()
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
d := new(digest)
|
||||
for i := 0; i < b.N; i++ {
|
||||
d.Reset()
|
||||
_, _ = d.Write(data)
|
||||
d.checkSum()
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAVX2(b *testing.B) {
|
||||
data := newBuffer()
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
d := new(digest2)
|
||||
for i := 0; i < b.N; i++ {
|
||||
d.Reset()
|
||||
_, _ = d.Write(data)
|
||||
d.checkSum()
|
||||
}
|
||||
}
|
||||
|
||||
func TestHomomorphism(t *testing.T) {
|
||||
var (
|
||||
c1, c2 sl2
|
||||
n int
|
||||
|
@ -36,7 +118,7 @@ func TestHash(t *testing.T) {
|
|||
require.Equal(t, h, c1.ByteArray())
|
||||
}
|
||||
|
||||
var testCases = []struct {
|
||||
var testCasesConcat = []struct {
|
||||
Hash string
|
||||
Parts []string
|
||||
}{{
|
||||
|
@ -62,7 +144,7 @@ func TestConcat(t *testing.T) {
|
|||
err error
|
||||
)
|
||||
|
||||
for _, tc := range testCases {
|
||||
for _, tc := range testCasesConcat {
|
||||
expect, err = hex.DecodeString(tc.Hash)
|
||||
require.NoError(t, err)
|
||||
|
||||
|
@ -86,7 +168,7 @@ func TestValidate(t *testing.T) {
|
|||
err error
|
||||
)
|
||||
|
||||
for _, tc := range testCases {
|
||||
for _, tc := range testCasesConcat {
|
||||
hash, _ = hex.DecodeString(tc.Hash)
|
||||
require.NoError(t, err)
|
||||
|
||||
|
|
|
@ -60,3 +60,30 @@ TEXT ·mulBitRight(SB),NOSPLIT,$0
|
|||
XORPD X9, X3
|
||||
MOVUPD X3, (DX)
|
||||
RET
|
||||
|
||||
|
||||
// func mulBitRightx2(c00c10, c01c11 *[4]uint64, e *[2]uint64)
//
// Updates the 256-bit values at c00c10 and c01c11 in place, using the 128-bit
// value at e as a mask applied to both 128-bit lanes:
//
//   t      = shift(*c00c10) ^ *c01c11
//   c01c11 = (t & e) ^ *c00c10   (old value of c00c10)
//   c00c10 = t
//
// All memory accesses use the unaligned form VMOVDQU: Go guarantees only
// 8-byte alignment for [4]uint64, while VMOVDQA faults on addresses that are
// not 32-byte aligned.
TEXT ·mulBitRightx2(SB),NOSPLIT,$0
	MOVQ c00c10+0(FP), AX
	VMOVDQU (AX), Y0
	MOVQ c01c11+8(FP), BX
	VMOVDQU (BX), Y8

	// Y3 = shift(Y0): each 128-bit lane is shifted left by one bit, with the
	// carry moved from the low to the high quadword and the bit leaving
	// position 126 folded back into bits 0 and 63 (this looks like
	// multiplication by x modulo x^127 + x^63 + 1 -- confirm against gf127).
	VPSLLQ $1, Y0, Y1
	VPALIGNR $8, Y1, Y0, Y2
	VPSRLQ $63, Y2, Y2
	VPXOR Y1, Y2, Y2
	VPSRLQ $63, Y1, Y3
	VPSLLQ $63, Y3, Y3
	VPUNPCKHQDQ Y3, Y3, Y3
	VPXOR Y2, Y3, Y3

	// Y2 = e replicated into both 128-bit lanes.
	MOVQ e+16(FP), CX
	VBROADCASTI128 (CX), Y2

	VPXOR Y3, Y8, Y3
	VPAND Y3, Y2, Y4
	VPXOR Y4, Y0, Y8
	VMOVDQU Y8, (BX)
	VMOVDQU Y3, (AX)
	RET
|
||||
|
|
Loading…
Reference in a new issue