Merge pull request #5 from nspcc-dev/feat/sl2avx2

Make use of AVX2 in Sum() by default

Add benchmarks regarding AVX vs. AVX2 performance.
This commit is contained in:
fyrchik 2019-06-24 10:34:58 +03:00 committed by GitHub
commit 500c652dcc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 193 additions and 12 deletions

View file

@ -26,6 +26,15 @@ are concatable: hash sum of data can be calculated based on hashes of chunks.
The example of how it works can be seen in tests.
# Benchmarks
## AVX vs AVX2 version
```
BenchmarkAVX-8 300 3566248 ns/op 64 B/op 4 allocs/op
BenchmarkAVX2-8 500 2857174 ns/op 64 B/op 2 allocs/op
```
# Contributing
At this moment, we do not accept contributions. Follow us.

View file

@ -21,7 +21,7 @@ type digest struct {
}
// type assertion
var _ hash.Hash = new(digest)
var _ hash.Hash = (*digest)(nil)
var (
minmax = [2]gf127.GF127{{0, 0}, {math.MaxUint64, math.MaxUint64}}
@ -47,11 +47,10 @@ func (d *digest) checkSum() [hashSize]byte {
}
func (d *digest) byteArray() (b [hashSize]byte) {
var t []byte
for i := 0; i < 4; i++ {
t = d.x[i].ByteArray()
copy(b[i*16:], t)
}
copy(b[:], d.x[0].ByteArray())
copy(b[16:], d.x[1].ByteArray())
copy(b[32:], d.x[2].ByteArray())
copy(b[48:], d.x[3].ByteArray())
return
}
@ -85,11 +84,20 @@ func (d *digest) BlockSize() int {
return hashBlockSize
}
// Sum returnz Tillich-Zémor checksum of data
func Sum(data []byte) [hashSize]byte {
// SumAVX returns the Tillich-Zémor checksum of data.
// It uses only AVX instructions (no AVX2).
func SumAVX(data []byte) [hashSize]byte {
d := new(digest)
d.Reset()
d.Write(data)
_, _ = d.Write(data) // no errors
return d.checkSum()
}
// Sum returns the Tillich-Zémor checksum of data.
// It is computed with the AVX2-based digest2 implementation.
func Sum(data []byte) [hashSize]byte {
	state := new(digest2)
	state.Reset()

	// digest2.Write never reports an error, so its results are dropped
	// on purpose.
	_, _ = state.Write(data)

	return state.checkSum()
}

55
tz/hash_avx2.go Normal file
View file

@ -0,0 +1,55 @@
package tz
import (
"hash"
"github.com/nspcc-dev/tzhash/gf127"
)
// digest2 is the hash state used by the AVX2 code path.
// x[0] is passed to mulBitRightx2 as the packed pair (c00, c10) and x[1] as
// the packed pair (c01, c11), i.e. the 2x2 matrix is kept column by column.
type digest2 struct {
x [2]gf127.GF127x2
}
// Compile-time check that *digest2 implements hash.Hash.
var _ hash.Hash = (*digest2)(nil)
// Write absorbs data into the hash state bit by bit, walking every byte from
// its most significant bit down to its least significant one.
// It always returns len(data) and a nil error.
func (d *digest2) Write(data []byte) (n int, err error) {
	// Both halves of the state are updated in place by every step.
	left, right := &d.x[0], &d.x[1]

	for i := range data {
		octet := data[i]

		// The current bit selects minmax[0] or minmax[1] as the operand of
		// the step. The eight steps are kept unrolled, as in the original.
		mulBitRightx2(left, right, &minmax[octet>>7])
		mulBitRightx2(left, right, &minmax[(octet>>6)&1])
		mulBitRightx2(left, right, &minmax[(octet>>5)&1])
		mulBitRightx2(left, right, &minmax[(octet>>4)&1])
		mulBitRightx2(left, right, &minmax[(octet>>3)&1])
		mulBitRightx2(left, right, &minmax[(octet>>2)&1])
		mulBitRightx2(left, right, &minmax[(octet>>1)&1])
		mulBitRightx2(left, right, &minmax[octet&1])
	}

	return len(data), nil
}
// Sum appends the checksum of everything written so far to in and returns
// the resulting slice.
func (d *digest2) Sum(in []byte) []byte {
	// Operate on a snapshot of the state so that the caller can keep
	// writing and summing afterwards.
	snapshot := *d
	sum := snapshot.checkSum()

	return append(in, sum[:]...)
}
// Reset puts the digest back into its initial state:
// x[0] = {1, 0, 0, 0} and x[1] = {0, 0, 1, 0}.
func (d *digest2) Reset() {
	d.x = [2]gf127.GF127x2{
		{1, 0, 0, 0},
		{0, 0, 1, 0},
	}
}
// Size returns the length of the checksum in bytes (hashSize).
func (d *digest2) Size() int {
	return hashSize
}
// BlockSize returns the block size reported by the hash (hashBlockSize).
func (d *digest2) BlockSize() int {
	return hashBlockSize
}
// checkSum serializes the state into the checksum byte layout.
//
// The matrix is stored transposed, while the output has to follow the same
// order as digest. Hence the first 16 bytes of x[col] go to offset 16*col
// and the remaining bytes to offset 16*col+32.
func (d *digest2) checkSum() (b [hashSize]byte) {
	for col := range d.x {
		packed := d.x[col].ByteArray()
		copy(b[16*col:], packed[:16])
		copy(b[16*col+32:], packed[16:])
	}
	return b
}
// mulBitRightx2 performs one per-bit update of the hash state.
// It has no Go body: the implementation is the assembly routine
// ·mulBitRightx2, which loads both packed pairs, combines them with e and
// stores the results back through c00c10 and c01c11.
// Write passes &minmax[bit] as e, i.e. an all-zero or all-one value that the
// assembly applies as an AND mask.
func mulBitRightx2(c00c10 *gf127.GF127x2, c01c11 *gf127.GF127x2, e *gf127.GF127)

View file

@ -2,13 +2,95 @@ package tz
import (
"encoding/hex"
"io"
"math/rand"
"testing"
"github.com/stretchr/testify/require"
)
// benchDataSize is the length in bytes of the buffer built by newBuffer for
// the benchmarks.
const benchDataSize = 100000
// testCases are the input/checksum vectors that TestHash runs against both
// digest implementations; hash is the expected checksum in hex.
var testCases = []struct {
input []byte
hash string
}{
{
// Empty input: nothing is written after Reset.
[]byte{},
"00000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001",
},
{
[]byte{0, 1, 2, 3, 4, 5, 6, 7, 8},
"00000000000001e4a545e5b90fb6882b00000000000000c849cd88f79307f67100000000000000cd0c898cb68356e624000000000000007cbcdc7c5e89b16e4b",
},
{
[]byte{4, 8, 15, 16, 23, 42, 255, 0, 127, 65, 32, 123, 42, 45, 201, 210, 213, 244},
"4db8a8e253903c70ab0efb65fe6de05a36d1dc9f567a147152d0148a86817b2062908d9b026a506007c1118e86901b672a39317c55ee3c10ac8efafa79efe8ee",
},
}
// TestHash runs the shared test vectors through both digest implementations
// and compares the hex-encoded checksum with the expected value.
func TestHash(t *testing.T) {
	t.Run("test AVX digest", func(t *testing.T) {
		h := new(digest)
		for i := range testCases {
			input, want := testCases[i].input, testCases[i].hash

			h.Reset()
			_, _ = h.Write(input)
			got := h.checkSum()
			require.Equal(t, want, hex.EncodeToString(got[:]))
		}
	})

	t.Run("test AVX2 digest", func(t *testing.T) {
		h := new(digest2)
		for i := range testCases {
			input, want := testCases[i].input, testCases[i].hash

			h.Reset()
			_, _ = h.Write(input)
			got := h.checkSum()
			require.Equal(t, want, hex.EncodeToString(got[:]))
		}
	})
}
// newBuffer returns benchDataSize pseudo-random bytes. The generator is
// seeded with 0, so every call yields the same content.
func newBuffer() (data []byte) {
	data = make([]byte, benchDataSize)

	src := rand.New(rand.NewSource(0))
	if _, err := io.ReadFull(src, data); err != nil {
		panic("cant initialize buffer")
	}

	return data
}
// BenchmarkAVX measures hashing of the newBuffer payload with digest, the
// AVX-only implementation.
func BenchmarkAVX(b *testing.B) {
	payload := newBuffer()

	// Buffer generation above is excluded from the measurement.
	b.ResetTimer()
	b.ReportAllocs()

	state := new(digest)
	for iter := 0; iter < b.N; iter++ {
		state.Reset()
		_, _ = state.Write(payload)
		state.checkSum()
	}
}
// BenchmarkAVX2 measures hashing of the newBuffer payload with digest2, the
// AVX2 implementation.
func BenchmarkAVX2(b *testing.B) {
	payload := newBuffer()

	// Buffer generation above is excluded from the measurement.
	b.ResetTimer()
	b.ReportAllocs()

	state := new(digest2)
	for iter := 0; iter < b.N; iter++ {
		state.Reset()
		_, _ = state.Write(payload)
		state.checkSum()
	}
}
func TestHomomorphism(t *testing.T) {
var (
c1, c2 sl2
n int
@ -36,7 +118,7 @@ func TestHash(t *testing.T) {
require.Equal(t, h, c1.ByteArray())
}
var testCases = []struct {
var testCasesConcat = []struct {
Hash string
Parts []string
}{{
@ -62,7 +144,7 @@ func TestConcat(t *testing.T) {
err error
)
for _, tc := range testCases {
for _, tc := range testCasesConcat {
expect, err = hex.DecodeString(tc.Hash)
require.NoError(t, err)
@ -86,7 +168,7 @@ func TestValidate(t *testing.T) {
err error
)
for _, tc := range testCases {
for _, tc := range testCasesConcat {
hash, _ = hex.DecodeString(tc.Hash)
require.NoError(t, err)

View file

@ -60,3 +60,30 @@ TEXT ·mulBitRight(SB),NOSPLIT,$0
XORPD X9, X3
MOVUPD X3, (DX)
RET
// func mulBitRightx2(c00c10, c01c11 *[4]uint64, e *[2]uint64)
//
// Each 256-bit operand packs two 128-bit values, one per lane. With
// A = *c00c10, B = *c01c11 and E = *e copied into both lanes, the routine
// computes
//
//	T = mulX(A) XOR B
//	*c01c11 = A XOR (T AND E)
//	*c00c10 = T
//
// where mulX shifts each 128-bit lane left by one bit, folds the bit that
// reaches position 127 back into bits 0 and 63 and clears bit 127 (this
// looks like a reduction modulo x^127 + x^63 + 1).
//
// Loads and stores use VMOVDQU: Go does not guarantee 32-byte alignment for
// a [4]uint64, and VMOVDQA faults on a misaligned address. VZEROUPPER before
// RET avoids AVX/SSE transition penalties in the Go code that runs next.
TEXT ·mulBitRightx2(SB),NOSPLIT,$0
	MOVQ c00c10+0(FP), AX
	VMOVDQU (AX), Y0                // Y0 = A
	MOVQ c01c11+8(FP), BX
	VMOVDQU (BX), Y8                // Y8 = B

	// Y3 = mulX(A)
	VPSLLQ $1, Y0, Y1               // shift every 64-bit word left by one
	VPALIGNR $8, Y1, Y0, Y2         // per lane: low = high word of Y1, high = low word of Y0
	VPSRLQ $63, Y2, Y2              // keep only the top bit of those words
	VPXOR Y1, Y2, Y2                // carry low->high word, fold bit 127 into bit 0
	VPSRLQ $63, Y1, Y3
	VPSLLQ $63, Y3, Y3              // isolate bit 63 of every shifted word
	VPUNPCKHQDQ Y3, Y3, Y3          // copy the high word's bit into both words
	VPXOR Y2, Y3, Y3                // fold into bit 63 and clear bit 127

	MOVQ e+16(FP), CX
	VBROADCASTI128 (CX), Y2         // Y2 = E in both lanes
	VPXOR Y3, Y8, Y3                // Y3 = T = mulX(A) XOR B
	VPAND Y3, Y2, Y4                // Y4 = T AND E
	VPXOR Y4, Y0, Y8                // Y8 = A XOR (T AND E)
	VMOVDQU Y8, (BX)
	VMOVDQU Y3, (AX)
	VZEROUPPER
	RET