From a967cc9d3dc857de6da5a07faec8225f4ae172d0 Mon Sep 17 00:00:00 2001 From: Evgenii Date: Fri, 21 Jun 2019 18:47:01 +0300 Subject: [PATCH 1/7] Make use of AVX2 in Sum() by default --- tz/hash.go | 56 ++++++++++++++++++++++++++++++++++++++++++----- tz/tzbits_amd64.s | 27 +++++++++++++++++++++++ 2 files changed, 77 insertions(+), 6 deletions(-) diff --git a/tz/hash.go b/tz/hash.go index dc46290..dd12474 100644 --- a/tz/hash.go +++ b/tz/hash.go @@ -7,6 +7,7 @@ import ( "errors" "hash" "math" + "unsafe" "github.com/nspcc-dev/tzhash/gf127" ) @@ -16,12 +17,19 @@ const ( hashBlockSize = 128 ) -type digest struct { - x [4]gf127.GF127 -} +type ( + digest struct { + x [4]gf127.GF127 + } -// type assertion -var _ hash.Hash = new(digest) + digest2 digest +) + +// type assertions +var ( + _ hash.Hash = new(digest) + _ hash.Hash = new(digest2) +) var ( minmax = [2]gf127.GF127{{0, 0}, {math.MaxUint64, math.MaxUint64}} @@ -85,9 +93,44 @@ func (d *digest) BlockSize() int { return hashBlockSize } +func (d *digest2) Write(data []byte) (n int, err error) { + n = len(data) + + // We need to transpose matrix, because + // mulBitRightx2 accepts matrix by columns, not rows + a := d.x[1] + d.x[1] = d.x[2] + d.x[2] = a + + h1 := (*gf127.GF127x2)(unsafe.Pointer(&d.x[0])) + h2 := (*gf127.GF127x2)(unsafe.Pointer(&d.x[2])) + for _, b := range data { + mulBitRightx2(h1, h2, &minmax[(b>>7)&1]) + mulBitRightx2(h1, h2, &minmax[(b>>6)&1]) + mulBitRightx2(h1, h2, &minmax[(b>>5)&1]) + mulBitRightx2(h1, h2, &minmax[(b>>4)&1]) + mulBitRightx2(h1, h2, &minmax[(b>>3)&1]) + mulBitRightx2(h1, h2, &minmax[(b>>2)&1]) + mulBitRightx2(h1, h2, &minmax[(b>>1)&1]) + mulBitRightx2(h1, h2, &minmax[(b>>0)&1]) + } + + // transpose matrix back + a = d.x[1] + d.x[1] = d.x[2] + d.x[2] = a + + return +} +func (d *digest2) Sum(b []byte) []byte { return (*digest)(d).Sum(b) } +func (d *digest2) Reset() { (*digest)(d).Reset() } +func (d *digest2) Size() int { return (*digest)(d).Size() } +func (d *digest2) BlockSize() int { 
return (*digest)(d).BlockSize() } +func (d *digest2) checkSum() [hashSize]byte { return (*digest)(d).checkSum() } + // Sum returnz Tillich-Zémor checksum of data func Sum(data []byte) [hashSize]byte { - d := new(digest) + d := new(digest2) d.Reset() d.Write(data) return d.checkSum() @@ -172,3 +215,4 @@ func SubtractL(c, a []byte) (b []byte, err error) { } func mulBitRight(c00, c01, c10, c11, e *gf127.GF127) +func mulBitRightx2(c00c01 *gf127.GF127x2, c10c11 *gf127.GF127x2, e *gf127.GF127) diff --git a/tz/tzbits_amd64.s b/tz/tzbits_amd64.s index 7ad3a0e..ecd170c 100644 --- a/tz/tzbits_amd64.s +++ b/tz/tzbits_amd64.s @@ -60,3 +60,30 @@ TEXT ·mulBitRight(SB),NOSPLIT,$0 XORPD X9, X3 MOVUPD X3, (DX) RET + + +// func mulBitRightx2(c00c10, c01c11 *[4]uint64, e *[2]uint64) +TEXT ·mulBitRightx2(SB),NOSPLIT,$0 + MOVQ c00c01+0(FP), AX + VMOVDQA (AX), Y0 + MOVQ c10c11+8(FP), BX + VMOVDQA (BX), Y8 + + VPSLLQ $1, Y0, Y1 + VPALIGNR $8, Y1, Y0, Y2 + VPSRLQ $63, Y2, Y2 + VPXOR Y1, Y2, Y2 + VPSRLQ $63, Y1, Y3 + VPSLLQ $63, Y3, Y3 + VPUNPCKHQDQ Y3, Y3, Y3 + VPXOR Y2, Y3, Y3 + + MOVQ e+16(FP), CX + VBROADCASTI128 (CX), Y2 + + VPXOR Y3, Y8, Y3 + VPAND Y3, Y2, Y4 + VPXOR Y4, Y0, Y8 + VMOVDQA Y8, (BX) + VMOVDQA Y3, (AX) + RET From 9485f49f3b512f4728e17354843c99bb1d8cfc78 Mon Sep 17 00:00:00 2001 From: Evgenii Date: Fri, 21 Jun 2019 22:29:08 +0300 Subject: [PATCH 2/7] Get rid of unsafe usage and add tests --- tz/hash.go | 58 ++++++------------------------------------------- tz/hash_avx2.go | 55 ++++++++++++++++++++++++++++++++++++++++++++++ tz/hash_test.go | 48 +++++++++++++++++++++++++++++++++++++--- 3 files changed, 107 insertions(+), 54 deletions(-) create mode 100644 tz/hash_avx2.go diff --git a/tz/hash.go b/tz/hash.go index dd12474..4d9b65f 100644 --- a/tz/hash.go +++ b/tz/hash.go @@ -7,7 +7,6 @@ import ( "errors" "hash" "math" - "unsafe" "github.com/nspcc-dev/tzhash/gf127" ) @@ -17,19 +16,12 @@ const ( hashBlockSize = 128 ) -type ( - digest struct { - x [4]gf127.GF127 - } +type 
digest struct { + x [4]gf127.GF127 +} - digest2 digest -) - -// type assertions -var ( - _ hash.Hash = new(digest) - _ hash.Hash = new(digest2) -) +// type assertion +var _ hash.Hash = new(digest) var ( minmax = [2]gf127.GF127{{0, 0}, {math.MaxUint64, math.MaxUint64}} @@ -93,46 +85,11 @@ func (d *digest) BlockSize() int { return hashBlockSize } -func (d *digest2) Write(data []byte) (n int, err error) { - n = len(data) - - // We need to transpose matrix, because - // mulBitRightx2 accepts matrix by columns, not rows - a := d.x[1] - d.x[1] = d.x[2] - d.x[2] = a - - h1 := (*gf127.GF127x2)(unsafe.Pointer(&d.x[0])) - h2 := (*gf127.GF127x2)(unsafe.Pointer(&d.x[2])) - for _, b := range data { - mulBitRightx2(h1, h2, &minmax[(b>>7)&1]) - mulBitRightx2(h1, h2, &minmax[(b>>6)&1]) - mulBitRightx2(h1, h2, &minmax[(b>>5)&1]) - mulBitRightx2(h1, h2, &minmax[(b>>4)&1]) - mulBitRightx2(h1, h2, &minmax[(b>>3)&1]) - mulBitRightx2(h1, h2, &minmax[(b>>2)&1]) - mulBitRightx2(h1, h2, &minmax[(b>>1)&1]) - mulBitRightx2(h1, h2, &minmax[(b>>0)&1]) - } - - // transpose matrix back - a = d.x[1] - d.x[1] = d.x[2] - d.x[2] = a - - return -} -func (d *digest2) Sum(b []byte) []byte { return (*digest)(d).Sum(b) } -func (d *digest2) Reset() { (*digest)(d).Reset() } -func (d *digest2) Size() int { return (*digest)(d).Size() } -func (d *digest2) BlockSize() int { return (*digest)(d).BlockSize() } -func (d *digest2) checkSum() [hashSize]byte { return (*digest)(d).checkSum() } - // Sum returnz Tillich-Zémor checksum of data func Sum(data []byte) [hashSize]byte { - d := new(digest2) + d := new(digest) d.Reset() - d.Write(data) + _, _ = d.Write(data) // no errors return d.checkSum() } @@ -215,4 +172,3 @@ func SubtractL(c, a []byte) (b []byte, err error) { } func mulBitRight(c00, c01, c10, c11, e *gf127.GF127) -func mulBitRightx2(c00c01 *gf127.GF127x2, c10c11 *gf127.GF127x2, e *gf127.GF127) diff --git a/tz/hash_avx2.go b/tz/hash_avx2.go new file mode 100644 index 0000000..3acfb56 --- /dev/null +++ 
b/tz/hash_avx2.go @@ -0,0 +1,55 @@ +package tz + +import ( + "hash" + + "github.com/nspcc-dev/tzhash/gf127" +) + +type digest2 struct { + x [2]gf127.GF127x2 +} + +var _ hash.Hash = new(digest2) + +func (d *digest2) Write(data []byte) (n int, err error) { + n = len(data) + for _, b := range data { + mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>7)&1]) + mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>6)&1]) + mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>5)&1]) + mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>4)&1]) + mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>3)&1]) + mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>2)&1]) + mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>1)&1]) + mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>0)&1]) + } + return +} + +func (d *digest2) Sum(in []byte) []byte { + // Make a copy of d so that caller can keep writing and summing. + d0 := *d + h := d0.checkSum() + return append(in, h[:]...) +} +func (d *digest2) Reset() { + d.x[0] = gf127.GF127x2{1, 0, 0, 0} + d.x[1] = gf127.GF127x2{0, 0, 0, 1} +} +func (d *digest2) Size() int { return hashSize } +func (d *digest2) BlockSize() int { return hashBlockSize } +func (d *digest2) checkSum() (b [hashSize]byte) { + // Matrix is stored transposed, + // but we need to use order consistent with digest. 
+ h := d.x[0].ByteArray() + copy(b[:], h[:8]) + copy(b[16:], h[8:]) + + h = d.x[1].ByteArray() + copy(b[8:], h[:8]) + copy(b[24:], h[8:]) + return +} + +func mulBitRightx2(c00c10 *gf127.GF127x2, c01c11 *gf127.GF127x2, e *gf127.GF127) diff --git a/tz/hash_test.go b/tz/hash_test.go index a9d5105..08ec0c0 100644 --- a/tz/hash_test.go +++ b/tz/hash_test.go @@ -8,7 +8,49 @@ import ( "github.com/stretchr/testify/require" ) +var testCases = []struct { + input []byte + hash string +}{ + { + []byte{0, 1, 2, 3, 4, 5, 6, 7, 8}, + "00000000000001e4a545e5b90fb6882b00000000000000c849cd88f79307f67100000000000000cd0c898cb68356e624000000000000007cbcdc7c5e89b16e4b", + }, + { + []byte{4, 8, 15, 16, 23, 42, 255, 0, 127, 65, 32, 123, 42, 45, 201, 210, 213, 244}, + "4db8a8e253903c70ab0efb65fe6de05a36d1dc9f567a147152d0148a86817b2062908d9b026a506007c1118e86901b672a39317c55ee3c10ac8efafa79efe8ee", + }, +} + func TestHash(t *testing.T) { + t.Run("test AVX digest", func(t *testing.T) { + d := new(digest) + for _, tc := range testCases { + d.Reset() + _, _ = d.Write(tc.input) + sum := d.checkSum() + hash := hex.EncodeToString(sum[:]) + if hash != tc.hash { + t.Errorf("expected (%s), got (%s)", tc.hash, hash) + } + } + }) + + t.Run("test AVX2 digest", func(t *testing.T) { + d := new(digest) + for _, tc := range testCases { + d.Reset() + _, _ = d.Write(tc.input) + sum := d.checkSum() + hash := hex.EncodeToString(sum[:]) + if hash != tc.hash { + t.Errorf("expected (%s), got (%s)", tc.hash, hash) + } + } + }) +} + +func TestHomomorphism(t *testing.T) { var ( c1, c2 sl2 n int @@ -36,7 +78,7 @@ func TestHash(t *testing.T) { require.Equal(t, h, c1.ByteArray()) } -var testCases = []struct { +var testCasesConcat = []struct { Hash string Parts []string }{{ @@ -62,7 +104,7 @@ func TestConcat(t *testing.T) { err error ) - for _, tc := range testCases { + for _, tc := range testCasesConcat { expect, err = hex.DecodeString(tc.Hash) require.NoError(t, err) @@ -86,7 +128,7 @@ func TestValidate(t *testing.T) 
{ err error ) - for _, tc := range testCases { + for _, tc := range testCasesConcat { hash, _ = hex.DecodeString(tc.Hash) require.NoError(t, err) From eaeceead2fab8d778c3a0fd59def01f49ef9bebe Mon Sep 17 00:00:00 2001 From: Evgenii Date: Fri, 21 Jun 2019 22:40:17 +0300 Subject: [PATCH 3/7] Add benchmarks --- tz/hash_test.go | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tz/hash_test.go b/tz/hash_test.go index 08ec0c0..222f363 100644 --- a/tz/hash_test.go +++ b/tz/hash_test.go @@ -2,12 +2,15 @@ package tz import ( "encoding/hex" + "io" "math/rand" "testing" "github.com/stretchr/testify/require" ) +const benchDataSize = 1000000 + var testCases = []struct { input []byte hash string @@ -50,6 +53,43 @@ func TestHash(t *testing.T) { }) } +func newBuffer() (data []byte) { + data = make([]byte, benchDataSize) + + r := rand.New(rand.NewSource(0)) + _, err := io.ReadFull(r, data) + if err != nil { + panic("cant initialize buffer") + } + return +} + +func BenchmarkAVX(b *testing.B) { + data := newBuffer() + + b.ResetTimer() + b.ReportAllocs() + d := new(digest) + for i := 0; i < b.N; i++ { + d.Reset() + _, _ = d.Write(data) + d.checkSum() + } +} + +func BenchmarkAVX2(b *testing.B) { + data := newBuffer() + + b.ResetTimer() + b.ReportAllocs() + d := new(digest2) + for i := 0; i < b.N; i++ { + d.Reset() + _, _ = d.Write(data) + d.checkSum() + } +} + func TestHomomorphism(t *testing.T) { var ( c1, c2 sl2 From 4b11f50264c54a4d6b0e321ea6068ac1455a8009 Mon Sep 17 00:00:00 2001 From: Evgenii Date: Fri, 21 Jun 2019 23:10:08 +0300 Subject: [PATCH 4/7] Fix error in AVX2 implementation --- tz/hash.go | 22 +++++++++++++++------- tz/hash_avx2.go | 10 +++++----- tz/hash_test.go | 6 +++++- tz/tzbits_amd64.s | 4 ++-- 4 files changed, 27 insertions(+), 15 deletions(-) diff --git a/tz/hash.go b/tz/hash.go index 4d9b65f..018466f 100644 --- a/tz/hash.go +++ b/tz/hash.go @@ -47,11 +47,10 @@ func (d *digest) checkSum() [hashSize]byte { } func (d *digest) 
byteArray() (b [hashSize]byte) { - var t []byte - for i := 0; i < 4; i++ { - t = d.x[i].ByteArray() - copy(b[i*16:], t) - } + copy(b[:], d.x[0].ByteArray()) + copy(b[16:], d.x[1].ByteArray()) + copy(b[32:], d.x[2].ByteArray()) + copy(b[48:], d.x[3].ByteArray()) return } @@ -85,14 +84,23 @@ func (d *digest) BlockSize() int { return hashBlockSize } -// Sum returnz Tillich-Zémor checksum of data -func Sum(data []byte) [hashSize]byte { +// SumAVX returns Tillich-Zémor checksum of data. +// It uses only AVX instructions (no AVX2). +func SumAVX(data []byte) [hashSize]byte { d := new(digest) d.Reset() _, _ = d.Write(data) // no errors return d.checkSum() } +// Sum returns Tillich-Zémor checksum of data. +func Sum(data []byte) [hashSize]byte { + d := new(digest2) + d.Reset() + _, _ = d.Write(data) // no errors + return d.checkSum() +} + // Concat performs combining of hashes based on homomorphic property. func Concat(hs [][]byte) ([]byte, error) { var b, c sl2 diff --git a/tz/hash_avx2.go b/tz/hash_avx2.go index 3acfb56..c64a682 100644 --- a/tz/hash_avx2.go +++ b/tz/hash_avx2.go @@ -35,7 +35,7 @@ func (d *digest2) Sum(in []byte) []byte { } func (d *digest2) Reset() { d.x[0] = gf127.GF127x2{1, 0, 0, 0} - d.x[1] = gf127.GF127x2{0, 0, 0, 1} + d.x[1] = gf127.GF127x2{0, 0, 1, 0} } func (d *digest2) Size() int { return hashSize } func (d *digest2) BlockSize() int { return hashBlockSize } @@ -43,12 +43,12 @@ func (d *digest2) checkSum() (b [hashSize]byte) { // Matrix is stored transposed, // but we need to use order consistent with digest. 
h := d.x[0].ByteArray() - copy(b[:], h[:8]) - copy(b[16:], h[8:]) + copy(b[:], h[:16]) + copy(b[32:], h[16:]) h = d.x[1].ByteArray() - copy(b[8:], h[:8]) - copy(b[24:], h[8:]) + copy(b[16:], h[:16]) + copy(b[48:], h[16:]) return } diff --git a/tz/hash_test.go b/tz/hash_test.go index 222f363..c79dcc5 100644 --- a/tz/hash_test.go +++ b/tz/hash_test.go @@ -15,6 +15,10 @@ var testCases = []struct { input []byte hash string }{ + { + []byte{}, + "00000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001", + }, { []byte{0, 1, 2, 3, 4, 5, 6, 7, 8}, "00000000000001e4a545e5b90fb6882b00000000000000c849cd88f79307f67100000000000000cd0c898cb68356e624000000000000007cbcdc7c5e89b16e4b", @@ -40,7 +44,7 @@ func TestHash(t *testing.T) { }) t.Run("test AVX2 digest", func(t *testing.T) { - d := new(digest) + d := new(digest2) for _, tc := range testCases { d.Reset() _, _ = d.Write(tc.input) diff --git a/tz/tzbits_amd64.s b/tz/tzbits_amd64.s index ecd170c..063a154 100644 --- a/tz/tzbits_amd64.s +++ b/tz/tzbits_amd64.s @@ -64,9 +64,9 @@ TEXT ·mulBitRight(SB),NOSPLIT,$0 // func mulBitRightx2(c00c10, c01c11 *[4]uint64, e *[2]uint64) TEXT ·mulBitRightx2(SB),NOSPLIT,$0 - MOVQ c00c01+0(FP), AX + MOVQ c00c10+0(FP), AX VMOVDQA (AX), Y0 - MOVQ c10c11+8(FP), BX + MOVQ c01c11+8(FP), BX VMOVDQA (BX), Y8 VPSLLQ $1, Y0, Y1 From e1d9fc80583b02d0925ad69ef95f2e395f917f16 Mon Sep 17 00:00:00 2001 From: Evgenii Date: Fri, 21 Jun 2019 23:16:58 +0300 Subject: [PATCH 5/7] Use testify in tests --- tz/hash_test.go | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tz/hash_test.go b/tz/hash_test.go index c79dcc5..e52ed90 100644 --- a/tz/hash_test.go +++ b/tz/hash_test.go @@ -9,7 +9,7 @@ import ( "github.com/stretchr/testify/require" ) -const benchDataSize = 1000000 +const benchDataSize = 100000 var testCases = []struct { input []byte @@ -36,10 +36,8 @@ func TestHash(t *testing.T) { d.Reset() _, _ = 
d.Write(tc.input) sum := d.checkSum() - hash := hex.EncodeToString(sum[:]) - if hash != tc.hash { - t.Errorf("expected (%s), got (%s)", tc.hash, hash) - } + + require.Equal(t, tc.hash, hex.EncodeToString(sum[:])) } }) @@ -49,10 +47,8 @@ func TestHash(t *testing.T) { d.Reset() _, _ = d.Write(tc.input) sum := d.checkSum() - hash := hex.EncodeToString(sum[:]) - if hash != tc.hash { - t.Errorf("expected (%s), got (%s)", tc.hash, hash) - } + + require.Equal(t, tc.hash, hex.EncodeToString(sum[:])) } }) } From ad8c7bce1b0c7c623b771094d094180ae47e9904 Mon Sep 17 00:00:00 2001 From: Evgenii Date: Mon, 24 Jun 2019 10:07:16 +0300 Subject: [PATCH 6/7] Fix type assertions --- tz/hash.go | 2 +- tz/hash_avx2.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tz/hash.go b/tz/hash.go index 018466f..b8b9e50 100644 --- a/tz/hash.go +++ b/tz/hash.go @@ -21,7 +21,7 @@ type digest struct { } // type assertion -var _ hash.Hash = new(digest) +var _ hash.Hash = (*digest)(nil) var ( minmax = [2]gf127.GF127{{0, 0}, {math.MaxUint64, math.MaxUint64}} diff --git a/tz/hash_avx2.go b/tz/hash_avx2.go index c64a682..5d43516 100644 --- a/tz/hash_avx2.go +++ b/tz/hash_avx2.go @@ -10,7 +10,7 @@ type digest2 struct { x [2]gf127.GF127x2 } -var _ hash.Hash = new(digest2) +var _ hash.Hash = (*digest2)(nil) func (d *digest2) Write(data []byte) (n int, err error) { n = len(data) From ec6649ba1c46202464d1498d20b9874cc2c682ee Mon Sep 17 00:00:00 2001 From: Evgenii Date: Mon, 24 Jun 2019 10:33:00 +0300 Subject: [PATCH 7/7] Add benchmark result to README --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index 58b0b03..d9cd741 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,15 @@ are concatable: hash sum of data can be calculated based on hashes of chunks. The example of how it works can be seen in tests. 
+# Benchmarks + +## AVX vs AVX2 version + +``` +BenchmarkAVX-8 300 3566248 ns/op 64 B/op 4 allocs/op +BenchmarkAVX2-8 500 2857174 ns/op 64 B/op 2 allocs/op +``` + # Contributing At this moment, we do not accept contributions. Follow us.