From f296adb043ad35d1d9ce52dca1b40b903d4d406b Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Thu, 10 Oct 2019 11:04:15 +0300 Subject: [PATCH 1/2] Remove usage of unsafe --- gf127/gf127x2.go | 21 +++++++++------------ gf127/gf127x2_test.go | 34 ++++++++++++++++++++++++++-------- tz/avx2.go | 4 ++-- tz/avx2_inline.go | 4 ++-- 4 files changed, 39 insertions(+), 24 deletions(-) diff --git a/gf127/gf127x2.go b/gf127/gf127x2.go index 1087050..22ab9a8 100644 --- a/gf127/gf127x2.go +++ b/gf127/gf127x2.go @@ -3,28 +3,25 @@ package gf127 import ( "encoding/binary" "encoding/hex" - "unsafe" ) // GF127x2 represents a pair of elements of GF(2^127) stored together. -type GF127x2 [4]uint64 +type GF127x2 [2]GF127 // Split returns 2 components of pair without additional allocations. func Split(a *GF127x2) (*GF127, *GF127) { - return (*GF127)(unsafe.Pointer(a)), (*GF127)(unsafe.Pointer(&(*a)[2])) + return &a[0], &a[1] } // CombineTo 2 elements of GF(2^127) to the respective components of pair. func CombineTo(a *GF127, b *GF127, c *GF127x2) { - c[0] = a[0] - c[1] = a[1] - c[2] = b[0] - c[3] = b[1] + c[0] = *a + c[1] = *b } // Equal checks if both elements of GF(2^127) pair are equal. func (a *GF127x2) Equal(b *GF127x2) bool { - return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3] + return a[0] == b[0] && a[1] == b[1] } // String returns hex-encoded representation, starting with MSB. @@ -37,10 +34,10 @@ func (a *GF127x2) String() string { // ByteArray represents element of GF(2^127) as byte array of length 32. 
func (a *GF127x2) ByteArray() (buf []byte) { buf = make([]byte, 32) - binary.BigEndian.PutUint64(buf, a[1]) - binary.BigEndian.PutUint64(buf[8:], a[0]) - binary.BigEndian.PutUint64(buf[16:], a[3]) - binary.BigEndian.PutUint64(buf[24:], a[2]) + binary.BigEndian.PutUint64(buf, a[0][1]) + binary.BigEndian.PutUint64(buf[8:], a[0][0]) + binary.BigEndian.PutUint64(buf[16:], a[1][1]) + binary.BigEndian.PutUint64(buf[24:], a[1][0]) return } diff --git a/gf127/gf127x2_test.go b/gf127/gf127x2_test.go index 9baa2d1..73a4a68 100644 --- a/gf127/gf127x2_test.go +++ b/gf127/gf127x2_test.go @@ -11,8 +11,8 @@ var testCasesSplit = []struct { h1 *GF127 h2 *GF127 }{ - {&GF127x2{123, 31, 141, 9}, &GF127{123, 31}, &GF127{141, 9}}, - {&GF127x2{maxUint64, 0, 0, maxUint64}, &GF127{maxUint64, 0}, &GF127{0, maxUint64}}, + {&GF127x2{GF127{123, 31}, GF127{141, 9}}, &GF127{123, 31}, &GF127{141, 9}}, + {&GF127x2{GF127{maxUint64, 0}, GF127{0, maxUint64}}, &GF127{maxUint64, 0}, &GF127{0, maxUint64}}, } func TestSplit(t *testing.T) { @@ -32,9 +32,18 @@ func TestCombineTo(t *testing.T) { } var testCasesMul10x2 = [][2]*GF127x2{ - {&GF127x2{123, 0, 123, 0}, &GF127x2{246, 0, 246, 0}}, - {&GF127x2{maxUint64, 2, 0, 1}, &GF127x2{maxUint64 - 1, 5, 0, 2}}, - {&GF127x2{0, maxUint64 >> 1, maxUint64, 2}, &GF127x2{1 + 1<<63, maxUint64>>1 - 1, maxUint64 - 1, 5}}, + { + &GF127x2{GF127{123, 0}, GF127{123, 0}}, + &GF127x2{GF127{246, 0}, GF127{246, 0}}, + }, + { + &GF127x2{GF127{maxUint64, 2}, GF127{0, 1}}, + &GF127x2{GF127{maxUint64 - 1, 5}, GF127{0, 2}}, + }, + { + &GF127x2{GF127{0, maxUint64 >> 1}, GF127{maxUint64, 2}}, + &GF127x2{GF127{1 + 1<<63, maxUint64>>1 - 1}, GF127{maxUint64 - 1, 5}}, + }, } func TestMul10x2(t *testing.T) { @@ -46,9 +55,18 @@ func TestMul10x2(t *testing.T) { } var testCasesMul11x2 = [][2]*GF127x2{ - {&GF127x2{123, 0, 123, 0}, &GF127x2{141, 0, 141, 0}}, - {&GF127x2{maxUint64, 2, 0, 1}, &GF127x2{1, 7, 0, 3}}, - {&GF127x2{0, maxUint64 >> 1, maxUint64, 2}, &GF127x2{1 + 1<<63, 1, 1, 7}}, + { + 
&GF127x2{GF127{123, 0}, GF127{123, 0}}, + &GF127x2{GF127{141, 0}, GF127{141, 0}}, + }, + { + &GF127x2{GF127{maxUint64, 2}, GF127{0, 1}}, + &GF127x2{GF127{1, 7}, GF127{0, 3}}, + }, + { + &GF127x2{GF127{0, maxUint64 >> 1}, GF127{maxUint64, 2}}, + &GF127x2{GF127{1 + 1<<63, 1}, GF127{1, 7}}, + }, } func TestMul11x2(t *testing.T) { diff --git a/tz/avx2.go b/tz/avx2.go index 2fe54e0..00ec3b3 100644 --- a/tz/avx2.go +++ b/tz/avx2.go @@ -44,8 +44,8 @@ func (d *digest2) Sum(in []byte) []byte { return append(in, h[:]...) } func (d *digest2) Reset() { - d.x[0] = gf127.GF127x2{1, 0, 0, 0} - d.x[1] = gf127.GF127x2{0, 0, 1, 0} + d.x[0] = gf127.GF127x2{gf127.GF127{1, 0}, gf127.GF127{0, 0}} + d.x[1] = gf127.GF127x2{gf127.GF127{0, 0}, gf127.GF127{1, 0}} } func (d *digest2) Size() int { return hashSize } func (d *digest2) BlockSize() int { return hashBlockSize } diff --git a/tz/avx2_inline.go b/tz/avx2_inline.go index de863b7..286e1b7 100644 --- a/tz/avx2_inline.go +++ b/tz/avx2_inline.go @@ -38,8 +38,8 @@ func (d *digest3) Sum(in []byte) []byte { return append(in, h[:]...) } func (d *digest3) Reset() { - d.x[0] = gf127.GF127x2{1, 0, 0, 0} - d.x[1] = gf127.GF127x2{0, 0, 1, 0} + d.x[0] = gf127.GF127x2{gf127.GF127{1, 0}, gf127.GF127{0, 0}} + d.x[1] = gf127.GF127x2{gf127.GF127{0, 0}, gf127.GF127{1, 0}} } func (d *digest3) Size() int { return hashSize } func (d *digest3) BlockSize() int { return hashBlockSize } From 1d4e7550fc1f01e4a4d4e76c661b89e55d1a0f19 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Thu, 10 Oct 2019 11:29:40 +0300 Subject: [PATCH 2/2] Use macros in AVX hash implementation --- tz/avx_amd64.s | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/tz/avx_amd64.s b/tz/avx_amd64.s index 063a154..97e41b9 100644 --- a/tz/avx_amd64.s +++ b/tz/avx_amd64.s @@ -1,5 +1,17 @@ #include "textflag.h" +// mul2 multiplies FROM by 2, stores the result in TO +// and uses R2 and R3 as scratch registers for internal computations.
+#define mul2(FROM, TO, R2, R3) \ + VPSLLQ $1, FROM, TO \ + VPALIGNR $8, TO, FROM, R2 \ + PSRLQ $63, R2 \ + MOVUPD ·x127x63(SB), R3 \ + ANDPD TO, R3 \ + VPUNPCKHQDQ R3, R3, R3 \ + XORPD R2, TO \ + XORPD R3, TO + // func mulBitRight(c00, c01, c10, c11, e *[2]uint64) TEXT ·mulBitRight(SB),NOSPLIT,$0 MOVQ c00+0(FP), AX @@ -14,14 +26,7 @@ TEXT ·mulBitRight(SB),NOSPLIT,$0 MOVUPD (DX), X3 // c00 *= 2 - VPSLLQ $1, X0, X5 - VPALIGNR $8, X5, X0, X6 - PSRLQ $63, X6 - MOVUPD ·x127x63(SB), X7 - ANDPD X5, X7 - VPUNPCKHQDQ X7, X7, X7 - XORPD X6, X5 - XORPD X7, X5 + mul2(X0, X5, X6, X7) MOVUPD X5, X0 // c00 += c01 @@ -29,14 +34,7 @@ TEXT ·mulBitRight(SB),NOSPLIT,$0 MOVUPD X0, (AX) // c10 *= 2 - VPSLLQ $1, X2, X5 - VPALIGNR $8, X5, X2, X6 - PSRLQ $63, X6 - MOVUPD ·x127x63(SB), X7 - ANDPD X5, X7 - VPUNPCKHQDQ X7, X7, X7 - XORPD X6, X5 - XORPD X7, X5 + mul2(X2, X5, X6, X7) MOVUPD X5, X2 // c10 += c11