diff --git a/gf127/arith.go b/gf127/arith.go deleted file mode 100644 index 1a5243f..0000000 --- a/gf127/arith.go +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright 2019 (c) NSPCC -// -// Package gf127 implements the GF(2^127) arithmetic -// modulo reduction polynomial x^127 + x^63 + 1 . -// Implementation is in pure Go. -package gf127 - -import ( - "math/bits" -) - -var ( - // x126x631 is reduction polynomial x^127+x^63+1 - x127x631 = GF127{msb64 + 1, msb64} -) - -// New constructs new element of GF(2^127) as hi*x^64 + lo. -// It is assumed that hi has zero MSB. -func New(lo, hi uint64) *GF127 { - return &GF127{lo, hi} -} - -// Inv sets b to a^-1 -// Algorithm is based on Extended Euclidean Algorithm -// and is described by Hankerson, Hernandez, Menezes in -// https://link.springer.com/content/pdf/10.1007/3-540-44499-8_1.pdf -func Inv(a, b *GF127) { - var ( - v = x127x631 - u = *a - c, d = &GF127{1, 0}, &GF127{0, 0} - t = new(GF127) - x *GF127 - ) - - // degree of polynomial is a position of most significant bit - for du, dv := msb(&u), msb(&v); du != 0; du, dv = msb(&u), msb(&v) { - if du < dv { - v, u = u, v - dv, du = du, dv - d, c = c, d - } - - x = xN(du - dv) - - Mul(x, &v, t) - Add(&u, t, &u) - - // becasuse mul performs reduction on t, we need - // manually reduce u at first step - if msb(&u) == 127 { - Add(&u, &x127x631, &u) - } - - Mul(x, d, t) - Add(c, t, c) - } - *b = *c -} - -func xN(n int) *GF127 { - if n < 64 { - return &GF127{1 << uint(n), 0} - } - return &GF127{0, 1 << uint(n-64)} -} - -func msb(a *GF127) (x int) { - x = bits.LeadingZeros64(a[1]) - if x == 64 { - x = bits.LeadingZeros64(a[0]) + 64 - } - return 127 - x -} - -// Mul1 copies a to b. -func Mul1(a, b *GF127) { - b[0] = a[0] - b[1] = a[1] -} - -// And sets c to a & b (bitwise-and). -func And(a, b, c *GF127) { - c[0] = a[0] & b[0] - c[1] = a[1] & b[1] -} - -// Add sets c to a+b. -func Add(a, b, c *GF127) { - c[0] = a[0] ^ b[0] - c[1] = a[1] ^ b[1] -} - -// Mul sets c to a*b. -func Mul(a, b, c *GF127) { - r := new(GF127) - d := *a - for i := uint(0); i < 64; i++ { - if b[0]&(1<> 63 - b[0] = a[0] << 1 - b[1] = (a[1] << 1) ^ c - - mask := b[1] & msb64 - b[0] ^= mask | (mask >> 63) - b[1] ^= mask -} - -// Mul11 sets b to a*(x+1). -func Mul11(a, b *GF127) { - c := a[0] >> 63 - b[0] = a[0] ^ (a[0] << 1) - b[1] = a[1] ^ (a[1] << 1) ^ c - - mask := b[1] & msb64 - b[0] ^= mask | (mask >> 63) - b[1] ^= mask -} diff --git a/gf127/avx/gf127.go b/gf127/avx/gf127.go deleted file mode 100644 index e77aeb2..0000000 --- a/gf127/avx/gf127.go +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2018 (c) NSPCC -// -// Package gf127 implements the GF(2^127) arithmetic -// modulo reduction polynomial x^127 + x^63 + 1 . -// This is rather straight-forward re-implementation of C library -// available here https://github.com/srijs/hwsl2-core . -// Interfaces are highly influenced by math/big . -package avx - -import ( - "github.com/nspcc-dev/tzhash/gf127" -) - -// GF127 is an alias for a main type. -type GF127 = gf127.GF127 - -const msb64 = uint64(1) << 63 - -// x127x63 represents x^127 + x^63. Used in assembly file. -var x127x63 = GF127{msb64, msb64} //nolint:deadcode,varcheck - -// Add sets c to a+b. -func Add(a, b, c *GF127) - -// Mul sets c to a*b. -func Mul(a, b, c *GF127) - -// Mul10 sets b to a*x. -func Mul10(a, b *GF127) - -// Mul11 sets b to a*(x+1). -func Mul11(a, b *GF127) diff --git a/gf127/avx/gf127_test.go b/gf127/avx/gf127_test.go deleted file mode 100644 index 9da629c..0000000 --- a/gf127/avx/gf127_test.go +++ /dev/null @@ -1,68 +0,0 @@ -package avx - -import ( - "testing" - - "github.com/nspcc-dev/tzhash/gf127" - "github.com/stretchr/testify/require" -) - -const maxUint64 = ^uint64(0) - -func TestAdd(t *testing.T) { - var ( - a = gf127.Random() - b = gf127.Random() - e = &GF127{a[0] ^ b[0], a[1] ^ b[1]} - c = new(GF127) - ) - Add(a, b, c) - require.Equal(t, e, c) -} - -var testCasesMul = [][3]*GF127{ - // (x+1)*(x^63+x^62+...+1) == x^64+1 - {&GF127{3, 0}, &GF127{maxUint64, 0}, &GF127{1, 1}}, - - // x^126 * x^2 == x^128 == x^64 + x - {&GF127{0, 1 << 62}, &GF127{4, 0}, &GF127{2, 1}}, - - // (x^64+x^63+1) * (x^64+x) == x^128+x^65+x^127+x^64+x^64+x == x^65+x^64+x^63+1 - {&GF127{1 + 1<<63, 1}, &GF127{2, 1}, &GF127{0x8000000000000001, 3}}, -} - -func TestMul(t *testing.T) { - c := new(GF127) - for _, tc := range testCasesMul { - Mul(tc[0], tc[1], c) - require.Equal(t, tc[2], c) - } -} - -var testCasesMul10 = [][2]*GF127{ - {&GF127{123, 0}, &GF127{246, 0}}, - {&GF127{maxUint64, 2}, &GF127{maxUint64 - 1, 5}}, - {&GF127{0, maxUint64 >> 1}, &GF127{1 + 1<<63, maxUint64>>1 - 1}}, -} - -func TestMul10(t *testing.T) { - c := new(GF127) - for _, tc := range testCasesMul10 { - Mul10(tc[0], c) - require.Equal(t, tc[1], c) - } -} - -var testCasesMul11 = [][2]*GF127{ - {&GF127{123, 0}, &GF127{141, 0}}, - {&GF127{maxUint64, 2}, &GF127{1, 7}}, - {&GF127{0, maxUint64 >> 1}, &GF127{1 + 1<<63, 1}}, -} - -func TestMul11(t *testing.T) { - c := new(GF127) - for _, tc := range testCasesMul11 { - Mul11(tc[0], c) - require.Equal(t, tc[1], c) - } -} diff --git a/gf127/doc.go b/gf127/doc.go new file mode 100644 index 0000000..1b100c8 --- /dev/null +++ b/gf127/doc.go @@ -0,0 +1,7 @@ +// Package gf127 implements the GF(2^127) arithmetic +// modulo reduction polynomial x^127 + x^63 + 1 . +// gf127.go contains common definitions. +// Other files contain architecture-specific implementations. +// +// Copyright 2019 (c) NSPCC +package gf127 diff --git a/gf127/gf127.go b/gf127/gf127.go index de3c276..414f795 100644 --- a/gf127/gf127.go +++ b/gf127/gf127.go @@ -4,17 +4,135 @@ import ( "encoding/binary" "encoding/hex" "errors" + "math/bits" "math/rand" ) +// GF127 represents element of GF(2^127) +type GF127 [2]uint64 + const ( byteSize = 16 maxUint64 = ^uint64(0) msb64 = uint64(1) << 63 ) -// GF127 represents element of GF(2^127) -type GF127 [2]uint64 +// x127x631 is reduction polynomial x^127 + x^63 + 1 +var x127x631 = GF127{msb64 + 1, msb64} + +// New constructs new element of GF(2^127) as hi*x^64 + lo. +// It is assumed that hi has zero MSB. +func New(lo, hi uint64) *GF127 { + return &GF127{lo, hi} +} + +func addGeneric(a, b, c *GF127) { + c[0] = a[0] ^ b[0] + c[1] = a[1] ^ b[1] +} + +func mulGeneric(a, b, c *GF127) { + r := new(GF127) + d := *a + for i := uint(0); i < 64; i++ { + if b[0]&(1<> 63 + b[0] = a[0] << 1 + b[1] = (a[1] << 1) ^ c + + mask := b[1] & msb64 + b[0] ^= mask | (mask >> 63) + b[1] ^= mask +} + +func mul11Generic(a, b *GF127) { + c := a[0] >> 63 + b[0] = a[0] ^ (a[0] << 1) + b[1] = a[1] ^ (a[1] << 1) ^ c + + mask := b[1] & msb64 + b[0] ^= mask | (mask >> 63) + b[1] ^= mask +} + +// Inv sets b to a^-1 +// Algorithm is based on Extended Euclidean Algorithm +// and is described by Hankerson, Hernandez, Menezes in +// https://link.springer.com/content/pdf/10.1007/3-540-44499-8_1.pdf +func Inv(a, b *GF127) { + var ( + v = x127x631 + u = *a + c, d = &GF127{1, 0}, &GF127{0, 0} + t = new(GF127) + x *GF127 + ) + + // degree of polynomial is a position of most significant bit + for du, dv := msb(&u), msb(&v); du != 0; du, dv = msb(&u), msb(&v) { + if du < dv { + v, u = u, v + dv, du = du, dv + d, c = c, d + } + + x = xN(du - dv) + + Mul(x, &v, t) + Add(&u, t, &u) + + // becasuse mulAVX performs reduction on t, we need + // manually reduce u at first step + if msb(&u) == 127 { + Add(&u, &x127x631, &u) + } + + Mul(x, d, t) + Add(c, t, c) + } + *b = *c +} + +func xN(n int) *GF127 { + if n < 64 { + return &GF127{1 << uint(n), 0} + } + return &GF127{0, 1 << uint(n-64)} +} + +func msb(a *GF127) (x int) { + x = bits.LeadingZeros64(a[1]) + if x == 64 { + x = bits.LeadingZeros64(a[0]) + 64 + } + return 127 - x +} + +// Mul1 copies b into a. +func Mul1(a, b *GF127) { + a[0] = b[0] + a[1] = b[1] +} + +// And sets c to a & b (bitwise-and). +func And(a, b, c *GF127) { + c[0] = a[0] & b[0] + c[1] = a[1] & b[1] +} // Random returns random element from GF(2^127). // Is used mostly for testing. @@ -24,7 +142,8 @@ func Random() *GF127 { // String returns hex-encoded representation, starting with MSB. func (c *GF127) String() string { - return hex.EncodeToString(c.ByteArray()) + buf := c.ByteArray() + return hex.EncodeToString(buf[:]) } // Equals checks if two reduced (zero MSB) elements of GF(2^127) are equal @@ -33,16 +152,17 @@ func (c *GF127) Equals(b *GF127) bool { } // ByteArray represents element of GF(2^127) as byte array of length 16. -func (c *GF127) ByteArray() (buf []byte) { - buf = make([]byte, 16) +func (c *GF127) ByteArray() []byte { + buf := make([]byte, 16) binary.BigEndian.PutUint64(buf[:8], c[1]) binary.BigEndian.PutUint64(buf[8:], c[0]) - return + return buf } // MarshalBinary implements encoding.BinaryMarshaler. func (c *GF127) MarshalBinary() (data []byte, err error) { - return c.ByteArray(), nil + buf := c.ByteArray() + return buf[:], nil } // UnmarshalBinary implements encoding.BinaryUnmarshaler. diff --git a/gf127/gf127_amd64.go b/gf127/gf127_amd64.go new file mode 100644 index 0000000..186666a --- /dev/null +++ b/gf127/gf127_amd64.go @@ -0,0 +1,55 @@ +//go:build amd64 && !generic +// +build amd64,!generic + +// Package gf127 implements the GF(2^127) arithmetic +// modulo reduction polynomial x^127 + x^63 + 1 . +// This is rather straight-forward re-implementation of C library +// available here https://github.com/srijs/hwsl2-core . +// Interfaces are highly influenced by math/big . +package gf127 + +import "golang.org/x/sys/cpu" + +// x127x63 represents x^127 + x^63 +var x127x63 = GF127{msb64, msb64} //nolint:deadcode,varcheck + +// Add sets c to a+b. +func Add(a, b, c *GF127) { + if cpu.X86.HasAVX { + addAVX(a, b, c) + } else { + addGeneric(a, b, c) + } +} + +// Mul sets c to a*b. +func Mul(a, b, c *GF127) { + if cpu.X86.HasAVX { + mulAVX(a, b, c) + } else { + mulGeneric(a, b, c) + } +} + +// Mul10 sets b to a*x. +func Mul10(a, b *GF127) { + if cpu.X86.HasAVX { + mul10AVX(a, b) + } else { + mul10Generic(a, b) + } +} + +// Mul11 sets b to a*(x+1). +func Mul11(a, b *GF127) { + if cpu.X86.HasAVX { + mul11AVX(a, b) + } else { + mul11Generic(a, b) + } +} + +func addAVX(a, b, c *GF127) +func mulAVX(a, b, c *GF127) +func mul10AVX(a, b *GF127) +func mul11AVX(a, b *GF127) diff --git a/gf127/avx/gf127_amd64.s b/gf127/gf127_amd64.s similarity index 93% rename from gf127/avx/gf127_amd64.s rename to gf127/gf127_amd64.s index 281efc6..3e64b50 100644 --- a/gf127/avx/gf127_amd64.s +++ b/gf127/gf127_amd64.s @@ -1,7 +1,7 @@ #include "textflag.h" // func Add(a, b, c *[2]uint64) -TEXT ·Add(SB), NOSPLIT, $0 +TEXT ·addAVX(SB), NOSPLIT, $0 MOVQ a+0(FP), AX MOVUPD (AX), X0 MOVQ b+8(FP), BX @@ -12,7 +12,7 @@ TEXT ·Add(SB), NOSPLIT, $0 RET // func Mul10(a, b *[2]uint64) -TEXT ·Mul10(SB), NOSPLIT, $0 +TEXT ·mul10AVX(SB), NOSPLIT, $0 MOVQ a+0(FP), AX MOVUPD (AX), X0 VPSLLQ $1, X0, X1 @@ -28,7 +28,7 @@ TEXT ·Mul10(SB), NOSPLIT, $0 RET // func Mul11(a, b *[2]uint64) -TEXT ·Mul11(SB), NOSPLIT, $0 +TEXT ·mul11AVX(SB), NOSPLIT, $0 MOVQ a+0(FP), AX MOVUPD (AX), X0 VPSLLQ $1, X0, X1 @@ -45,7 +45,7 @@ TEXT ·Mul11(SB), NOSPLIT, $0 RET // func Mul(a, b, c *[2]uint64) -TEXT ·Mul(SB), NOSPLIT, $0 +TEXT ·mulAVX(SB), NOSPLIT, $0 MOVQ a+0(FP), AX // X0 = a0 . a1 MOVUPD (AX), X0 // X0 = a0 . a1 MOVQ b+8(FP), BX // X1 = b0 . b1 diff --git a/gf127/gf127_generic.go b/gf127/gf127_generic.go new file mode 100644 index 0000000..33919d9 --- /dev/null +++ b/gf127/gf127_generic.go @@ -0,0 +1,24 @@ +//go:build !amd64 || generic +// +build !amd64 generic + +package gf127 + +// Add sets c to a+b. +func Add(a, b, c *GF127) { + addGeneric(a, b, c) +} + +// Mul sets c to a*b. +func Mul(a, b, c *GF127) { + mulGeneric(a, b, c) +} + +// Mul10 sets b to a*x. +func Mul10(a, b *GF127) { + mul10Generic(a, b) +} + +// Mul11 sets b to a*(x+1). +func Mul11(a, b *GF127) { + mul11Generic(a, b) +} diff --git a/gf127/arith_test.go b/gf127/gf127_test.go similarity index 100% rename from gf127/arith_test.go rename to gf127/gf127_test.go diff --git a/gf127/avx2/gf127x2.go b/gf127/gf127x2.go similarity index 81% rename from gf127/avx2/gf127x2.go rename to gf127/gf127x2.go index 7dbca58..12d5cef 100644 --- a/gf127/avx2/gf127x2.go +++ b/gf127/gf127x2.go @@ -1,18 +1,23 @@ -package avx2 +package gf127 import ( "encoding/binary" "encoding/hex" - - "github.com/nspcc-dev/tzhash/gf127" ) -// GF127 is an alias for a main type. -type GF127 = gf127.GF127 - // GF127x2 represents a pair of elements of GF(2^127) stored together. type GF127x2 [2]GF127 +func mul10x2Generic(a, b *GF127x2) { + mul10Generic(&a[0], &b[0]) + mul10Generic(&a[1], &b[1]) +} + +func mul11x2Generic(a, b *GF127x2) { + mul11Generic(&a[0], &b[0]) + mul11Generic(&a[1], &b[1]) +} + // Split returns 2 components of pair without additional allocations. func Split(a *GF127x2) (*GF127, *GF127) { return &a[0], &a[1] @@ -45,9 +50,3 @@ func (a *GF127x2) ByteArray() (buf []byte) { binary.BigEndian.PutUint64(buf[24:], a[1][0]) return } - -// Mul10x2 sets (b1, b2) to (a1*x, a2*x) -func Mul10x2(a, b *GF127x2) - -// Mul10x2 sets (b1, b2) to (a1*(x+1), a2*(x+1)) -func Mul11x2(a, b *GF127x2) diff --git a/gf127/gf127x2_amd64.go b/gf127/gf127x2_amd64.go new file mode 100644 index 0000000..6cf8787 --- /dev/null +++ b/gf127/gf127x2_amd64.go @@ -0,0 +1,27 @@ +//go:build amd64 && !generic +// +build amd64,!generic + +package gf127 + +import "golang.org/x/sys/cpu" + +// Mul10x2 sets (b1, b2) to (a1*x, a2*x) +func Mul10x2(a, b *GF127x2) { + if cpu.X86.HasAVX && cpu.X86.HasAVX2 { + mul10x2AVX2(a, b) + } else { + mul10x2Generic(a, b) + } +} + +// Mul11x2 sets (b1, b2) to (a1*(x+1), a2*(x+1)) +func Mul11x2(a, b *GF127x2) { + if cpu.X86.HasAVX && cpu.X86.HasAVX2 { + mul11x2AVX2(a, b) + } else { + mul11x2Generic(a, b) + } +} + +func mul10x2AVX2(a, b *GF127x2) +func mul11x2AVX2(a, b *GF127x2) diff --git a/gf127/avx2/gf127x2_amd64.s b/gf127/gf127x2_amd64.s similarity index 90% rename from gf127/avx2/gf127x2_amd64.s rename to gf127/gf127x2_amd64.s index ac708e5..9596be5 100644 --- a/gf127/avx2/gf127x2_amd64.s +++ b/gf127/gf127x2_amd64.s @@ -1,7 +1,7 @@ #include "textflag.h" // func Mul10x2(a, b) *[4]uint64 -TEXT ·Mul10x2(SB), NOSPLIT, $0 +TEXT ·mul10x2AVX2(SB), NOSPLIT, $0 MOVQ a+0(FP), AX VMOVDQA (AX), Y0 VPSLLQ $1, Y0, Y1 @@ -17,7 +17,7 @@ TEXT ·Mul10x2(SB), NOSPLIT, $0 RET // func Mul11x2(a, b) *[4]uint64 -TEXT ·Mul11x2(SB), NOSPLIT, $0 +TEXT ·mul11x2AVX2(SB), NOSPLIT, $0 MOVQ a+0(FP), AX VMOVDQA (AX), Y0 VPSLLQ $1, Y0, Y1 diff --git a/gf127/gf127x2_generic.go b/gf127/gf127x2_generic.go new file mode 100644 index 0000000..4ca4c36 --- /dev/null +++ b/gf127/gf127x2_generic.go @@ -0,0 +1,14 @@ +//go:build !(amd64 && !generic) +// +build !amd64 generic + +package gf127 + +// Mul10x2 sets (b1, b2) to (a1*x, a2*x) +func Mul10x2(a, b *GF127x2) { + mul10x2Generic(a, b) +} + +// Mul11x2 sets (b1, b2) to (a1*(x+1), a2*(x+1)) +func Mul11x2(a, b *GF127x2) { + mul11x2Generic(a, b) +} diff --git a/gf127/avx2/gf127x2_test.go b/gf127/gf127x2_test.go similarity index 97% rename from gf127/avx2/gf127x2_test.go rename to gf127/gf127x2_test.go index 5de5865..73a4a68 100644 --- a/gf127/avx2/gf127x2_test.go +++ b/gf127/gf127x2_test.go @@ -1,4 +1,4 @@ -package avx2 +package gf127 import ( "testing" @@ -6,8 +6,6 @@ import ( "github.com/stretchr/testify/require" ) -const maxUint64 = ^uint64(0) - var testCasesSplit = []struct { num *GF127x2 h1 *GF127 diff --git a/tz/avx2.go b/tz/avx2.go index 48c7025..b41c182 100644 --- a/tz/avx2.go +++ b/tz/avx2.go @@ -3,11 +3,11 @@ package tz import ( "hash" - "github.com/nspcc-dev/tzhash/gf127/avx2" + "github.com/nspcc-dev/tzhash/gf127" ) type digest2 struct { - x [2]avx2.GF127x2 + x [2]gf127.GF127x2 } // type assertion @@ -41,8 +41,8 @@ func (d *digest2) Sum(in []byte) []byte { return append(in, h[:]...) } func (d *digest2) Reset() { - d.x[0] = avx2.GF127x2{GF127{1, 0}, GF127{0, 0}} - d.x[1] = avx2.GF127x2{GF127{0, 0}, GF127{1, 0}} + d.x[0] = gf127.GF127x2{GF127{1, 0}, GF127{0, 0}} + d.x[1] = gf127.GF127x2{GF127{0, 0}, GF127{1, 0}} } func (d *digest2) Size() int { return Size } func (d *digest2) BlockSize() int { return hashBlockSize } @@ -59,4 +59,4 @@ func (d *digest2) checkSum() (b [Size]byte) { return } -func mulBitRightx2(c00c10 *avx2.GF127x2, c01c11 *avx2.GF127x2, e *GF127) +func mulBitRightx2(c00c10 *gf127.GF127x2, c01c11 *gf127.GF127x2, e *GF127) diff --git a/tz/avx2_inline.go b/tz/avx2_inline.go index eb30a9a..f5fcecf 100644 --- a/tz/avx2_inline.go +++ b/tz/avx2_inline.go @@ -3,11 +3,11 @@ package tz import ( "hash" - "github.com/nspcc-dev/tzhash/gf127/avx2" + "github.com/nspcc-dev/tzhash/gf127" ) type digest3 struct { - x [2]avx2.GF127x2 + x [2]gf127.GF127x2 } // type assertion @@ -34,8 +34,8 @@ func (d *digest3) Sum(in []byte) []byte { return append(in, h[:]...) } func (d *digest3) Reset() { - d.x[0] = avx2.GF127x2{GF127{1, 0}, GF127{0, 0}} - d.x[1] = avx2.GF127x2{GF127{0, 0}, GF127{1, 0}} + d.x[0] = gf127.GF127x2{GF127{1, 0}, GF127{0, 0}} + d.x[1] = gf127.GF127x2{GF127{0, 0}, GF127{1, 0}} } func (d *digest3) Size() int { return Size } func (d *digest3) BlockSize() int { return hashBlockSize } @@ -52,4 +52,4 @@ func (d *digest3) checkSum() (b [Size]byte) { return } -func mulByteSliceRightx2(c00c10 *avx2.GF127x2, c01c11 *avx2.GF127x2, n int, data *byte) +func mulByteSliceRightx2(c00c10 *gf127.GF127x2, c01c11 *gf127.GF127x2, n int, data *byte) diff --git a/tz/sl2.go b/tz/sl2.go index 4676eb1..14f963b 100644 --- a/tz/sl2.go +++ b/tz/sl2.go @@ -4,7 +4,6 @@ import ( "errors" "github.com/nspcc-dev/tzhash/gf127" - "github.com/nspcc-dev/tzhash/gf127/avx" ) type ( @@ -18,16 +17,6 @@ var id = sl2{ {GF127{0, 0}, GF127{1, 0}}, } -var mul func(a, b, c *sl2, x *[4]GF127) - -func init() { - if hasAVX { - mul = mulSL2AVX - } else { - mul = mulSL2Pure - } -} - func (c *sl2) MarshalBinary() (data []byte, err error) { s := c.ByteArray() return s[:], nil @@ -56,86 +45,54 @@ func (c *sl2) UnmarshalBinary(data []byte) (err error) { func (c *sl2) mulStrassen(a, b *sl2, x *[8]GF127) *sl2 { //nolint:unused // strassen algorithm - avx.Add(&a[0][0], &a[1][1], &x[0]) - avx.Add(&b[0][0], &b[1][1], &x[1]) - avx.Mul(&x[0], &x[1], &x[0]) + gf127.Add(&a[0][0], &a[1][1], &x[0]) + gf127.Add(&b[0][0], &b[1][1], &x[1]) + gf127.Mul(&x[0], &x[1], &x[0]) - avx.Add(&a[1][0], &a[1][1], &x[1]) - avx.Mul(&x[1], &b[0][0], &x[1]) + gf127.Add(&a[1][0], &a[1][1], &x[1]) + gf127.Mul(&x[1], &b[0][0], &x[1]) - avx.Add(&b[0][1], &b[1][1], &x[2]) - avx.Mul(&x[2], &a[0][0], &x[2]) + gf127.Add(&b[0][1], &b[1][1], &x[2]) + gf127.Mul(&x[2], &a[0][0], &x[2]) - avx.Add(&b[1][0], &b[0][0], &x[3]) - avx.Mul(&x[3], &a[1][1], &x[3]) + gf127.Add(&b[1][0], &b[0][0], &x[3]) + gf127.Mul(&x[3], &a[1][1], &x[3]) - avx.Add(&a[0][0], &a[0][1], &x[4]) - avx.Mul(&x[4], &b[1][1], &x[4]) + gf127.Add(&a[0][0], &a[0][1], &x[4]) + gf127.Mul(&x[4], &b[1][1], &x[4]) - avx.Add(&a[1][0], &a[0][0], &x[5]) - avx.Add(&b[0][0], &b[0][1], &x[6]) - avx.Mul(&x[5], &x[6], &x[5]) + gf127.Add(&a[1][0], &a[0][0], &x[5]) + gf127.Add(&b[0][0], &b[0][1], &x[6]) + gf127.Mul(&x[5], &x[6], &x[5]) - avx.Add(&a[0][1], &a[1][1], &x[6]) - avx.Add(&b[1][0], &b[1][1], &x[7]) - avx.Mul(&x[6], &x[7], &x[6]) + gf127.Add(&a[0][1], &a[1][1], &x[6]) + gf127.Add(&b[1][0], &b[1][1], &x[7]) + gf127.Mul(&x[6], &x[7], &x[6]) - avx.Add(&x[2], &x[4], &c[0][1]) - avx.Add(&x[1], &x[3], &c[1][0]) + gf127.Add(&x[2], &x[4], &c[0][1]) + gf127.Add(&x[1], &x[3], &c[1][0]) - avx.Add(&x[4], &x[6], &x[4]) - avx.Add(&x[0], &x[3], &c[0][0]) - avx.Add(&c[0][0], &x[4], &c[0][0]) + gf127.Add(&x[4], &x[6], &x[4]) + gf127.Add(&x[0], &x[3], &c[0][0]) + gf127.Add(&c[0][0], &x[4], &c[0][0]) - avx.Add(&x[0], &x[1], &x[0]) - avx.Add(&x[2], &x[5], &c[1][1]) - avx.Add(&c[1][1], &x[0], &c[1][1]) + gf127.Add(&x[0], &x[1], &x[0]) + gf127.Add(&x[2], &x[5], &c[1][1]) + gf127.Add(&c[1][1], &x[0], &c[1][1]) return c } -func mulSL2AVX(a, b, c *sl2, x *[4]GF127) { - avx.Mul(&a[0][0], &b[0][0], &x[0]) - avx.Mul(&a[0][0], &b[0][1], &x[1]) - avx.Mul(&a[1][0], &b[0][0], &x[2]) - avx.Mul(&a[1][0], &b[0][1], &x[3]) - - avx.Mul(&a[0][1], &b[1][0], &c[0][0]) - avx.Add(&c[0][0], &x[0], &c[0][0]) - avx.Mul(&a[0][1], &b[1][1], &c[0][1]) - avx.Add(&c[0][1], &x[1], &c[0][1]) - avx.Mul(&a[1][1], &b[1][0], &c[1][0]) - avx.Add(&c[1][0], &x[2], &c[1][0]) - avx.Mul(&a[1][1], &b[1][1], &c[1][1]) - avx.Add(&c[1][1], &x[3], &c[1][1]) -} - -func mulSL2Pure(a, b, c *sl2, x *[4]GF127) { - gf127.Mul((*GF127)(&a[0][0]), (*GF127)(&b[0][0]), (*GF127)(&x[0])) - gf127.Mul((*GF127)(&a[0][0]), (*GF127)(&b[0][1]), (*GF127)(&x[1])) - gf127.Mul((*GF127)(&a[1][0]), (*GF127)(&b[0][0]), (*GF127)(&x[2])) - gf127.Mul((*GF127)(&a[1][0]), (*GF127)(&b[0][1]), (*GF127)(&x[3])) - - gf127.Mul((*GF127)(&a[0][1]), (*GF127)(&b[1][0]), (*GF127)(&c[0][0])) - gf127.Add((*GF127)(&c[0][0]), (*GF127)(&x[0]), (*GF127)(&c[0][0])) - gf127.Mul((*GF127)(&a[0][1]), (*GF127)(&b[1][1]), (*GF127)(&c[0][1])) - gf127.Add((*GF127)(&c[0][1]), (*GF127)(&x[1]), (*GF127)(&c[0][1])) - gf127.Mul((*GF127)(&a[1][1]), (*GF127)(&b[1][0]), (*GF127)(&c[1][0])) - gf127.Add((*GF127)(&c[1][0]), (*GF127)(&x[2]), (*GF127)(&c[1][0])) - gf127.Mul((*GF127)(&a[1][1]), (*GF127)(&b[1][1]), (*GF127)(&c[1][1])) - gf127.Add((*GF127)(&c[1][1]), (*GF127)(&x[3]), (*GF127)(&c[1][1])) -} - func (c *sl2) MulA() *sl2 { var a GF127 - avx.Mul10(&c[0][0], &a) + gf127.Mul10(&c[0][0], &a) gf127.Mul1(&c[0][0], &c[0][1]) - avx.Add(&a, &c[0][1], &c[0][0]) + gf127.Add(&a, &c[0][1], &c[0][0]) - avx.Mul10(&c[1][0], &a) + gf127.Mul10(&c[1][0], &a) gf127.Mul1(&c[1][0], &c[1][1]) - avx.Add(&a, &c[1][1], &c[1][0]) + gf127.Add(&a, &c[1][1], &c[1][0]) return c } @@ -144,20 +101,34 @@ func (c *sl2) MulB() *sl2 { var a GF127 gf127.Mul1(&c[0][0], &a) - avx.Mul10(&c[0][0], &c[0][0]) - avx.Add(&c[0][1], &c[0][0], &c[0][0]) - avx.Add(&c[0][0], &a, &c[0][1]) + gf127.Mul10(&c[0][0], &c[0][0]) + gf127.Add(&c[0][1], &c[0][0], &c[0][0]) + gf127.Add(&c[0][0], &a, &c[0][1]) gf127.Mul1(&c[1][0], &a) - avx.Mul10(&c[1][0], &c[1][0]) - avx.Add(&c[1][1], &c[1][0], &c[1][0]) - avx.Add(&c[1][0], &a, &c[1][1]) + gf127.Mul10(&c[1][0], &c[1][0]) + gf127.Add(&c[1][1], &c[1][0], &c[1][0]) + gf127.Add(&c[1][0], &a, &c[1][1]) return c } func (c *sl2) Mul(a, b *sl2) *sl2 { - mul(a, b, c, new([4]GF127)) + var x [4]GF127 + + gf127.Mul(&a[0][0], &b[0][0], &x[0]) + gf127.Mul(&a[0][0], &b[0][1], &x[1]) + gf127.Mul(&a[1][0], &b[0][0], &x[2]) + gf127.Mul(&a[1][0], &b[0][1], &x[3]) + + gf127.Mul(&a[0][1], &b[1][0], &c[0][0]) + gf127.Add(&c[0][0], &x[0], &c[0][0]) + gf127.Mul(&a[0][1], &b[1][1], &c[0][1]) + gf127.Add(&c[0][1], &x[1], &c[0][1]) + gf127.Mul(&a[1][1], &b[1][0], &c[1][0]) + gf127.Add(&c[1][0], &x[2], &c[1][0]) + gf127.Mul(&a[1][1], &b[1][1], &c[1][1]) + gf127.Add(&c[1][1], &x[3], &c[1][1]) return c } @@ -169,15 +140,15 @@ func Inv(a *sl2) (b *sl2) { } func inv(a, b *sl2, t *[2]GF127) { - avx.Mul(&a[0][0], &a[1][1], &t[0]) - avx.Mul(&a[0][1], &a[1][0], &t[1]) - avx.Add(&t[0], &t[1], &t[0]) + gf127.Mul(&a[0][0], &a[1][1], &t[0]) + gf127.Mul(&a[0][1], &a[1][0], &t[1]) + gf127.Add(&t[0], &t[1], &t[0]) gf127.Inv(&t[0], &t[1]) - avx.Mul(&t[1], &a[0][0], &b[1][1]) - avx.Mul(&t[1], &a[0][1], &b[0][1]) - avx.Mul(&t[1], &a[1][0], &b[1][0]) - avx.Mul(&t[1], &a[1][1], &b[0][0]) + gf127.Mul(&t[1], &a[0][0], &b[1][1]) + gf127.Mul(&t[1], &a[0][1], &b[0][1]) + gf127.Mul(&t[1], &a[1][0], &b[1][0]) + gf127.Mul(&t[1], &a[1][1], &b[0][0]) } func (c *sl2) String() string { diff --git a/tz/sl2_test.go b/tz/sl2_test.go index d43fec8..884d64c 100644 --- a/tz/sl2_test.go +++ b/tz/sl2_test.go @@ -6,7 +6,6 @@ import ( "time" "github.com/nspcc-dev/tzhash/gf127" - "github.com/nspcc-dev/tzhash/gf127/avx" "github.com/stretchr/testify/require" ) @@ -22,12 +21,12 @@ func random() (a *sl2) { // so that result is in SL2 // d = a^-1*(1+b*c) - avx.Mul(&a[0][1], &a[1][0], &a[1][1]) - avx.Add(&a[1][1], gf127.New(1, 0), &a[1][1]) + gf127.Mul(&a[0][1], &a[1][0], &a[1][1]) + gf127.Add(&a[1][1], gf127.New(1, 0), &a[1][1]) t := gf127.New(0, 0) gf127.Inv(&a[0][0], t) - avx.Mul(t, &a[1][1], &a[1][1]) + gf127.Mul(t, &a[1][1], &a[1][1]) return }