diff --git a/gogf127/gogf127.go b/gf127/arith.go similarity index 59% rename from gogf127/gogf127.go rename to gf127/arith.go index 10af5c4..7977440 100644 --- a/gogf127/gogf127.go +++ b/gf127/arith.go @@ -3,22 +3,10 @@ // Package gf127 implements the GF(2^127) arithmetic // modulo reduction polynomial x^127 + x^63 + 1 . // Implementation is in pure Go. -package gogf127 +package gf127 import ( - "encoding/binary" - "encoding/hex" - "errors" "math/bits" - "math/rand" -) - -// GF127 represents element of GF(2^127) -type GF127 [2]uint64 - -const ( - msb64 = uint64(0x8000000000000000) - byteSize = 16 ) var ( @@ -32,50 +20,6 @@ func New(lo, hi uint64) *GF127 { return &GF127{lo, hi} } -// Random returns random element from GF(2^127). -// Is used mostly for testing. -func Random() *GF127 { - return &GF127{rand.Uint64(), rand.Uint64() >> 1} -} - -// String returns hex-encoded representation, starting with MSB. -func (c *GF127) String() string { - return hex.EncodeToString(c.ByteArray()) -} - -// Equals checks if two reduced (zero MSB) elements of GF(2^127) are equal -func (c *GF127) Equals(b *GF127) bool { - return c[0] == b[0] && c[1] == b[1] -} - -// ByteArray represents element of GF(2^127) as byte array of length 16. -func (c *GF127) ByteArray() (buf []byte) { - buf = make([]byte, 16) - binary.BigEndian.PutUint64(buf[:8], c[1]) - binary.BigEndian.PutUint64(buf[8:], c[0]) - return -} - -// MarshalBinary implements encoding.BinaryMarshaler. -func (c *GF127) MarshalBinary() (data []byte, err error) { - return c.ByteArray(), nil -} - -// UnmarshalBinary implements encoding.BinaryUnmarshaler. -func (c *GF127) UnmarshalBinary(data []byte) error { - if len(data) != byteSize { - return errors.New("data must be 16-bytes long") - } - - c[0] = binary.BigEndian.Uint64(data[8:]) - c[1] = binary.BigEndian.Uint64(data[:8]) - if c[1]&msb64 != 0 { - return errors.New("MSB must be zero") - } - - return nil -} - // Inv sets b to a^-1 // Algorithm is based on Extended Euclidean Algorithm // and is described by Hankerson, Hernandez, Menezes in @@ -129,18 +73,6 @@ func msb(a *GF127) (x int) { return 127 - x } -// Mul sets c to the product a*b and returns c. -func (c *GF127) Mul(a, b *GF127) *GF127 { - Mul(a, b, c) - return c -} - -// Add sets c to the sum a+b and returns c. -func (c *GF127) Add(a, b *GF127) *GF127 { - Add(a, b, c) - return c -} - // Mul1 copies a to b. func Mul1(a, b *GF127) { b[0] = a[0] diff --git a/gogf127/gf127_test.go b/gf127/arith_test.go similarity index 98% rename from gogf127/gf127_test.go rename to gf127/arith_test.go index a6addb6..4cac7f7 100644 --- a/gogf127/gf127_test.go +++ b/gf127/arith_test.go @@ -1,4 +1,4 @@ -package gogf127 +package gf127 import ( "testing" @@ -6,8 +6,6 @@ import ( "github.com/stretchr/testify/require" ) -const maxUint64 = ^uint64(0) - func TestAdd(t *testing.T) { var ( a = Random() diff --git a/gf127/avx/gf127.go b/gf127/avx/gf127.go new file mode 100644 index 0000000..5283d36 --- /dev/null +++ b/gf127/avx/gf127.go @@ -0,0 +1,103 @@ +// Copyright 2018 (c) NSPCC +// +// Package gf127 implements the GF(2^127) arithmetic +// modulo reduction polynomial x^127 + x^63 + 1 . +// This is rather straight-forward re-implementation of C library +// available here https://github.com/srijs/hwsl2-core . +// Interfaces are highly influenced by math/big . +package avx + +import ( + "math/bits" + + "github.com/nspcc-dev/tzhash/gf127" +) + +type GF127 = gf127.GF127 + +const msb64 = uint64(1) << 63 + +var ( + // x127x63 represents x^127 + x^63. Used in assembly file. + x127x63 = GF127{msb64, msb64} + + // x126x631 is reduction polynomial x^127+x^63+1 + x127x631 = GF127{msb64 + 1, msb64} +) + +// Inv sets b to a^-1 +// Algorithm is based on Extended Euclidean Algorithm +// and is described by Hankerson, Hernandez, Menezes in +// https://link.springer.com/content/pdf/10.1007/3-540-44499-8_1.pdf +func Inv(a, b *GF127) { + var ( + v = x127x631 + u = *a + c, d = &GF127{1, 0}, &GF127{0, 0} + t = new(GF127) + x *GF127 + ) + + // degree of polynomial is a position of most significant bit + for du, dv := msb(&u), msb(&v); du != 0; du, dv = msb(&u), msb(&v) { + if du < dv { + v, u = u, v + dv, du = du, dv + d, c = c, d + } + + x = xN(du - dv) + + Mul(x, &v, t) + Add(&u, t, &u) + + // becasuse mul performs reduction on t, we need + // manually reduce u at first step + if msb(&u) == 127 { + Add(&u, &x127x631, &u) + } + + Mul(x, d, t) + Add(c, t, c) + } + *b = *c +} + +func xN(n int) *GF127 { + if n < 64 { + return &GF127{1 << uint(n), 0} + } + return &GF127{0, 1 << uint(n-64)} +} + +func msb(a *GF127) (x int) { + x = bits.LeadingZeros64(a[1]) + if x == 64 { + x = bits.LeadingZeros64(a[0]) + 64 + } + return 127 - x +} + +// Mul1 copies a to b. +func Mul1(a, b *GF127) { + b[0] = a[0] + b[1] = a[1] +} + +// And sets c to a & b (bitwise-and). +func And(a, b, c *GF127) { + c[0] = a[0] & b[0] + c[1] = a[1] & b[1] +} + +// Add sets c to a+b. +func Add(a, b, c *GF127) + +// Mul sets c to a*b. +func Mul(a, b, c *GF127) + +// Mul10 sets b to a*x. +func Mul10(a, b *GF127) + +// Mul11 sets b to a*(x+1). +func Mul11(a, b *GF127) diff --git a/gf127/gf127_amd64.s b/gf127/avx/gf127_amd64.s similarity index 75% rename from gf127/gf127_amd64.s rename to gf127/avx/gf127_amd64.s index ada6f24..d159814 100644 --- a/gf127/gf127_amd64.s +++ b/gf127/avx/gf127_amd64.s @@ -27,22 +27,6 @@ TEXT ·Mul10(SB),NOSPLIT,$0 MOVUPD X1, (AX) RET -// func Mul10x2(a, b) *[4]uint64 -TEXT ·Mul10x2(SB),NOSPLIT,$0 - MOVQ a+0(FP), AX - VMOVDQA (AX), Y0 - VPSLLQ $1, Y0, Y1 - VPALIGNR $8, Y1, Y0, Y2 - VPSRLQ $63, Y2, Y2 - VPXOR Y1, Y2, Y2 - VPSRLQ $63, Y1, Y3 - VPSLLQ $63, Y3, Y3 - VPUNPCKHQDQ Y3, Y3, Y3 - VPXOR Y2, Y3, Y3 - MOVQ b+8(FP), AX - VMOVDQA Y3, (AX) - RET - // func Mul11(a, b *[2]uint64) TEXT ·Mul11(SB),NOSPLIT,$0 MOVQ a+0(FP), AX @@ -60,23 +44,6 @@ TEXT ·Mul11(SB),NOSPLIT,$0 MOVUPD X1, (AX) RET -// func Mul11x2(a, b) *[4]uint64 -TEXT ·Mul11x2(SB),NOSPLIT,$0 - MOVQ a+0(FP), AX - VMOVDQA (AX), Y0 - VPSLLQ $1, Y0, Y1 - VPALIGNR $8, Y1, Y0, Y2 - VPSRLQ $63, Y2, Y2 - VPXOR Y1, Y2, Y2 - VPSRLQ $63, Y1, Y3 - VPSLLQ $63, Y3, Y3 - VPUNPCKHQDQ Y3, Y3, Y3 - VPXOR Y2, Y3, Y3 - VPXOR Y0, Y3, Y3 - MOVQ b+8(FP), AX - VMOVDQA Y3, (AX) - RET - // func Mul(a, b, c *[2]uint64) TEXT ·Mul(SB),NOSPLIT,$0 MOVQ a+0(FP), AX // X0 = a0 . a1 diff --git a/gf127/gf127_test.go b/gf127/avx/gf127_test.go similarity index 76% rename from gf127/gf127_test.go rename to gf127/avx/gf127_test.go index e24533d..b7f1ba9 100644 --- a/gf127/gf127_test.go +++ b/gf127/avx/gf127_test.go @@ -1,8 +1,9 @@ -package gf127 +package avx import ( "testing" + "github.com/nspcc-dev/tzhash/gf127" "github.com/stretchr/testify/require" ) @@ -10,12 +11,12 @@ const maxUint64 = ^uint64(0) func TestAdd(t *testing.T) { var ( - a = Random() - b = Random() + a = gf127.Random() + b = gf127.Random() e = &GF127{a[0] ^ b[0], a[1] ^ b[1]} c = new(GF127) ) - c.Add(a, b) + Add(a, b, c) require.Equal(t, e, c) } @@ -81,7 +82,7 @@ func TestInv(t *testing.T) { for i := 0; i < 3; i++ { // 0 has no inverse - if a = Random(); a.Equals(&GF127{0, 0}) { + if a = gf127.Random(); a.Equals(&GF127{0, 0}) { continue } Inv(a, b) @@ -89,22 +90,3 @@ func TestInv(t *testing.T) { require.Equal(t, &GF127{1, 0}, c) } } - -func TestGF127_MarshalBinary(t *testing.T) { - a := New(0xFF, 0xEE) - data, err := a.MarshalBinary() - require.NoError(t, err) - require.Equal(t, data, []byte{0, 0, 0, 0, 0, 0, 0, 0xEE, 0, 0, 0, 0, 0, 0, 0, 0xFF}) - - a = Random() - data, err = a.MarshalBinary() - require.NoError(t, err) - - b := new(GF127) - err = b.UnmarshalBinary(data) - require.NoError(t, err) - require.Equal(t, a, b) - - err = b.UnmarshalBinary([]byte{0, 1, 2, 3}) - require.Error(t, err) -} diff --git a/gf127/gf127x2.go b/gf127/avx2/gf127x2.go similarity index 85% rename from gf127/gf127x2.go rename to gf127/avx2/gf127x2.go index 22ab9a8..8d96836 100644 --- a/gf127/gf127x2.go +++ b/gf127/avx2/gf127x2.go @@ -1,20 +1,22 @@ -package gf127 +package avx2 import ( "encoding/binary" "encoding/hex" + + "github.com/nspcc-dev/tzhash/gf127" ) // GF127x2 represents a pair of elements of GF(2^127) stored together. -type GF127x2 [2]GF127 +type GF127x2 [2]gf127.GF127 // Split returns 2 components of pair without additional allocations. -func Split(a *GF127x2) (*GF127, *GF127) { +func Split(a *GF127x2) (*gf127.GF127, *gf127.GF127) { return &a[0], &a[1] } // CombineTo 2 elements of GF(2^127) to the respective components of pair. -func CombineTo(a *GF127, b *GF127, c *GF127x2) { +func CombineTo(a *gf127.GF127, b *gf127.GF127, c *GF127x2) { c[0] = *a c[1] = *b } diff --git a/gf127/avx2/gf127x2_amd64.s b/gf127/avx2/gf127x2_amd64.s new file mode 100644 index 0000000..05f3700 --- /dev/null +++ b/gf127/avx2/gf127x2_amd64.s @@ -0,0 +1,35 @@ +#include "textflag.h" + +// func Mul10x2(a, b) *[4]uint64 +TEXT ·Mul10x2(SB),NOSPLIT,$0 + MOVQ a+0(FP), AX + VMOVDQA (AX), Y0 + VPSLLQ $1, Y0, Y1 + VPALIGNR $8, Y1, Y0, Y2 + VPSRLQ $63, Y2, Y2 + VPXOR Y1, Y2, Y2 + VPSRLQ $63, Y1, Y3 + VPSLLQ $63, Y3, Y3 + VPUNPCKHQDQ Y3, Y3, Y3 + VPXOR Y2, Y3, Y3 + MOVQ b+8(FP), AX + VMOVDQA Y3, (AX) + RET + + +// func Mul11x2(a, b) *[4]uint64 +TEXT ·Mul11x2(SB),NOSPLIT,$0 + MOVQ a+0(FP), AX + VMOVDQA (AX), Y0 + VPSLLQ $1, Y0, Y1 + VPALIGNR $8, Y1, Y0, Y2 + VPSRLQ $63, Y2, Y2 + VPXOR Y1, Y2, Y2 + VPSRLQ $63, Y1, Y3 + VPSLLQ $63, Y3, Y3 + VPUNPCKHQDQ Y3, Y3, Y3 + VPXOR Y2, Y3, Y3 + VPXOR Y0, Y3, Y3 + MOVQ b+8(FP), AX + VMOVDQA Y3, (AX) + RET diff --git a/gf127/avx2/gf127x2_test.go b/gf127/avx2/gf127x2_test.go new file mode 100644 index 0000000..6df2b9e --- /dev/null +++ b/gf127/avx2/gf127x2_test.go @@ -0,0 +1,81 @@ +package avx2 + +import ( + "testing" + + "github.com/nspcc-dev/tzhash/gf127" + "github.com/stretchr/testify/require" +) + +const maxUint64 = ^uint64(0) + +var testCasesSplit = []struct { + num *GF127x2 + h1 *gf127.GF127 + h2 *gf127.GF127 +}{ + {&GF127x2{gf127.GF127{123, 31}, gf127.GF127{141, 9}}, &gf127.GF127{123, 31}, &gf127.GF127{141, 9}}, + {&GF127x2{gf127.GF127{maxUint64, 0}, gf127.GF127{0, maxUint64}}, &gf127.GF127{maxUint64, 0}, &gf127.GF127{0, maxUint64}}, +} + +func TestSplit(t *testing.T) { + for _, tc := range testCasesSplit { + a, b := Split(tc.num) + require.Equal(t, tc.h1, a) + require.Equal(t, tc.h2, b) + } +} + +func TestCombineTo(t *testing.T) { + c := new(GF127x2) + for _, tc := range testCasesSplit { + CombineTo(tc.h1, tc.h2, c) + require.Equal(t, tc.num, c) + } +} + +var testCasesMul10x2 = [][2]*GF127x2{ + { + &GF127x2{gf127.GF127{123, 0}, gf127.GF127{123, 0}}, + &GF127x2{gf127.GF127{246, 0}, gf127.GF127{246, 0}}, + }, + { + &GF127x2{gf127.GF127{maxUint64, 2}, gf127.GF127{0, 1}}, + &GF127x2{gf127.GF127{maxUint64 - 1, 5}, gf127.GF127{0, 2}}, + }, + { + &GF127x2{gf127.GF127{0, maxUint64 >> 1}, gf127.GF127{maxUint64, 2}}, + &GF127x2{gf127.GF127{1 + 1<<63, maxUint64>>1 - 1}, gf127.GF127{maxUint64 - 1, 5}}, + }, +} + +func TestMul10x2(t *testing.T) { + c := new(GF127x2) + for _, tc := range testCasesMul10x2 { + Mul10x2(tc[0], c) + require.Equal(t, tc[1], c) + } +} + +var testCasesMul11x2 = [][2]*GF127x2{ + { + &GF127x2{gf127.GF127{123, 0}, gf127.GF127{123, 0}}, + &GF127x2{gf127.GF127{141, 0}, gf127.GF127{141, 0}}, + }, + { + &GF127x2{gf127.GF127{maxUint64, 2}, gf127.GF127{0, 1}}, + &GF127x2{gf127.GF127{1, 7}, gf127.GF127{0, 3}}, + }, + { + &GF127x2{gf127.GF127{0, maxUint64 >> 1}, gf127.GF127{maxUint64, 2}}, + &GF127x2{gf127.GF127{1 + 1<<63, 1}, gf127.GF127{1, 7}}, + }, +} + +func TestMul11x2(t *testing.T) { + c := new(GF127x2) + for _, tc := range testCasesMul11x2 { + Mul11x2(tc[0], c) + require.Equal(t, tc[1], c) + } +} diff --git a/gf127/gf127.go b/gf127/gf127.go index 40e7965..de3c276 100644 --- a/gf127/gf127.go +++ b/gf127/gf127.go @@ -1,42 +1,21 @@ -// Copyright 2018 (c) NSPCC -// -// Package gf127 implements the GF(2^127) arithmetic -// modulo reduction polynomial x^127 + x^63 + 1 . -// This is rather straight-forward re-implementation of C library -// available here https://github.com/srijs/hwsl2-core . -// Interfaces are highly influenced by math/big . package gf127 import ( "encoding/binary" "encoding/hex" "errors" - "math/bits" "math/rand" ) +const ( + byteSize = 16 + maxUint64 = ^uint64(0) + msb64 = uint64(1) << 63 +) + // GF127 represents element of GF(2^127) type GF127 [2]uint64 -const ( - msb64 = uint64(0x8000000000000000) - byteSize = 16 -) - -var ( - // x127x63 represents x^127 + x^63. Used in assembly file. - x127x63 = GF127{msb64, msb64} - - // x126x631 is reduction polynomial x^127+x^63+1 - x127x631 = GF127{msb64 + 1, msb64} -) - -// New constructs new element of GF(2^127) as hi*x^64 + lo. -// It is assumed that hi has zero MSB. -func New(lo, hi uint64) *GF127 { - return &GF127{lo, hi} -} - // Random returns random element from GF(2^127). // Is used mostly for testing. func Random() *GF127 { @@ -80,92 +59,3 @@ func (c *GF127) UnmarshalBinary(data []byte) error { return nil } - -// Inv sets b to a^-1 -// Algorithm is based on Extended Euclidean Algorithm -// and is described by Hankerson, Hernandez, Menezes in -// https://link.springer.com/content/pdf/10.1007/3-540-44499-8_1.pdf -func Inv(a, b *GF127) { - var ( - v = x127x631 - u = *a - c, d = &GF127{1, 0}, &GF127{0, 0} - t = new(GF127) - x *GF127 - ) - - // degree of polynomial is a position of most significant bit - for du, dv := msb(&u), msb(&v); du != 0; du, dv = msb(&u), msb(&v) { - if du < dv { - v, u = u, v - dv, du = du, dv - d, c = c, d - } - - x = xN(du - dv) - - Mul(x, &v, t) - Add(&u, t, &u) - - // becasuse mul performs reduction on t, we need - // manually reduce u at first step - if msb(&u) == 127 { - Add(&u, &x127x631, &u) - } - - Mul(x, d, t) - Add(c, t, c) - } - *b = *c -} - -func xN(n int) *GF127 { - if n < 64 { - return &GF127{1 << uint(n), 0} - } - return &GF127{0, 1 << uint(n-64)} -} - -func msb(a *GF127) (x int) { - x = bits.LeadingZeros64(a[1]) - if x == 64 { - x = bits.LeadingZeros64(a[0]) + 64 - } - return 127 - x -} - -// Mul sets c to the product a*b and returns c. -func (c *GF127) Mul(a, b *GF127) *GF127 { - Mul(a, b, c) - return c -} - -// Add sets c to the sum a+b and returns c. -func (c *GF127) Add(a, b *GF127) *GF127 { - Add(a, b, c) - return c -} - -// Mul1 copies a to b. -func Mul1(a, b *GF127) { - b[0] = a[0] - b[1] = a[1] -} - -// And sets c to a & b (bitwise-and). -func And(a, b, c *GF127) { - c[0] = a[0] & b[0] - c[1] = a[1] & b[1] -} - -// Add sets c to a+b. -func Add(a, b, c *GF127) - -// Mul sets c to a*b. -func Mul(a, b, c *GF127) - -// Mul10 sets b to a*x. -func Mul10(a, b *GF127) - -// Mul11 sets b to a*(x+1). -func Mul11(a, b *GF127) diff --git a/gf127/gf127x2_test.go b/gf127/gf127x2_test.go deleted file mode 100644 index 73a4a68..0000000 --- a/gf127/gf127x2_test.go +++ /dev/null @@ -1,78 +0,0 @@ -package gf127 - -import ( - "testing" - - "github.com/stretchr/testify/require" -) - -var testCasesSplit = []struct { - num *GF127x2 - h1 *GF127 - h2 *GF127 -}{ - {&GF127x2{GF127{123, 31}, GF127{141, 9}}, &GF127{123, 31}, &GF127{141, 9}}, - {&GF127x2{GF127{maxUint64, 0}, GF127{0, maxUint64}}, &GF127{maxUint64, 0}, &GF127{0, maxUint64}}, -} - -func TestSplit(t *testing.T) { - for _, tc := range testCasesSplit { - a, b := Split(tc.num) - require.Equal(t, tc.h1, a) - require.Equal(t, tc.h2, b) - } -} - -func TestCombineTo(t *testing.T) { - c := new(GF127x2) - for _, tc := range testCasesSplit { - CombineTo(tc.h1, tc.h2, c) - require.Equal(t, tc.num, c) - } -} - -var testCasesMul10x2 = [][2]*GF127x2{ - { - &GF127x2{GF127{123, 0}, GF127{123, 0}}, - &GF127x2{GF127{246, 0}, GF127{246, 0}}, - }, - { - &GF127x2{GF127{maxUint64, 2}, GF127{0, 1}}, - &GF127x2{GF127{maxUint64 - 1, 5}, GF127{0, 2}}, - }, - { - &GF127x2{GF127{0, maxUint64 >> 1}, GF127{maxUint64, 2}}, - &GF127x2{GF127{1 + 1<<63, maxUint64>>1 - 1}, GF127{maxUint64 - 1, 5}}, - }, -} - -func TestMul10x2(t *testing.T) { - c := new(GF127x2) - for _, tc := range testCasesMul10x2 { - Mul10x2(tc[0], c) - require.Equal(t, tc[1], c) - } -} - -var testCasesMul11x2 = [][2]*GF127x2{ - { - &GF127x2{GF127{123, 0}, GF127{123, 0}}, - &GF127x2{GF127{141, 0}, GF127{141, 0}}, - }, - { - &GF127x2{GF127{maxUint64, 2}, GF127{0, 1}}, - &GF127x2{GF127{1, 7}, GF127{0, 3}}, - }, - { - &GF127x2{GF127{0, maxUint64 >> 1}, GF127{maxUint64, 2}}, - &GF127x2{GF127{1 + 1<<63, 1}, GF127{1, 7}}, - }, -} - -func TestMul11x2(t *testing.T) { - c := new(GF127x2) - for _, tc := range testCasesMul11x2 { - Mul11x2(tc[0], c) - require.Equal(t, tc[1], c) - } -} diff --git a/tz/avx2.go b/tz/avx2.go index 00ec3b3..bff7de2 100644 --- a/tz/avx2.go +++ b/tz/avx2.go @@ -7,10 +7,11 @@ import ( "hash" "github.com/nspcc-dev/tzhash/gf127" + "github.com/nspcc-dev/tzhash/gf127/avx2" ) type digest2 struct { - x [2]gf127.GF127x2 + x [2]avx2.GF127x2 } // type assertion @@ -44,8 +45,8 @@ func (d *digest2) Sum(in []byte) []byte { return append(in, h[:]...) } func (d *digest2) Reset() { - d.x[0] = gf127.GF127x2{gf127.GF127{1, 0}, gf127.GF127{0, 0}} - d.x[1] = gf127.GF127x2{gf127.GF127{0, 0}, gf127.GF127{1, 0}} + d.x[0] = avx2.GF127x2{gf127.GF127{1, 0}, gf127.GF127{0, 0}} + d.x[1] = avx2.GF127x2{gf127.GF127{0, 0}, gf127.GF127{1, 0}} } func (d *digest2) Size() int { return hashSize } func (d *digest2) BlockSize() int { return hashBlockSize } @@ -62,4 +63,4 @@ func (d *digest2) checkSum() (b [hashSize]byte) { return } -func mulBitRightx2(c00c10 *gf127.GF127x2, c01c11 *gf127.GF127x2, e *gf127.GF127) +func mulBitRightx2(c00c10 *avx2.GF127x2, c01c11 *avx2.GF127x2, e *gf127.GF127) diff --git a/tz/avx2_inline.go b/tz/avx2_inline.go index 286e1b7..f2f00dc 100644 --- a/tz/avx2_inline.go +++ b/tz/avx2_inline.go @@ -8,10 +8,11 @@ import ( "hash" "github.com/nspcc-dev/tzhash/gf127" + "github.com/nspcc-dev/tzhash/gf127/avx2" ) type digest3 struct { - x [2]gf127.GF127x2 + x [2]avx2.GF127x2 } // type assertion @@ -38,8 +39,8 @@ func (d *digest3) Sum(in []byte) []byte { return append(in, h[:]...) } func (d *digest3) Reset() { - d.x[0] = gf127.GF127x2{gf127.GF127{1, 0}, gf127.GF127{0, 0}} - d.x[1] = gf127.GF127x2{gf127.GF127{0, 0}, gf127.GF127{1, 0}} + d.x[0] = avx2.GF127x2{gf127.GF127{1, 0}, gf127.GF127{0, 0}} + d.x[1] = avx2.GF127x2{gf127.GF127{0, 0}, gf127.GF127{1, 0}} } func (d *digest3) Size() int { return hashSize } func (d *digest3) BlockSize() int { return hashBlockSize } @@ -56,4 +57,4 @@ func (d *digest3) checkSum() (b [hashSize]byte) { return } -func mulByteRightx2(c00c10 *gf127.GF127x2, c01c11 *gf127.GF127x2, b byte) +func mulByteRightx2(c00c10 *avx2.GF127x2, c01c11 *avx2.GF127x2, b byte) diff --git a/tz/pure.go b/tz/pure.go index d1a5961..36b1e8b 100644 --- a/tz/pure.go +++ b/tz/pure.go @@ -1,11 +1,11 @@ package tz import ( - "github.com/nspcc-dev/tzhash/gogf127" + "github.com/nspcc-dev/tzhash/gf127" ) type digestp struct { - x [4]gogf127.GF127 + x [4]gf127.GF127 } // New returns a new hash.Hash computing the Tillich-Zémor checksum. @@ -35,15 +35,15 @@ func (d *digestp) byteArray() (b [hashSize]byte) { } func (d *digestp) Reset() { - d.x[0] = gogf127.GF127{1, 0} - d.x[1] = gogf127.GF127{0, 0} - d.x[2] = gogf127.GF127{0, 0} - d.x[3] = gogf127.GF127{1, 0} + d.x[0] = gf127.GF127{1, 0} + d.x[1] = gf127.GF127{0, 0} + d.x[2] = gf127.GF127{0, 0} + d.x[3] = gf127.GF127{1, 0} } func (d *digestp) Write(data []byte) (n int, err error) { n = len(data) - tmp := new(gogf127.GF127) + tmp := new(gf127.GF127) for _, b := range data { mulBitRightPure(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x80 != 0, tmp) mulBitRightPure(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x40 != 0, tmp) @@ -65,28 +65,28 @@ func (d *digestp) BlockSize() int { return hashBlockSize } -func mulBitRightPure(c00, c01, c10, c11 *gogf127.GF127, bit bool, tmp *gogf127.GF127) { +func mulBitRightPure(c00, c01, c10, c11 *gf127.GF127, bit bool, tmp *gf127.GF127) { if bit { *tmp = *c00 - gogf127.Mul10(c00, c00) - gogf127.Add(c00, c01, c00) - gogf127.Mul11(tmp, tmp) - gogf127.Add(c01, tmp, c01) + gf127.Mul10(c00, c00) + gf127.Add(c00, c01, c00) + gf127.Mul11(tmp, tmp) + gf127.Add(c01, tmp, c01) *tmp = *c10 - gogf127.Mul10(c10, c10) - gogf127.Add(c10, c11, c10) - gogf127.Mul11(tmp, tmp) - gogf127.Add(c11, tmp, c11) + gf127.Mul10(c10, c10) + gf127.Add(c10, c11, c10) + gf127.Mul11(tmp, tmp) + gf127.Add(c11, tmp, c11) } else { *tmp = *c00 - gogf127.Mul10(c00, c00) - gogf127.Add(c00, c01, c00) + gf127.Mul10(c00, c00) + gf127.Add(c00, c01, c00) *c01 = *tmp *tmp = *c10 - gogf127.Mul10(c10, c10) - gogf127.Add(c10, c11, c10) + gf127.Mul10(c10, c10) + gf127.Add(c10, c11, c10) *c11 = *tmp } } diff --git a/tz/sl2.go b/tz/sl2.go index 30c0264..ba47b52 100644 --- a/tz/sl2.go +++ b/tz/sl2.go @@ -4,7 +4,7 @@ import ( "errors" "github.com/nspcc-dev/tzhash/gf127" - "github.com/nspcc-dev/tzhash/gogf127" + "github.com/nspcc-dev/tzhash/gf127/avx" ) type sl2 [2][2]gf127.GF127 @@ -52,86 +52,86 @@ func (c *sl2) UnmarshalBinary(data []byte) (err error) { func (c *sl2) mulStrassen(a, b *sl2, x *[8]gf127.GF127) *sl2 { // strassen algorithm - gf127.Add(&a[0][0], &a[1][1], &x[0]) - gf127.Add(&b[0][0], &b[1][1], &x[1]) - gf127.Mul(&x[0], &x[1], &x[0]) + avx.Add(&a[0][0], &a[1][1], &x[0]) + avx.Add(&b[0][0], &b[1][1], &x[1]) + avx.Mul(&x[0], &x[1], &x[0]) - gf127.Add(&a[1][0], &a[1][1], &x[1]) - gf127.Mul(&x[1], &b[0][0], &x[1]) + avx.Add(&a[1][0], &a[1][1], &x[1]) + avx.Mul(&x[1], &b[0][0], &x[1]) - gf127.Add(&b[0][1], &b[1][1], &x[2]) - gf127.Mul(&x[2], &a[0][0], &x[2]) + avx.Add(&b[0][1], &b[1][1], &x[2]) + avx.Mul(&x[2], &a[0][0], &x[2]) - gf127.Add(&b[1][0], &b[0][0], &x[3]) - gf127.Mul(&x[3], &a[1][1], &x[3]) + avx.Add(&b[1][0], &b[0][0], &x[3]) + avx.Mul(&x[3], &a[1][1], &x[3]) - gf127.Add(&a[0][0], &a[0][1], &x[4]) - gf127.Mul(&x[4], &b[1][1], &x[4]) + avx.Add(&a[0][0], &a[0][1], &x[4]) + avx.Mul(&x[4], &b[1][1], &x[4]) - gf127.Add(&a[1][0], &a[0][0], &x[5]) - gf127.Add(&b[0][0], &b[0][1], &x[6]) - gf127.Mul(&x[5], &x[6], &x[5]) + avx.Add(&a[1][0], &a[0][0], &x[5]) + avx.Add(&b[0][0], &b[0][1], &x[6]) + avx.Mul(&x[5], &x[6], &x[5]) - gf127.Add(&a[0][1], &a[1][1], &x[6]) - gf127.Add(&b[1][0], &b[1][1], &x[7]) - gf127.Mul(&x[6], &x[7], &x[6]) + avx.Add(&a[0][1], &a[1][1], &x[6]) + avx.Add(&b[1][0], &b[1][1], &x[7]) + avx.Mul(&x[6], &x[7], &x[6]) - gf127.Add(&x[2], &x[4], &c[0][1]) - gf127.Add(&x[1], &x[3], &c[1][0]) + avx.Add(&x[2], &x[4], &c[0][1]) + avx.Add(&x[1], &x[3], &c[1][0]) - gf127.Add(&x[4], &x[6], &x[4]) - gf127.Add(&x[0], &x[3], &c[0][0]) - gf127.Add(&c[0][0], &x[4], &c[0][0]) + avx.Add(&x[4], &x[6], &x[4]) + avx.Add(&x[0], &x[3], &c[0][0]) + avx.Add(&c[0][0], &x[4], &c[0][0]) - gf127.Add(&x[0], &x[1], &x[0]) - gf127.Add(&x[2], &x[5], &c[1][1]) - gf127.Add(&c[1][1], &x[0], &c[1][1]) + avx.Add(&x[0], &x[1], &x[0]) + avx.Add(&x[2], &x[5], &c[1][1]) + avx.Add(&c[1][1], &x[0], &c[1][1]) return c } func mulSL2AVX(a, b, c *sl2, x *[4]gf127.GF127) { - gf127.Mul(&a[0][0], &b[0][0], &x[0]) - gf127.Mul(&a[0][0], &b[0][1], &x[1]) - gf127.Mul(&a[1][0], &b[0][0], &x[2]) - gf127.Mul(&a[1][0], &b[0][1], &x[3]) + avx.Mul(&a[0][0], &b[0][0], &x[0]) + avx.Mul(&a[0][0], &b[0][1], &x[1]) + avx.Mul(&a[1][0], &b[0][0], &x[2]) + avx.Mul(&a[1][0], &b[0][1], &x[3]) - gf127.Mul(&a[0][1], &b[1][0], &c[0][0]) - gf127.Add(&c[0][0], &x[0], &c[0][0]) - gf127.Mul(&a[0][1], &b[1][1], &c[0][1]) - gf127.Add(&c[0][1], &x[1], &c[0][1]) - gf127.Mul(&a[1][1], &b[1][0], &c[1][0]) - gf127.Add(&c[1][0], &x[2], &c[1][0]) - gf127.Mul(&a[1][1], &b[1][1], &c[1][1]) - gf127.Add(&c[1][1], &x[3], &c[1][1]) + avx.Mul(&a[0][1], &b[1][0], &c[0][0]) + avx.Add(&c[0][0], &x[0], &c[0][0]) + avx.Mul(&a[0][1], &b[1][1], &c[0][1]) + avx.Add(&c[0][1], &x[1], &c[0][1]) + avx.Mul(&a[1][1], &b[1][0], &c[1][0]) + avx.Add(&c[1][0], &x[2], &c[1][0]) + avx.Mul(&a[1][1], &b[1][1], &c[1][1]) + avx.Add(&c[1][1], &x[3], &c[1][1]) } func mulSL2Pure(a, b, c *sl2, x *[4]gf127.GF127) { - gogf127.Mul((*gogf127.GF127)(&a[0][0]), (*gogf127.GF127)(&b[0][0]), (*gogf127.GF127)(&x[0])) - gogf127.Mul((*gogf127.GF127)(&a[0][0]), (*gogf127.GF127)(&b[0][1]), (*gogf127.GF127)(&x[1])) - gogf127.Mul((*gogf127.GF127)(&a[1][0]), (*gogf127.GF127)(&b[0][0]), (*gogf127.GF127)(&x[2])) - gogf127.Mul((*gogf127.GF127)(&a[1][0]), (*gogf127.GF127)(&b[0][1]), (*gogf127.GF127)(&x[3])) + gf127.Mul((*gf127.GF127)(&a[0][0]), (*gf127.GF127)(&b[0][0]), (*gf127.GF127)(&x[0])) + gf127.Mul((*gf127.GF127)(&a[0][0]), (*gf127.GF127)(&b[0][1]), (*gf127.GF127)(&x[1])) + gf127.Mul((*gf127.GF127)(&a[1][0]), (*gf127.GF127)(&b[0][0]), (*gf127.GF127)(&x[2])) + gf127.Mul((*gf127.GF127)(&a[1][0]), (*gf127.GF127)(&b[0][1]), (*gf127.GF127)(&x[3])) - gogf127.Mul((*gogf127.GF127)(&a[0][1]), (*gogf127.GF127)(&b[1][0]), (*gogf127.GF127)(&c[0][0])) - gogf127.Add((*gogf127.GF127)(&c[0][0]), (*gogf127.GF127)(&x[0]), (*gogf127.GF127)(&c[0][0])) - gogf127.Mul((*gogf127.GF127)(&a[0][1]), (*gogf127.GF127)(&b[1][1]), (*gogf127.GF127)(&c[0][1])) - gogf127.Add((*gogf127.GF127)(&c[0][1]), (*gogf127.GF127)(&x[1]), (*gogf127.GF127)(&c[0][1])) - gogf127.Mul((*gogf127.GF127)(&a[1][1]), (*gogf127.GF127)(&b[1][0]), (*gogf127.GF127)(&c[1][0])) - gogf127.Add((*gogf127.GF127)(&c[1][0]), (*gogf127.GF127)(&x[2]), (*gogf127.GF127)(&c[1][0])) - gogf127.Mul((*gogf127.GF127)(&a[1][1]), (*gogf127.GF127)(&b[1][1]), (*gogf127.GF127)(&c[1][1])) - gogf127.Add((*gogf127.GF127)(&c[1][1]), (*gogf127.GF127)(&x[3]), (*gogf127.GF127)(&c[1][1])) + gf127.Mul((*gf127.GF127)(&a[0][1]), (*gf127.GF127)(&b[1][0]), (*gf127.GF127)(&c[0][0])) + gf127.Add((*gf127.GF127)(&c[0][0]), (*gf127.GF127)(&x[0]), (*gf127.GF127)(&c[0][0])) + gf127.Mul((*gf127.GF127)(&a[0][1]), (*gf127.GF127)(&b[1][1]), (*gf127.GF127)(&c[0][1])) + gf127.Add((*gf127.GF127)(&c[0][1]), (*gf127.GF127)(&x[1]), (*gf127.GF127)(&c[0][1])) + gf127.Mul((*gf127.GF127)(&a[1][1]), (*gf127.GF127)(&b[1][0]), (*gf127.GF127)(&c[1][0])) + gf127.Add((*gf127.GF127)(&c[1][0]), (*gf127.GF127)(&x[2]), (*gf127.GF127)(&c[1][0])) + gf127.Mul((*gf127.GF127)(&a[1][1]), (*gf127.GF127)(&b[1][1]), (*gf127.GF127)(&c[1][1])) + gf127.Add((*gf127.GF127)(&c[1][1]), (*gf127.GF127)(&x[3]), (*gf127.GF127)(&c[1][1])) } func (c *sl2) MulA() *sl2 { var a gf127.GF127 - gf127.Mul10(&c[0][0], &a) - gf127.Mul1(&c[0][0], &c[0][1]) - gf127.Add(&a, &c[0][1], &c[0][0]) + avx.Mul10(&c[0][0], &a) + avx.Mul1(&c[0][0], &c[0][1]) + avx.Add(&a, &c[0][1], &c[0][0]) - gf127.Mul10(&c[1][0], &a) - gf127.Mul1(&c[1][0], &c[1][1]) - gf127.Add(&a, &c[1][1], &c[1][0]) + avx.Mul10(&c[1][0], &a) + avx.Mul1(&c[1][0], &c[1][1]) + avx.Add(&a, &c[1][1], &c[1][0]) return c } @@ -139,15 +139,15 @@ func (c *sl2) MulA() *sl2 { func (c *sl2) MulB() *sl2 { var a gf127.GF127 - gf127.Mul1(&c[0][0], &a) - gf127.Mul10(&c[0][0], &c[0][0]) - gf127.Add(&c[0][1], &c[0][0], &c[0][0]) - gf127.Add(&c[0][0], &a, &c[0][1]) + avx.Mul1(&c[0][0], &a) + avx.Mul10(&c[0][0], &c[0][0]) + avx.Add(&c[0][1], &c[0][0], &c[0][0]) + avx.Add(&c[0][0], &a, &c[0][1]) - gf127.Mul1(&c[1][0], &a) - gf127.Mul10(&c[1][0], &c[1][0]) - gf127.Add(&c[1][1], &c[1][0], &c[1][0]) - gf127.Add(&c[1][0], &a, &c[1][1]) + avx.Mul1(&c[1][0], &a) + avx.Mul10(&c[1][0], &c[1][0]) + avx.Add(&c[1][1], &c[1][0], &c[1][0]) + avx.Add(&c[1][0], &a, &c[1][1]) return c } @@ -165,15 +165,15 @@ func Inv(a *sl2) (b *sl2) { } func inv(a, b *sl2, t *[2]gf127.GF127) { - gf127.Mul(&a[0][0], &a[1][1], &t[0]) - gf127.Mul(&a[0][1], &a[1][0], &t[1]) - gf127.Add(&t[0], &t[1], &t[0]) - gf127.Inv(&t[0], &t[1]) + avx.Mul(&a[0][0], &a[1][1], &t[0]) + avx.Mul(&a[0][1], &a[1][0], &t[1]) + avx.Add(&t[0], &t[1], &t[0]) + avx.Inv(&t[0], &t[1]) - gf127.Mul(&t[1], &a[0][0], &b[1][1]) - gf127.Mul(&t[1], &a[0][1], &b[0][1]) - gf127.Mul(&t[1], &a[1][0], &b[1][0]) - gf127.Mul(&t[1], &a[1][1], &b[0][0]) + avx.Mul(&t[1], &a[0][0], &b[1][1]) + avx.Mul(&t[1], &a[0][1], &b[0][1]) + avx.Mul(&t[1], &a[1][0], &b[1][0]) + avx.Mul(&t[1], &a[1][1], &b[0][0]) } func (c *sl2) String() string { diff --git a/tz/sl2_test.go b/tz/sl2_test.go index 884d64c..e3a9190 100644 --- a/tz/sl2_test.go +++ b/tz/sl2_test.go @@ -6,6 +6,7 @@ import ( "time" "github.com/nspcc-dev/tzhash/gf127" + "github.com/nspcc-dev/tzhash/gf127/avx" "github.com/stretchr/testify/require" ) @@ -21,12 +22,12 @@ func random() (a *sl2) { // so that result is in SL2 // d = a^-1*(1+b*c) - gf127.Mul(&a[0][1], &a[1][0], &a[1][1]) - gf127.Add(&a[1][1], gf127.New(1, 0), &a[1][1]) + avx.Mul(&a[0][1], &a[1][0], &a[1][1]) + avx.Add(&a[1][1], gf127.New(1, 0), &a[1][1]) t := gf127.New(0, 0) - gf127.Inv(&a[0][0], t) - gf127.Mul(t, &a[1][1], &a[1][1]) + avx.Inv(&a[0][0], t) + avx.Mul(t, &a[1][1], &a[1][1]) return }