Add partial AVX2 support (multiplication by 10 and 11)

AVX2 permits working with 256-bit registers. Thus we can
multiply 2 GF(2^127) elements in parallel.
This commit adds 2 such functions: multiplication by 10 and by 11.
This commit is contained in:
Evgenii 2019-06-21 17:40:48 +03:00
parent 6b644651fa
commit 4618e1484c
4 changed files with 147 additions and 2 deletions

View file

@ -164,8 +164,8 @@ func Add(a, b, c *GF127)
// Mul sets c to a*b. // Mul sets c to a*b.
func Mul(a, b, c *GF127) func Mul(a, b, c *GF127)
// Mul10 sets y to a*x. // Mul10 sets b to a*x.
func Mul10(a, b *GF127) func Mul10(a, b *GF127)
// Mul11 sets y to a*(x+1). // Mul11 sets b to a*(x+1).
func Mul11(a, b *GF127) func Mul11(a, b *GF127)

View file

@ -27,6 +27,22 @@ TEXT ·Mul10(SB),NOSPLIT,$0
MOVUPD X1, (AX) MOVUPD X1, (AX)
RET RET
// func Mul10x2(a, b *[4]uint64)
//
// Mul10x2 multiplies both GF(2^127) elements packed in a by x and stores the
// resulting pair in b. Each element occupies one 128-bit lane (low qword
// first); the product is reduced using x^127 = x^63 + 1.
TEXT ·Mul10x2(SB),NOSPLIT,$0
	MOVQ a+0(FP), AX
	VMOVDQU (AX), Y0           // unaligned load: Go only guarantees 8-byte alignment for [4]uint64
	VPSLLQ $1, Y0, Y1          // Y1 = every qword of a shifted left by one bit
	VPALIGNR $8, Y1, Y0, Y2    // per lane: low qword = high qword of Y1, high qword = low qword of Y0
	VPSRLQ $63, Y2, Y2         // per lane: low qword = bit 126 of a, high qword = bit 63 of a
	VPXOR Y1, Y2, Y2           // carry bit 63 into the high qword and fold bit 126 into bit 0
	VPSRLQ $63, Y1, Y3
	VPSLLQ $63, Y3, Y3         // Y3 = only the top bit of every qword of Y1
	VPUNPCKHQDQ Y3, Y3, Y3     // per lane: copy the high qword (bit 126 of a, at position 63) into both qwords
	VPXOR Y2, Y3, Y3           // clear bit 127 of the product and fold bit 126 into bit 63
	MOVQ b+8(FP), AX
	VMOVDQU Y3, (AX)           // unaligned store, for the same reason as the load
	VZEROUPPER                 // clear upper YMM halves so later SSE code avoids AVX/SSE transition penalties
	RET
// func Mul11(a, b *[2]uint64) // func Mul11(a, b *[2]uint64)
TEXT ·Mul11(SB),NOSPLIT,$0 TEXT ·Mul11(SB),NOSPLIT,$0
MOVQ a+0(FP), AX MOVQ a+0(FP), AX
@ -44,6 +60,23 @@ TEXT ·Mul11(SB),NOSPLIT,$0
MOVUPD X1, (AX) MOVUPD X1, (AX)
RET RET
// func Mul11x2(a, b *[4]uint64)
//
// Mul11x2 multiplies both GF(2^127) elements packed in a by x+1 and stores the
// resulting pair in b. Each element occupies one 128-bit lane (low qword
// first); the product is reduced using x^127 = x^63 + 1.
TEXT ·Mul11x2(SB),NOSPLIT,$0
	MOVQ a+0(FP), AX
	VMOVDQU (AX), Y0           // unaligned load: Go only guarantees 8-byte alignment for [4]uint64
	VPSLLQ $1, Y0, Y1          // Y1 = every qword of a shifted left by one bit
	VPALIGNR $8, Y1, Y0, Y2    // per lane: low qword = high qword of Y1, high qword = low qword of Y0
	VPSRLQ $63, Y2, Y2         // per lane: low qword = bit 126 of a, high qword = bit 63 of a
	VPXOR Y1, Y2, Y2           // carry bit 63 into the high qword and fold bit 126 into bit 0
	VPSRLQ $63, Y1, Y3
	VPSLLQ $63, Y3, Y3         // Y3 = only the top bit of every qword of Y1
	VPUNPCKHQDQ Y3, Y3, Y3     // per lane: copy the high qword (bit 126 of a, at position 63) into both qwords
	VPXOR Y2, Y3, Y3           // Y3 = a*x: clear bit 127 and fold bit 126 into bit 63
	VPXOR Y0, Y3, Y3           // add a itself: a*x + a = a*(x+1)
	MOVQ b+8(FP), AX
	VMOVDQU Y3, (AX)           // unaligned store, for the same reason as the load
	VZEROUPPER                 // clear upper YMM halves so later SSE code avoids AVX/SSE transition penalties
	RET
// func Mul(a, b, c *[2]uint64) // func Mul(a, b, c *[2]uint64)
TEXT ·Mul(SB),NOSPLIT,$0 TEXT ·Mul(SB),NOSPLIT,$0
MOVQ a+0(FP), AX // X0 = a0 . a1 MOVQ a+0(FP), AX // X0 = a0 . a1

51
gf127/gf127x2.go Normal file
View file

@ -0,0 +1,51 @@
package gf127
import (
"encoding/binary"
"encoding/hex"
"unsafe"
)
// GF127x2 represents a pair of elements of GF(2^127) stored together.
// Words 0-1 hold the first element and words 2-3 hold the second one
// (this is the layout Split and CombineTo use).
type GF127x2 [4]uint64
// Split exposes the two components of a pair without allocating or copying:
// the first result points at words 0-1 of a and the second at words 2-3.
// Both results alias a, so writes through them modify the pair.
// The pointer casts rely on GF127 occupying two uint64 words.
func Split(a *GF127x2) (*GF127, *GF127) {
	first := unsafe.Pointer(&a[0])
	second := unsafe.Pointer(&a[2])
	return (*GF127)(first), (*GF127)(second)
}
// CombineTo packs two elements of GF(2^127) into the pair c: a is stored in
// the first component (words 0-1) and b in the second (words 2-3).
func CombineTo(a *GF127, b *GF127, c *GF127x2) {
	// Words are copied one by one, a first and then b, in ascending order.
	for i := range a {
		c[i] = a[i]
	}
	for i := range b {
		c[len(a)+i] = b[i]
	}
}
// Equal reports whether the pairs a and b hold the same two elements of
// GF(2^127), i.e. whether all four words match.
func (a *GF127x2) Equal(b *GF127x2) bool {
	return *a == *b
}
// String returns the hex-encoded representation of the pair, starting with
// the MSB of each element. The two elements are separated by " , ".
func (a *GF127x2) String() string {
	raw := a.ByteArray()
	first := hex.EncodeToString(raw[:16])
	second := hex.EncodeToString(raw[16:])
	return first + " , " + second
}
// ByteArray serializes the pair into a freshly allocated 32-byte slice.
// Each element takes 16 bytes, written big-endian with its high word first;
// the first element of the pair occupies bytes 0-15 and the second bytes 16-31.
func (a *GF127x2) ByteArray() (buf []byte) {
	buf = make([]byte, 32)
	// Output order of the words: high then low word of each element.
	for i, word := range [...]uint64{a[1], a[0], a[3], a[2]} {
		binary.BigEndian.PutUint64(buf[8*i:], word)
	}
	return buf
}
// Mul10x2 sets (b1, b2) to (a1*x, a2*x), where (a1, a2) and (b1, b2) are the
// two GF(2^127) elements packed in a and b respectively.
// The declaration has no Go body; the implementation is the ·Mul10x2 assembly routine.
func Mul10x2(a, b *GF127x2)
// Mul11x2 sets (b1, b2) to (a1*(x+1), a2*(x+1)), where (a1, a2) and (b1, b2)
// are the two GF(2^127) elements packed in a and b respectively.
// The declaration has no Go body; the implementation is the ·Mul11x2 assembly routine.
func Mul11x2(a, b *GF127x2)

61
gf127/gf127x2_test.go Normal file
View file

@ -0,0 +1,61 @@
package gf127
import "testing"
// testCasesSplit is shared by TestSplit and TestCombineTo.
// num is the packed pair; h1 and h2 are its first and second components:
// TestSplit expects Split(num) to yield (h1, h2), and TestCombineTo expects
// CombineTo(h1, h2, ...) to produce num.
var testCasesSplit = []struct {
num *GF127x2
h1 *GF127
h2 *GF127
}{
{&GF127x2{123, 31, 141, 9}, &GF127{123, 31}, &GF127{141, 9}},
{&GF127x2{maxUint64, 0, 0, maxUint64}, &GF127{maxUint64, 0}, &GF127{0, maxUint64}},
}
// TestSplit checks that Split returns the expected components for every
// entry of testCasesSplit.
func TestSplit(t *testing.T) {
	for _, tc := range testCasesSplit {
		first, second := Split(tc.num)
		if first.Equals(tc.h1) && second.Equals(tc.h2) {
			continue
		}
		t.Errorf("expected (%s,%s), got (%s,%s)", tc.h1, tc.h2, first, second)
	}
}
// TestCombineTo checks that CombineTo rebuilds the packed pair from its two
// components for every entry of testCasesSplit. The destination is reused
// across iterations.
func TestCombineTo(t *testing.T) {
	got := &GF127x2{}
	for _, tc := range testCasesSplit {
		CombineTo(tc.h1, tc.h2, got)
		if got.Equal(tc.num) {
			continue
		}
		t.Errorf("expected (%s), got (%s)", tc.num, got)
	}
}
// testCasesMul10x2 lists {input, expected} pairs for Mul10x2: TestMul10x2
// passes the first entry as the argument and compares the result with the second.
var testCasesMul10x2 = [][2]*GF127x2{
// Small values: the result is a plain one-bit shift in both components.
{&GF127x2{123, 0, 123, 0}, &GF127x2{246, 0, 246, 0}},
// First component has bit 63 set, so a bit is carried into its high word.
{&GF127x2{maxUint64, 2, 0, 1}, &GF127x2{maxUint64 - 1, 5, 0, 2}},
// First component has bit 126 set, so the shifted-out bit is folded back into bits 63 and 0.
{&GF127x2{0, maxUint64 >> 1, maxUint64, 2}, &GF127x2{1 + 1<<63, maxUint64>>1 - 1, maxUint64 - 1, 5}},
}
// TestMul10x2 runs Mul10x2 on every input of testCasesMul10x2 and compares
// the result with the expected pair. The destination is reused across iterations.
func TestMul10x2(t *testing.T) {
	got := &GF127x2{}
	for _, tc := range testCasesMul10x2 {
		in, want := tc[0], tc[1]
		Mul10x2(in, got)
		if !got.Equal(want) {
			t.Errorf("expected (%s), got (%s)", want, got)
		}
	}
}
// testCasesMul11x2 lists {input, expected} pairs for Mul11x2: TestMul11x2
// passes the first entry as the argument and compares the result with the second.
// The inputs are the same as in testCasesMul10x2.
var testCasesMul11x2 = [][2]*GF127x2{
{&GF127x2{123, 0, 123, 0}, &GF127x2{141, 0, 141, 0}},
{&GF127x2{maxUint64, 2, 0, 1}, &GF127x2{1, 7, 0, 3}},
{&GF127x2{0, maxUint64 >> 1, maxUint64, 2}, &GF127x2{1 + 1<<63, 1, 1, 7}},
}
// TestMul11x2 runs Mul11x2 on every input of testCasesMul11x2 and compares
// the result with the expected pair. The destination is reused across iterations.
func TestMul11x2(t *testing.T) {
	got := &GF127x2{}
	for _, tc := range testCasesMul11x2 {
		in, want := tc[0], tc[1]
		Mul11x2(in, got)
		if !got.Equal(want) {
			t.Errorf("expected (%s), got (%s)", want, got)
		}
	}
}