Make use of AVX2 in Sum() by default

This commit is contained in:
Evgenii 2019-06-21 18:47:01 +03:00
parent f197b9e890
commit a967cc9d3d
2 changed files with 77 additions and 6 deletions

View file

@ -7,6 +7,7 @@ import (
"errors"
"hash"
"math"
"unsafe"
"github.com/nspcc-dev/tzhash/gf127"
)
@ -16,12 +17,19 @@ const (
hashBlockSize = 128
)
type digest struct {
type (
digest struct {
x [4]gf127.GF127
}
// type assertion
var _ hash.Hash = new(digest)
digest2 digest
)
// type assertions
var (
_ hash.Hash = new(digest)
_ hash.Hash = new(digest2)
)
var (
minmax = [2]gf127.GF127{{0, 0}, {math.MaxUint64, math.MaxUint64}}
@ -85,9 +93,44 @@ func (d *digest) BlockSize() int {
return hashBlockSize
}
// Write feeds data into the hash state one bit at a time, starting with the
// most significant bit of every byte, using the mulBitRightx2 routine.
// It always reports len(data) bytes consumed and a nil error.
func (d *digest2) Write(data []byte) (n int, err error) {
	// mulBitRightx2 wants the matrix laid out by columns rather than by
	// rows, so exchange the two off-diagonal elements to transpose it.
	d.x[1], d.x[2] = d.x[2], d.x[1]

	// Reinterpret each pair of adjacent elements as one GF127x2 operand.
	// NOTE(review): presumably GF127x2 has the layout of two consecutive
	// GF127 values — confirm against the gf127 package.
	first := (*gf127.GF127x2)(unsafe.Pointer(&d.x[0]))
	second := (*gf127.GF127x2)(unsafe.Pointer(&d.x[2]))

	for _, octet := range data {
		// Each bit picks either the all-zero or the all-one element of
		// minmax. The eight steps are spelled out by hand, bit 7 first.
		mulBitRightx2(first, second, &minmax[(octet>>7)&1])
		mulBitRightx2(first, second, &minmax[(octet>>6)&1])
		mulBitRightx2(first, second, &minmax[(octet>>5)&1])
		mulBitRightx2(first, second, &minmax[(octet>>4)&1])
		mulBitRightx2(first, second, &minmax[(octet>>3)&1])
		mulBitRightx2(first, second, &minmax[(octet>>2)&1])
		mulBitRightx2(first, second, &minmax[(octet>>1)&1])
		mulBitRightx2(first, second, &minmax[(octet>>0)&1])
	}

	// Restore the row-wise layout that the remaining methods operate on.
	d.x[1], d.x[2] = d.x[2], d.x[1]

	return len(data), nil
}
// Sum reinterprets the receiver as a *digest and forwards to its Sum method.
func (d *digest2) Sum(b []byte) []byte {
	base := (*digest)(d)
	return base.Sum(b)
}
// Reset reinterprets the receiver as a *digest and forwards to its Reset method.
func (d *digest2) Reset() {
	base := (*digest)(d)
	base.Reset()
}
// Size reinterprets the receiver as a *digest and forwards to its Size method.
func (d *digest2) Size() int {
	base := (*digest)(d)
	return base.Size()
}
// BlockSize reinterprets the receiver as a *digest and forwards to its
// BlockSize method.
func (d *digest2) BlockSize() int {
	base := (*digest)(d)
	return base.BlockSize()
}
// checkSum reinterprets the receiver as a *digest and forwards to its
// checkSum method, returning the fixed-size result unchanged.
func (d *digest2) checkSum() [hashSize]byte {
	base := (*digest)(d)
	return base.checkSum()
}
// Sum returns Tillich-Zémor checksum of data.
func Sum(data []byte) [hashSize]byte {
d := new(digest)
d := new(digest2)
d.Reset()
d.Write(data)
return d.checkSum()
@ -172,3 +215,4 @@ func SubtractL(c, a []byte) (b []byte, err error) {
}
// mulBitRight has no Go body; its implementation is the assembly routine
// ·mulBitRight. It takes pointers to four GF127 values plus a fifth, e.
// NOTE(review): presumably c00..c11 are the elements of a 2x2 matrix that is
// updated in place and e is a bit mask — confirm against the assembly.
func mulBitRight(c00, c01, c10, c11, e *gf127.GF127)
// mulBitRightx2 has no Go body; its implementation is the AVX2 assembly
// routine ·mulBitRightx2. Both GF127x2 operands are read and then overwritten
// in place, while e is only read and used as an AND mask.
// NOTE(review): the assembly header names the operands c00c10 and c01c11
// (matrix columns), which differs from the parameter names here — confirm
// which naming is intended.
func mulBitRightx2(c00c01 *gf127.GF127x2, c10c11 *gf127.GF127x2, e *gf127.GF127)

View file

@ -60,3 +60,30 @@ TEXT ·mulBitRight(SB),NOSPLIT,$0
XORPD X9, X3
MOVUPD X3, (DX)
RET
// func mulBitRightx2(c00c01, c10c11 *gf127.GF127x2, e *gf127.GF127)
//
// Treats each argument as two 128-bit lanes (four uint64 words) and, with
// A = *c00c01, B = *c10c11 and the 128-bit mask E = *e copied to both lanes,
// computes lane-wise:
//
//	T       = (A shifted left by one bit, reduced) XOR B
//	*c10c11 = A XOR (T AND E)
//	*c00c01 = T
//
// The reduction folds the bit shifted out of position 126 back into bits 0
// and 63 and clears bit 127, i.e. multiplication by x modulo x^127 + x^63 + 1.
//
// Unaligned loads and stores (VMOVDQU) are used because the operands are made
// of uint64 words, which does not guarantee the 32-byte alignment that
// VMOVDQA requires. VZEROUPPER is issued before returning so that SSE code
// executed afterwards does not pay an AVX/SSE transition penalty.
TEXT ·mulBitRightx2(SB),NOSPLIT,$0
	MOVQ c00c01+0(FP), AX
	VMOVDQU (AX), Y0            // Y0 = A
	MOVQ c10c11+8(FP), BX
	VMOVDQU (BX), Y8            // Y8 = B
	VPSLLQ $1, Y0, Y1           // Y1 = every 64-bit word of A shifted left by 1
	VPALIGNR $8, Y1, Y0, Y2     // per lane: Y2 = {high word of Y1, low word of A}
	VPSRLQ $63, Y2, Y2          // per lane: Y2 = {old bit 126, old bit 63}
	VPXOR Y1, Y2, Y2            // carry into bit 64, overflow bit into bit 0
	VPSRLQ $63, Y1, Y3          // top bit of each shifted word
	VPSLLQ $63, Y3, Y3          // ...moved back to bit 63 of its word
	VPUNPCKHQDQ Y3, Y3, Y3      // copy the high word's value into both words
	VPXOR Y2, Y3, Y3            // overflow bit into bit 63, bit 127 cleared
	MOVQ e+16(FP), CX
	VBROADCASTI128 (CX), Y2     // Y2 = E in both lanes
	VPXOR Y3, Y8, Y3            // Y3 = T
	VPAND Y3, Y2, Y4            // Y4 = T AND E
	VPXOR Y4, Y0, Y8            // Y8 = A XOR (T AND E)
	VMOVDQU Y8, (BX)
	VMOVDQU Y3, (AX)
	VZEROUPPER
	RET