From a967cc9d3dc857de6da5a07faec8225f4ae172d0 Mon Sep 17 00:00:00 2001
From: Evgenii
Date: Fri, 21 Jun 2019 18:47:01 +0300
Subject: [PATCH] Make use of AVX2 in Sum() by default

---
 tz/hash.go        | 63 ++++++++++++++++++++++++++++++++++++++++++-----
 tz/tzbits_amd64.s | 31 +++++++++++++++++++++++
 2 files changed, 88 insertions(+), 6 deletions(-)

diff --git a/tz/hash.go b/tz/hash.go
index dc46290..dd12474 100644
--- a/tz/hash.go
+++ b/tz/hash.go
@@ -7,6 +7,7 @@ import (
 	"errors"
 	"hash"
 	"math"
+	"unsafe"
 
 	"github.com/nspcc-dev/tzhash/gf127"
 )
@@ -16,12 +17,20 @@ const (
 	hashBlockSize = 128
 )
 
-type digest struct {
-	x [4]gf127.GF127
-}
+type (
+	digest struct {
+		x [4]gf127.GF127
+	}
 
-// type assertion
-var _ hash.Hash = new(digest)
+	// digest2 has the same layout as digest; its Write uses mulBitRightx2.
+	digest2 digest
+)
+
+// type assertions
+var (
+	_ hash.Hash = new(digest)
+	_ hash.Hash = new(digest2)
+)
 
 var (
 	minmax = [2]gf127.GF127{{0, 0}, {math.MaxUint64, math.MaxUint64}}
@@ -85,9 +94,48 @@ func (d *digest) BlockSize() int {
 	return hashBlockSize
 }
 
+// Write feeds data into the hash state bit by bit, most significant bit of
+// each byte first: every bit selects minmax[0] or minmax[1] and is applied
+// with mulBitRightx2. It always returns len(data) and a nil error.
+//
+// NOTE(review): mulBitRightx2 is written with AVX2 instructions and no CPU
+// feature check is visible here - confirm all supported targets have AVX2.
+func (d *digest2) Write(data []byte) (n int, err error) {
+	n = len(data)
+
+	// We need to transpose matrix, because
+	// mulBitRightx2 accepts matrix by columns, not rows
+	d.x[1], d.x[2] = d.x[2], d.x[1]
+
+	h1 := (*gf127.GF127x2)(unsafe.Pointer(&d.x[0]))
+	h2 := (*gf127.GF127x2)(unsafe.Pointer(&d.x[2]))
+	for _, b := range data {
+		mulBitRightx2(h1, h2, &minmax[(b>>7)&1])
+		mulBitRightx2(h1, h2, &minmax[(b>>6)&1])
+		mulBitRightx2(h1, h2, &minmax[(b>>5)&1])
+		mulBitRightx2(h1, h2, &minmax[(b>>4)&1])
+		mulBitRightx2(h1, h2, &minmax[(b>>3)&1])
+		mulBitRightx2(h1, h2, &minmax[(b>>2)&1])
+		mulBitRightx2(h1, h2, &minmax[(b>>1)&1])
+		mulBitRightx2(h1, h2, &minmax[(b>>0)&1])
+	}
+
+	// transpose matrix back
+	d.x[1], d.x[2] = d.x[2], d.x[1]
+
+	return
+}
+
+// Sum, Reset, Size, BlockSize and checkSum delegate to digest via pointer conversion.
+func (d *digest2) Sum(b []byte) []byte      { return (*digest)(d).Sum(b) }
+func (d *digest2) Reset()                   { (*digest)(d).Reset() }
+func (d *digest2) Size() int                { return (*digest)(d).Size() }
+func (d *digest2) BlockSize() int           { return (*digest)(d).BlockSize() }
+func (d *digest2) checkSum() [hashSize]byte { return (*digest)(d).checkSum() }
+
 // Sum returnz Tillich-Zémor checksum of data
 func Sum(data []byte) [hashSize]byte {
-	d := new(digest)
+	d := new(digest2)
 	d.Reset()
 	d.Write(data)
 	return d.checkSum()
@@ -172,3 +220,6 @@ func SubtractL(c, a []byte) (b []byte, err error) {
 }
 
 func mulBitRight(c00, c01, c10, c11, e *gf127.GF127)
+
+// mulBitRightx2 is the AVX2 routine implemented in tzbits_amd64.s.
+func mulBitRightx2(c00c01 *gf127.GF127x2, c10c11 *gf127.GF127x2, e *gf127.GF127)
diff --git a/tz/tzbits_amd64.s b/tz/tzbits_amd64.s
index 7ad3a0e..ecd170c 100644
--- a/tz/tzbits_amd64.s
+++ b/tz/tzbits_amd64.s
@@ -60,3 +60,34 @@ TEXT ·mulBitRight(SB),NOSPLIT,$0
     XORPD X9, X3
     MOVUPD X3, (DX)
     RET
+
+
+// func mulBitRightx2(c00c01 *gf127.GF127x2, c10c11 *gf127.GF127x2, e *gf127.GF127)
+//
+// Unaligned loads/stores (VMOVDQU) are used because Go does not guarantee
+// 32-byte alignment of the operands; VMOVDQA would fault on a misaligned address.
+TEXT ·mulBitRightx2(SB),NOSPLIT,$0
+    MOVQ c00c01+0(FP), AX
+    VMOVDQU (AX), Y0
+    MOVQ c10c11+8(FP), BX
+    VMOVDQU (BX), Y8
+
+    VPSLLQ $1, Y0, Y1
+    VPALIGNR $8, Y1, Y0, Y2
+    VPSRLQ $63, Y2, Y2
+    VPXOR Y1, Y2, Y2
+    VPSRLQ $63, Y1, Y3
+    VPSLLQ $63, Y3, Y3
+    VPUNPCKHQDQ Y3, Y3, Y3
+    VPXOR Y2, Y3, Y3
+
+    MOVQ e+16(FP), CX
+    VBROADCASTI128 (CX), Y2 // copy the 128-bit e into both lanes of Y2
+
+    VPXOR Y3, Y8, Y3
+    VPAND Y3, Y2, Y4
+    VPXOR Y4, Y0, Y8
+    VMOVDQU Y8, (BX)
+    VMOVDQU Y3, (AX)
+    VZEROUPPER // clear upper YMM halves to avoid AVX/SSE transition penalties in later SSE code
+    RET