From 0e0d28e82f286fd7632299f1ff2f0afd04a1f3de Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Wed, 9 Mar 2022 16:58:21 +0300 Subject: [PATCH] tz: use build tags for different implementations Signed-off-by: Evgenii Stratonikov --- benchmark | 7 +- cmd/tzsum/main.go | 22 ++-- tz/avx.go | 75 -------------- tz/avx2.go | 62 ------------ tz/avx2_inline.go | 55 ---------- tz/avx_inline.go | 62 ------------ tz/digest.go | 122 +++++++++++++++++++++++ tz/{avx2_amd64.s => digest_avx2_amd64.s} | 26 ----- tz/{avx_amd64.s => digest_avx_amd64.s} | 45 +-------- tz/digest_generic.go | 8 ++ tz/digets_amd64.go | 39 ++++++++ tz/hash.go | 87 ---------------- tz/hash_test.go | 69 +++++++------ tz/pure.go | 92 ----------------- tz/sl2.go | 11 +- 15 files changed, 236 insertions(+), 546 deletions(-) delete mode 100644 tz/avx.go delete mode 100644 tz/avx2.go delete mode 100644 tz/avx2_inline.go delete mode 100644 tz/avx_inline.go create mode 100644 tz/digest.go rename tz/{avx2_amd64.s => digest_avx2_amd64.s} (74%) rename tz/{avx_amd64.s => digest_avx_amd64.s} (60%) create mode 100644 tz/digest_generic.go create mode 100644 tz/digets_amd64.go delete mode 100644 tz/pure.go diff --git a/benchmark b/benchmark index 926c02a..6d16439 100755 --- a/benchmark +++ b/benchmark @@ -3,9 +3,10 @@ tmpfile=$(mktemp /tmp/random-file.XXXXXX) dd if=/dev/urandom of=$tmpfile bs=$1 count=1 -go build ./cmd/tzsum && \ -for impl in avx avx2 avx2inline purego; do +go build ./cmd/tzsum || exit 1 + +for impl in avx avx2 generic; do echo $impl implementation: - /usr/bin/env time ./tzsum -name $tmpfile -impl $impl + time ./tzsum -name $tmpfile -impl $impl echo done diff --git a/cmd/tzsum/main.go b/cmd/tzsum/main.go index 5d6565e..1707e74 100644 --- a/cmd/tzsum/main.go +++ b/cmd/tzsum/main.go @@ -11,6 +11,7 @@ import ( "runtime/pprof" "github.com/nspcc-dev/tzhash/tz" + "golang.org/x/sys/cpu" ) var ( @@ -46,18 +47,23 @@ func main() { f = os.Stdin } + // Override CPU feature flags to make sure a proper backend is 
used. var h hash.Hash switch *hashimpl { case "avx": - h = tz.NewWith(tz.AVX) - case "avx2": - h = tz.NewWith(tz.AVX2) - case "avx2inline": - h = tz.NewWith(tz.AVX2Inline) - case "purego": - h = tz.NewWith(tz.PureGo) - default: + cpu.X86.HasAVX = true + cpu.X86.HasAVX2 = false h = tz.New() + case "avx2": + cpu.X86.HasAVX = true + cpu.X86.HasAVX2 = true + h = tz.New() + case "generic": + cpu.X86.HasAVX = false + cpu.X86.HasAVX2 = false + h = tz.New() + default: + log.Fatalf("Invalid backend: %s", *hashimpl) } if _, err := io.Copy(h, f); err != nil { diff --git a/tz/avx.go b/tz/avx.go deleted file mode 100644 index 4e0a2fa..0000000 --- a/tz/avx.go +++ /dev/null @@ -1,75 +0,0 @@ -package tz - -import ( - "hash" - "math" -) - -type digest struct { - x [4]GF127 -} - -// type assertion -var _ hash.Hash = (*digest)(nil) - -var ( - minmax = [2]GF127{{0, 0}, {math.MaxUint64, math.MaxUint64}} - x127x63 = GF127{1 << 63, 1 << 63} //nolint:deadcode,varcheck -) - -func newAVX() *digest { - d := new(digest) - d.Reset() - return d -} - -func (d *digest) Sum(in []byte) []byte { - // Make a copy of d so that caller can keep writing and summing. - d0 := *d - h := d0.checkSum() - return append(in, h[:]...) 
-} - -func (d *digest) checkSum() [Size]byte { - return d.byteArray() -} - -func (d *digest) byteArray() (b [Size]byte) { - copy(b[:], d.x[0].ByteArray()) - copy(b[16:], d.x[1].ByteArray()) - copy(b[32:], d.x[2].ByteArray()) - copy(b[48:], d.x[3].ByteArray()) - return -} - -func (d *digest) Reset() { - d.x[0] = GF127{1, 0} - d.x[1] = GF127{0, 0} - d.x[2] = GF127{0, 0} - d.x[3] = GF127{1, 0} -} - -func (d *digest) Write(data []byte) (n int, err error) { - n = len(data) - for _, b := range data { - mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>7)&1]) - mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>6)&1]) - mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>5)&1]) - mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>4)&1]) - mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>3)&1]) - mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>2)&1]) - mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>1)&1]) - mulBitRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], &minmax[(b>>0)&1]) - } - return -} - -func (d *digest) Size() int { - return Size -} - -func (d *digest) BlockSize() int { - return hashBlockSize -} - -func mulBitRight(c00, c01, c10, c11, e *GF127) diff --git a/tz/avx2.go b/tz/avx2.go deleted file mode 100644 index b41c182..0000000 --- a/tz/avx2.go +++ /dev/null @@ -1,62 +0,0 @@ -package tz - -import ( - "hash" - - "github.com/nspcc-dev/tzhash/gf127" -) - -type digest2 struct { - x [2]gf127.GF127x2 -} - -// type assertion -var _ hash.Hash = (*digest2)(nil) - -func newAVX2() *digest2 { - d := new(digest2) - d.Reset() - return d -} - -func (d *digest2) Write(data []byte) (n int, err error) { - n = len(data) - for _, b := range data { - mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>7)&1]) - mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>6)&1]) - mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>5)&1]) - mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>4)&1]) - mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>3)&1]) - 
mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>2)&1]) - mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>1)&1]) - mulBitRightx2(&d.x[0], &d.x[1], &minmax[(b>>0)&1]) - } - return -} - -func (d *digest2) Sum(in []byte) []byte { - // Make a copy of d so that caller can keep writing and summing. - d0 := *d - h := d0.checkSum() - return append(in, h[:]...) -} -func (d *digest2) Reset() { - d.x[0] = gf127.GF127x2{GF127{1, 0}, GF127{0, 0}} - d.x[1] = gf127.GF127x2{GF127{0, 0}, GF127{1, 0}} -} -func (d *digest2) Size() int { return Size } -func (d *digest2) BlockSize() int { return hashBlockSize } -func (d *digest2) checkSum() (b [Size]byte) { - // Matrix is stored transposed, - // but we need to use order consistent with digest. - h := d.x[0].ByteArray() - copy(b[:], h[:16]) - copy(b[32:], h[16:]) - - h = d.x[1].ByteArray() - copy(b[16:], h[:16]) - copy(b[48:], h[16:]) - return -} - -func mulBitRightx2(c00c10 *gf127.GF127x2, c01c11 *gf127.GF127x2, e *GF127) diff --git a/tz/avx2_inline.go b/tz/avx2_inline.go deleted file mode 100644 index f5fcecf..0000000 --- a/tz/avx2_inline.go +++ /dev/null @@ -1,55 +0,0 @@ -package tz - -import ( - "hash" - - "github.com/nspcc-dev/tzhash/gf127" -) - -type digest3 struct { - x [2]gf127.GF127x2 -} - -// type assertion -var _ hash.Hash = (*digest3)(nil) - -func newAVX2Inline() *digest3 { - d := new(digest3) - d.Reset() - return d -} - -func (d *digest3) Write(data []byte) (n int, err error) { - n = len(data) - if len(data) != 0 { - mulByteSliceRightx2(&d.x[0], &d.x[1], n, &data[0]) - } - return -} - -func (d *digest3) Sum(in []byte) []byte { - // Make a copy of d so that caller can keep writing and summing. - d0 := *d - h := d0.checkSum() - return append(in, h[:]...) 
-} -func (d *digest3) Reset() { - d.x[0] = gf127.GF127x2{GF127{1, 0}, GF127{0, 0}} - d.x[1] = gf127.GF127x2{GF127{0, 0}, GF127{1, 0}} -} -func (d *digest3) Size() int { return Size } -func (d *digest3) BlockSize() int { return hashBlockSize } -func (d *digest3) checkSum() (b [Size]byte) { - // Matrix is stored transposed, - // but we need to use order consistent with digest. - h := d.x[0].ByteArray() - copy(b[:], h[:16]) - copy(b[32:], h[16:]) - - h = d.x[1].ByteArray() - copy(b[16:], h[:16]) - copy(b[48:], h[16:]) - return -} - -func mulByteSliceRightx2(c00c10 *gf127.GF127x2, c01c11 *gf127.GF127x2, n int, data *byte) diff --git a/tz/avx_inline.go b/tz/avx_inline.go deleted file mode 100644 index a023be5..0000000 --- a/tz/avx_inline.go +++ /dev/null @@ -1,62 +0,0 @@ -package tz - -import ( - "hash" -) - -type digest4 struct { - x [4]GF127 -} - -// type assertion -var _ hash.Hash = (*digest4)(nil) - -func newAVXInline() *digest4 { - d := new(digest4) - d.Reset() - return d -} - -func (d *digest4) Sum(in []byte) []byte { - // Make a copy of d so that caller can keep writing and summing. - d0 := *d - h := d0.checkSum() - return append(in, h[:]...) 
-} - -func (d *digest4) checkSum() [Size]byte { - return d.byteArray() -} - -func (d *digest4) byteArray() (b [Size]byte) { - copy(b[:], d.x[0].ByteArray()) - copy(b[16:], d.x[1].ByteArray()) - copy(b[32:], d.x[2].ByteArray()) - copy(b[48:], d.x[3].ByteArray()) - return -} - -func (d *digest4) Reset() { - d.x[0] = GF127{1, 0} - d.x[1] = GF127{0, 0} - d.x[2] = GF127{0, 0} - d.x[3] = GF127{1, 0} -} - -func (d *digest4) Write(data []byte) (n int, err error) { - n = len(data) - for _, b := range data { - mulByteRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b) - } - return -} - -func (d *digest4) Size() int { - return Size -} - -func (d *digest4) BlockSize() int { - return hashBlockSize -} - -func mulByteRight(c00, c01, c10, c11 *GF127, b byte) diff --git a/tz/digest.go b/tz/digest.go new file mode 100644 index 0000000..7ba074d --- /dev/null +++ b/tz/digest.go @@ -0,0 +1,122 @@ +package tz + +import ( + "github.com/nspcc-dev/tzhash/gf127" +) + +const ( + // Size is the size of a Tillich-Zémor hash sum in bytes. + Size = 64 + hashBlockSize = 128 +) + +type digest struct { + // Stores matrix cells in the following order: + // [ 0 2 ] + // [ 1 3 ] + // This is done to reuse the same digest between generic + // and AVX2 implementation. + x [4]GF127 +} + +// New returns a new hash.Hash computing the Tillich-Zémor checksum. +func New() *digest { + d := new(digest) + d.Reset() + return d +} + +// Sum returns Tillich-Zémor checksum of data. +func Sum(data []byte) [Size]byte { + d := New() // New calls Reset: the state must start as the identity matrix, a zero-valued digest stays zero + _, _ = d.Write(data) // no errors + return d.checkSum() +} + +func (d *digest) Sum(in []byte) []byte { + // Make a copy of d so that caller can keep writing and summing. + d0 := *d + h := d0.checkSum() + return append(in, h[:]...) 
+} + +func (d *digest) checkSum() [Size]byte { + return d.byteArray() +} + +func (d *digest) byteArray() (b [Size]byte) { + t := d.x[0].ByteArray() + copy(b[:], t[:]) + + t = d.x[2].ByteArray() + copy(b[16:], t[:]) + + t = d.x[1].ByteArray() + copy(b[32:], t[:]) + + t = d.x[3].ByteArray() + copy(b[48:], t[:]) + + return +} + +func (d *digest) Reset() { + d.x[0] = GF127{1, 0} + d.x[1] = GF127{0, 0} + d.x[2] = GF127{0, 0} + d.x[3] = GF127{1, 0} +} + +func (d *digest) Write(data []byte) (n int, err error) { + return write(d, data) +} + +func writeGeneric(d *digest, data []byte) (n int, err error) { + n = len(data) + tmp := new(GF127) + for _, b := range data { + mulBitRightGeneric(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x80 != 0, tmp) + mulBitRightGeneric(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x40 != 0, tmp) + mulBitRightGeneric(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x20 != 0, tmp) + mulBitRightGeneric(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x10 != 0, tmp) + mulBitRightGeneric(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x08 != 0, tmp) + mulBitRightGeneric(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x04 != 0, tmp) + mulBitRightGeneric(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x02 != 0, tmp) + mulBitRightGeneric(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x01 != 0, tmp) + } + return +} + +func (d *digest) Size() int { + return Size +} + +func (d *digest) BlockSize() int { + return hashBlockSize +} + +func mulBitRightGeneric(c00, c10, c01, c11 *GF127, bit bool, tmp *GF127) { + if bit { + *tmp = *c00 + gf127.Mul10(c00, c00) + gf127.Add(c00, c01, c00) + gf127.Mul11(tmp, tmp) + gf127.Add(c01, tmp, c01) + + *tmp = *c10 + gf127.Mul10(c10, c10) + gf127.Add(c10, c11, c10) + gf127.Mul11(tmp, tmp) + gf127.Add(c11, tmp, c11) + } else { + *tmp = *c00 + gf127.Mul10(c00, c00) + gf127.Add(c00, c01, c00) + *c01 = *tmp + + *tmp = *c10 + gf127.Mul10(c10, c10) + gf127.Add(c10, c11, c10) + *c11 = *tmp + } +} diff --git a/tz/avx2_amd64.s b/tz/digest_avx2_amd64.s similarity index 74% rename from tz/avx2_amd64.s 
rename to tz/digest_avx2_amd64.s index 0a0de51..2d818de 100644 --- a/tz/avx2_amd64.s +++ b/tz/digest_avx2_amd64.s @@ -59,29 +59,3 @@ finish: VMOVDQU Y0, (AX) RET - -// func mulBitRightx2(c00c10, c01c11 *[4]uint64, e *[2]uint64) -TEXT ·mulBitRightx2(SB), NOSPLIT, $0 - MOVQ c00c10+0(FP), AX - VMOVDQU (AX), Y0 - MOVQ c01c11+8(FP), BX - VMOVDQU (BX), Y8 - - VPSLLQ $1, Y0, Y1 - VPALIGNR $8, Y1, Y0, Y2 - VPSRLQ $63, Y2, Y2 - VPXOR Y1, Y2, Y2 - VPSRLQ $63, Y1, Y3 - VPSLLQ $63, Y3, Y3 - VPUNPCKHQDQ Y3, Y3, Y3 - VPXOR Y2, Y3, Y3 - - MOVQ e+16(FP), CX - VBROADCASTI128 (CX), Y2 - - VPXOR Y3, Y8, Y3 - VPAND Y3, Y2, Y4 - VPXOR Y4, Y0, Y8 - VMOVDQU Y8, (BX) - VMOVDQU Y3, (AX) - RET diff --git a/tz/avx_amd64.s b/tz/digest_avx_amd64.s similarity index 60% rename from tz/avx_amd64.s rename to tz/digest_avx_amd64.s index 7a56a5c..e040ffd 100644 --- a/tz/avx_amd64.s +++ b/tz/digest_avx_amd64.s @@ -29,50 +29,13 @@ VANDPD X2, X5, X3 \ VXORPD X9, X3, X3 -// func mulBitRight(c00, c01, c10, c11, e *[2]uint64) -TEXT ·mulBitRight(SB), NOSPLIT, $0 - MOVQ c00+0(FP), AX - VMOVDQU (AX), X0 - VMOVDQU X0, X8 // remember c00 value - MOVQ c01+8(FP), BX - VMOVDQU (BX), X1 - MOVQ c10+16(FP), CX - VMOVDQU (CX), X2 - VMOVDQU X2, X9 // remember c10 value - MOVQ c11+24(FP), DX - VMOVDQU (DX), X3 - - VPXOR X13, X13, X13 // Y13 = 0x0000... - VPCMPEQB X14, X14, X14 // Y14 = 0xFFFF... 
- VPSUBQ X14, X13, X13 - VPSLLQ $63, X13, X14 - - mul2(X0, X5, X6, X7) // c00 *= 2 - VXORPD X5, X1, X0 // c00 += c01 - mul2(X2, X5, X6, X7) // c10 *= 2 - VXORPD X3, X5, X2 // c10 += c11 - MOVQ e+32(FP), CX - VMOVDQU (CX), X5 - VANDPD X0, X5, X1 // c01 = c00 + e - VXORPD X8, X1, X1 // c01 += X8 (old c00) - VANDPD X2, X5, X3 // c11 = c10 + e - VXORPD X9, X3, X3 // c11 += x9 (old c10) - - VMOVDQU X0, (AX) - MOVQ c10+16(FP), CX - VMOVDQU X2, (CX) - VMOVDQU X1, (BX) - VMOVDQU X3, (DX) - - RET - TEXT ·mulByteRight(SB), NOSPLIT, $0 MOVQ c00+0(FP), AX VMOVDQU (AX), X0 - MOVQ c01+8(FP), BX - VMOVDQU (BX), X1 - MOVQ c10+16(FP), CX + MOVQ c10+8(FP), CX VMOVDQU (CX), X2 + MOVQ c01+16(FP), BX + VMOVDQU (BX), X1 MOVQ c11+24(FP), DX VMOVDQU (DX), X3 MOVQ $0, CX @@ -98,7 +61,7 @@ TEXT ·mulByteRight(SB), NOSPLIT, $0 mulBit($0) VMOVDQU X0, (AX) - MOVQ c10+16(FP), CX + MOVQ c10+8(FP), CX VMOVDQU X2, (CX) VMOVDQU X1, (BX) MOVQ c11+24(FP), DX diff --git a/tz/digest_generic.go b/tz/digest_generic.go new file mode 100644 index 0000000..b8778c2 --- /dev/null +++ b/tz/digest_generic.go @@ -0,0 +1,8 @@ +//go:build !(amd64 && !generic) +// +build !amd64 generic + +package tz + +func write(d *digest, data []byte) (int, error) { + return writeGeneric(d, data) +} diff --git a/tz/digets_amd64.go b/tz/digets_amd64.go new file mode 100644 index 0000000..462bfff --- /dev/null +++ b/tz/digets_amd64.go @@ -0,0 +1,39 @@ +//go:build amd64 && !generic +// +build amd64,!generic + +package tz + +import ( + "github.com/nspcc-dev/tzhash/gf127" + "golang.org/x/sys/cpu" +) + +func write(d *digest, data []byte) (n int, err error) { + switch { + case cpu.X86.HasAVX && cpu.X86.HasAVX2: + return writeAVX2(d, data) + case cpu.X86.HasAVX: + return writeAVX(d, data) + default: + return writeGeneric(d, data) + } +} + +func writeAVX2(d *digest, data []byte) (n int, err error) { + n = len(data) + if len(data) != 0 { + mulByteSliceRightx2(&d.x[0], &d.x[2], n, &data[0]) + } + return +} + +func writeAVX(d *digest, data 
[]byte) (n int, err error) { + n = len(data) + for _, b := range data { + mulByteRight(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b) + } + return +} + +func mulByteRight(c00, c10, c01, c11 *GF127, b byte) // names follow the FP offsets used in digest_avx_amd64.s (c10+8, c01+16) +func mulByteSliceRightx2(c00c10 *gf127.GF127, c01c11 *gf127.GF127, n int, data *byte) diff --git a/tz/hash.go b/tz/hash.go index a7e4e4e..e39ae60 100644 --- a/tz/hash.go +++ b/tz/hash.go @@ -6,95 +6,8 @@ package tz import ( "errors" - "hash" - - "golang.org/x/sys/cpu" ) -type Implementation int - -const ( - // Size is the size of a Tillich-Zemor hash sum in bytes. - Size = 64 - hashBlockSize = 128 - - _ Implementation = iota - AVX - AVX2 - AVX2Inline - PureGo - AVXInline -) - -var ( - hasAVX = cpu.X86.HasAVX - // Having AVX2 does not guarantee - // that AVX is also present. - hasAVX2 = cpu.X86.HasAVX2 && hasAVX -) - -func (impl Implementation) String() string { - switch impl { - case AVX: - return "AVX" - case AVXInline: - return "AVXInline" - case AVX2: - return "AVX2" - case AVX2Inline: - return "AVX2Inline" - case PureGo: - return "PureGo" - default: - return "UNKNOWN" - } -} - -func NewWith(impl Implementation) hash.Hash { - switch impl { - case AVX: - return newAVX() - case AVXInline: - return newAVXInline() - case AVX2: - return newAVX2() - case AVX2Inline: - return newAVX2Inline() - case PureGo: - return newPure() - default: - return New() - } -} - -// New returns a new hash.Hash computing the Tillich-Zémor checksum. -func New() hash.Hash { - if hasAVX2 { - return newAVX2Inline() - } else if hasAVX { - return newAVXInline() - } else { - return newPure() - } -} - -// Sum returns Tillich-Zémor checksum of data. 
-func Sum(data []byte) [Size]byte { - if hasAVX2 { - d := newAVX2Inline() - _, _ = d.Write(data) // no errors - return d.checkSum() - } else if hasAVX { - d := newAVXInline() - _, _ = d.Write(data) // no errors - return d.checkSum() - } else { - d := newPure() - _, _ = d.Write(data) // no errors - return d.checkSum() - } -} - // Concat performs combining of hashes based on homomorphic property. func Concat(hs [][]byte) ([]byte, error) { var b, c sl2 diff --git a/tz/hash_test.go b/tz/hash_test.go index c0c10a4..539b8f1 100644 --- a/tz/hash_test.go +++ b/tz/hash_test.go @@ -2,38 +2,29 @@ package tz import ( "encoding/hex" + "fmt" "io" "math/rand" "testing" "github.com/stretchr/testify/require" + "golang.org/x/sys/cpu" ) const benchDataSize = 100000 -var providers = []Implementation{ - AVX, - AVXInline, - AVX2, - AVX2Inline, - PureGo, +type arch struct { + HasAVX bool + HasAVX2 bool } -func TestNewWith(t *testing.T) { - d := NewWith(AVX) - require.IsType(t, (*digest)(nil), d) - - d = NewWith(AVXInline) - require.IsType(t, (*digest4)(nil), d) - - d = NewWith(AVX2) - require.IsType(t, (*digest2)(nil), d) - - d = NewWith(AVX2Inline) - require.IsType(t, (*digest3)(nil), d) - - d = NewWith(PureGo) - require.IsType(t, (*digestp)(nil), d) +var backends = []struct { + Name string + arch +}{ + {"AVX", arch{true, false}}, + {"AVX2", arch{true, true}}, + {"Generic", arch{false, false}}, } var testCases = []struct { @@ -83,10 +74,12 @@ var testCases = []struct { } func TestHash(t *testing.T) { - for i := range providers { - p := providers[i] - t.Run(p.String()+" digest", func(t *testing.T) { - d := NewWith(p) + for i, b := range backends { + t.Run(b.Name+" digest", func(t *testing.T) { + prepareArch(t, backends[i].arch) + + fmt.Println("FEATURES:", cpu.X86.HasAVX, cpu.X86.HasAVX2) + d := New() for _, tc := range testCases { d.Reset() _, _ = d.Write(tc.input) @@ -97,6 +90,20 @@ func TestHash(t *testing.T) { } } +func prepareArch(t testing.TB, b arch) { + realCPU := cpu.X86 + if 
!realCPU.HasAVX2 && b.HasAVX2 || !realCPU.HasAVX && b.HasAVX { + t.Skip("Underlying CPU doesn't support necessary features") + } else { + t.Cleanup(func() { + cpu.X86.HasAVX = realCPU.HasAVX + cpu.X86.HasAVX2 = realCPU.HasAVX2 + }) + cpu.X86.HasAVX = b.HasAVX + cpu.X86.HasAVX2 = b.HasAVX2 + } +} + func newBuffer() (data []byte) { data = make([]byte, benchDataSize) @@ -110,20 +117,20 @@ func newBuffer() (data []byte) { func BenchmarkSum(b *testing.B) { data := newBuffer() - size := int64(len(data)) - for i := range providers { - p := providers[i] - b.Run(p.String()+" digest", func(b *testing.B) { + for i := range backends { + b.Run(backends[i].Name+" digest", func(b *testing.B) { + prepareArch(b, backends[i].arch) + b.ResetTimer() b.ReportAllocs() - d := NewWith(p) + d := New() for i := 0; i < b.N; i++ { d.Reset() _, _ = d.Write(data) d.Sum(nil) } - b.SetBytes(size) + b.SetBytes(int64(len(data))) }) } } diff --git a/tz/pure.go b/tz/pure.go deleted file mode 100644 index af9b3a7..0000000 --- a/tz/pure.go +++ /dev/null @@ -1,92 +0,0 @@ -package tz - -import ( - "github.com/nspcc-dev/tzhash/gf127" -) - -type digestp struct { - x [4]GF127 -} - -// New returns a new hash.Hash computing the Tillich-Zémor checksum. -func newPure() *digestp { - d := new(digestp) - d.Reset() - return d -} - -func (d *digestp) Sum(in []byte) []byte { - // Make a copy of d so that caller can keep writing and summing. - d0 := *d - h := d0.checkSum() - return append(in, h[:]...) 
-} - -func (d *digestp) checkSum() [Size]byte { - return d.byteArray() -} - -func (d *digestp) byteArray() (b [Size]byte) { - for i := 0; i < 4; i++ { - t := d.x[i].ByteArray() - copy(b[i*16:], t[:]) - } - return -} - -func (d *digestp) Reset() { - d.x[0] = GF127{1, 0} - d.x[1] = GF127{0, 0} - d.x[2] = GF127{0, 0} - d.x[3] = GF127{1, 0} -} - -func (d *digestp) Write(data []byte) (n int, err error) { - n = len(data) - tmp := new(GF127) - for _, b := range data { - mulBitRightPure(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x80 != 0, tmp) - mulBitRightPure(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x40 != 0, tmp) - mulBitRightPure(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x20 != 0, tmp) - mulBitRightPure(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x10 != 0, tmp) - mulBitRightPure(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x08 != 0, tmp) - mulBitRightPure(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x04 != 0, tmp) - mulBitRightPure(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x02 != 0, tmp) - mulBitRightPure(&d.x[0], &d.x[1], &d.x[2], &d.x[3], b&0x01 != 0, tmp) - } - return -} - -func (d *digestp) Size() int { - return Size -} - -func (d *digestp) BlockSize() int { - return hashBlockSize -} - -func mulBitRightPure(c00, c01, c10, c11 *GF127, bit bool, tmp *GF127) { - if bit { - *tmp = *c00 - gf127.Mul10(c00, c00) - gf127.Add(c00, c01, c00) - gf127.Mul11(tmp, tmp) - gf127.Add(c01, tmp, c01) - - *tmp = *c10 - gf127.Mul10(c10, c10) - gf127.Add(c10, c11, c10) - gf127.Mul11(tmp, tmp) - gf127.Add(c11, tmp, c11) - } else { - *tmp = *c00 - gf127.Mul10(c00, c00) - gf127.Add(c00, c01, c00) - *c01 = *tmp - - *tmp = *c10 - gf127.Mul10(c10, c10) - gf127.Add(c10, c11, c10) - *c11 = *tmp - } -} diff --git a/tz/sl2.go b/tz/sl2.go index 14f963b..cc45281 100644 --- a/tz/sl2.go +++ b/tz/sl2.go @@ -17,11 +17,13 @@ var id = sl2{ {GF127{0, 0}, GF127{1, 0}}, } +// MarshalBinary implements encoding.BinaryMarshaler. 
func (c *sl2) MarshalBinary() (data []byte, err error) { s := c.ByteArray() return s[:], nil } +// UnmarshalBinary implements encoding.BinaryUnmarshaler. func (c *sl2) UnmarshalBinary(data []byte) (err error) { if len(data) != 64 { return errors.New("data must be 64-bytes long") @@ -113,6 +115,7 @@ func (c *sl2) MulB() *sl2 { return c } +// Mul returns a * b in GL_2(GF(2^127)) func (c *sl2) Mul(a, b *sl2) *sl2 { var x [4]GF127 @@ -158,16 +161,16 @@ func (c *sl2) String() string { func (c *sl2) ByteArray() (b [Size]byte) { t := c[0][0].ByteArray() - copy(b[:], t) + copy(b[:], t[:]) t = c[0][1].ByteArray() - copy(b[16:], t) + copy(b[16:], t[:]) t = c[1][0].ByteArray() - copy(b[32:], t) + copy(b[32:], t[:]) t = c[1][1].ByteArray() - copy(b[48:], t) + copy(b[48:], t[:]) return }