tzhash/tz/avx2_inline.go
Evgenii Stratonikov 73d978c31e Rewrite AVX2 loop in assembly
Helps to get rid of MOV and generating constants for each iteration.

```
name                     old time/op    new time/op    delta
Sum/AVX2Inline_digest-8    1.57ms ± 2%    1.41ms ± 0%  -10.52%  (p=0.000 n=9+9)

name                     old speed      new speed      delta
Sum/AVX2Inline_digest-8  63.6MB/s ± 1%  71.1MB/s ± 0%  +11.76%  (p=0.000 n=9+9)
```

Signed-off-by: Evgenii Stratonikov <evgeniy@nspcc.ru>
2022-01-17 17:18:36 +03:00

59 lines
1.3 KiB
Go

// Copyright 2019 (c) NSPCC
//
// This file contains the AVX2 implementation with inlined
// assembly calls.
package tz
import (
"hash"
"github.com/nspcc-dev/tzhash/gf127/avx2"
)
// digest3 is the hash state of the AVX2 implementation with inlined
// assembly calls. It is used through the hash.Hash interface.
type digest3 struct {
// x is the running 2x2 matrix, stored transposed (see checkSum).
// Judging by the parameter names of mulByteSliceRightx2, x[0] packs
// the entries c00 and c10 and x[1] packs c01 and c11.
x [2]avx2.GF127x2
}
// Compile-time check that *digest3 satisfies hash.Hash.
var _ hash.Hash = (*digest3)(nil)
// newAVX2Inline returns a digest3 that has been put into its initial
// state via Reset and is ready to accept data.
func newAVX2Inline() *digest3 {
	var state digest3
	state.Reset()
	return &state
}
// Write feeds data into the running hash state. It always reports
// len(data) bytes consumed and a nil error. Empty input leaves the
// state untouched, which also avoids taking the address of data[0]
// on an empty slice.
func (d *digest3) Write(data []byte) (n int, err error) {
	if n = len(data); n == 0 {
		return 0, nil
	}
	mulByteSliceRightx2(&d.x[0], &d.x[1], n, &data[0])
	return n, nil
}
// Sum appends the current checksum to in and returns the resulting slice.
func (d *digest3) Sum(in []byte) []byte {
	// Work on a snapshot of the state, so the caller may continue
	// writing to and summing d afterwards.
	snapshot := *d
	sum := snapshot.checkSum()
	return append(in, sum[:]...)
}
// Reset restores the initial state: x[0] becomes the pair (1, 0) and
// x[1] becomes the pair (0, 1).
func (d *digest3) Reset() {
	d.x = [2]avx2.GF127x2{
		{GF127{1, 0}, GF127{0, 0}},
		{GF127{0, 0}, GF127{1, 0}},
	}
}
// Size returns hashSize, the length in bytes of the checksum that
// Sum appends.
func (d *digest3) Size() int {
	return hashSize
}
// BlockSize returns hashBlockSize, the block size reported through
// the hash.Hash interface.
func (d *digest3) BlockSize() int {
	return hashBlockSize
}
// checkSum serializes the state into a hashSize-byte array.
//
// The matrix is kept transposed in d.x, while the output must follow
// the same order as digest. For each element of d.x, the first 16
// bytes of its serialization are placed at offset 16*i and the
// remaining bytes at offset 16*i+32.
func (d *digest3) checkSum() (b [hashSize]byte) {
	for col := range d.x {
		raw := d.x[col].ByteArray()
		base := 16 * col
		copy(b[base:], raw[:16])
		copy(b[base+32:], raw[16:])
	}
	return b
}
// mulByteSliceRightx2 updates the state held in c00c10 and c01c11 using
// n bytes of input starting at data. It is declared without a Go body,
// so the implementation lives outside this file (presumably in the
// package's AVX2 assembly source; confirm against the .s file).
//
// Write calls it with n = len(data) and data = &data[0], and only for
// non-empty input.
// NOTE(review): presumably the routine multiplies the state on the right
// by the matrix for each input bit/byte, as the name suggests; verify
// against the assembly before relying on this.
func mulByteSliceRightx2(c00c10 *avx2.GF127x2, c01c11 *avx2.GF127x2, n int, data *byte)