Merge pull request #16 from nspcc-dev/feat/cpuid

Move cpu id to a separate package
This commit is contained in:
fyrchik 2019-10-09 18:18:41 +03:00 committed by GitHub
commit 5142f695cf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 52 additions and 181 deletions

2
go.mod
View file

@ -1,3 +1,5 @@
module github.com/nspcc-dev/tzhash
go 1.12
require github.com/stretchr/testify v1.3.0

View file

@ -28,7 +28,7 @@
// +build 386 amd64 amd64p32
package tz
package cpuid
const (
bitOSXSAVE = 1 << 27
@ -36,14 +36,19 @@ const (
bitAVX2 = 1 << 5
)
func setFeatures() {
// Package-level flags returned by HasAVX and HasAVX2 respectively.
var (
hasAVX bool
hasAVX2 bool
)
func init() {
maxID, _, _, _ := cpuid(0, 0)
if maxID < 1 {
return
}
_, _, ecx1, _ := cpuid(1, 0)
hasOSXSAVE = isSet(ecx1, bitOSXSAVE)
hasOSXSAVE := isSet(ecx1, bitOSXSAVE)
osSupportsAVX := false
if hasOSXSAVE {
@ -61,6 +66,9 @@ func setFeatures() {
hasAVX2 = isSet(ebx7, bitAVX2) && osSupportsAVX
}
// HasAVX returns the current value of the package-level hasAVX flag.
func HasAVX() bool {
	return hasAVX
}
// HasAVX2 returns the current value of the package-level hasAVX2 flag.
func HasAVX2() bool {
	return hasAVX2
}
// isSet reports whether hwc and value have at least one set bit in common.
func isSet(hwc, value uint32) bool {
	common := hwc & value
	return common != 0
}

View file

@ -1,5 +1,28 @@
#include "textflag.h"
// mask builds a byte mask from a single bit of src: it copies src into tmp,
// shifts tmp right by bit, keeps only the lowest bit and negates it, so tmp
// ends up either zero or all ones. tmp is then moved into to1 and the low
// byte of to1 is broadcast into to2. tmp, to1 and to2 are overwritten.
#define mask(bit, src, tmp, to1, to2) \
MOVQ src, tmp \
SHRQ bit, tmp \
ANDQ $1, tmp \
NEGQ tmp \
MOVQ tmp, to1 \
VPBROADCASTB to1, to2
// mulBit performs one step for the given bit of CX, using Y0 and Y8 as the
// running state:
//   - Y3 is derived from Y0 by the shift/align/unpack/XOR sequence below and
//     is then XORed with Y8;
//   - Y2 is set by the mask macro to all zeros or all ones depending on the
//     selected bit of CX (this overwrites DX and X1);
//   - Y8 becomes the previous Y0 XOR (Y3 AND Y2), and Y0 becomes Y3.
// Y1, Y2, Y3 and Y4 are used as scratch registers.
#define mulBit(bit) \
VPSLLQ $1, Y0, Y1 \
VPALIGNR $8, Y1, Y0, Y2 \
VPSRLQ $63, Y2, Y2 \
VPXOR Y1, Y2, Y2 \
VPSRLQ $63, Y1, Y3 \
VPSLLQ $63, Y3, Y3 \
VPUNPCKHQDQ Y3, Y3, Y3 \
VPXOR Y2, Y3, Y3 \
mask(bit, CX, DX, X1, Y2) \
VPXOR Y3, Y8, Y3 \
VPAND Y3, Y2, Y4 \
VPXOR Y4, Y0, Y8 \
VMOVDQA Y3, Y0
// func mulByteRightx2(c00c10, c01c11 *[4]uint64, b byte)
TEXT ·mulByteRightx2(SB),NOSPLIT,$0
MOVQ c00c10+0(FP), AX
@ -8,179 +31,16 @@ TEXT ·mulByteRightx2(SB),NOSPLIT,$0
VMOVDQA (BX), Y8
MOVB b+16(FP), CX
// 1 bit
VPSLLQ $1, Y0, Y1
VPALIGNR $8, Y1, Y0, Y2
VPSRLQ $63, Y2, Y2
VPXOR Y1, Y2, Y2
VPSRLQ $63, Y1, Y3
VPSLLQ $63, Y3, Y3
VPUNPCKHQDQ Y3, Y3, Y3
VPXOR Y2, Y3, Y3
mulBit($7)
mulBit($6)
mulBit($5)
mulBit($4)
mulBit($3)
mulBit($2)
mulBit($1)
mulBit($0)
MOVQ CX, DX
SHRQ $7, DX
ANDQ $1, DX
NEGQ DX
MOVQ DX, X1
VPBROADCASTB X1, Y2
VPXOR Y3, Y8, Y3
VPAND Y3, Y2, Y4
VPXOR Y4, Y0, Y8
VMOVDQA Y3, Y0
// 2 bit
VPSLLQ $1, Y0, Y1
VPALIGNR $8, Y1, Y0, Y2
VPSRLQ $63, Y2, Y2
VPXOR Y1, Y2, Y2
VPSRLQ $63, Y1, Y3
VPSLLQ $63, Y3, Y3
VPUNPCKHQDQ Y3, Y3, Y3
VPXOR Y2, Y3, Y3
MOVQ CX, DX
SHRQ $6, DX
ANDQ $1, DX
NEGQ DX
MOVQ DX, X1
VPBROADCASTB X1, Y2
VPXOR Y3, Y8, Y3
VPAND Y3, Y2, Y4
VPXOR Y4, Y0, Y8
VMOVDQA Y3, Y0
// 3 bit
VPSLLQ $1, Y0, Y1
VPALIGNR $8, Y1, Y0, Y2
VPSRLQ $63, Y2, Y2
VPXOR Y1, Y2, Y2
VPSRLQ $63, Y1, Y3
VPSLLQ $63, Y3, Y3
VPUNPCKHQDQ Y3, Y3, Y3
VPXOR Y2, Y3, Y3
MOVQ CX, DX
SHRQ $5, DX
ANDQ $1, DX
NEGQ DX
MOVQ DX, X1
VPBROADCASTB X1, Y2
VPXOR Y3, Y8, Y3
VPAND Y3, Y2, Y4
VPXOR Y4, Y0, Y8
VMOVDQA Y3, Y0
// 4 bit
VPSLLQ $1, Y0, Y1
VPALIGNR $8, Y1, Y0, Y2
VPSRLQ $63, Y2, Y2
VPXOR Y1, Y2, Y2
VPSRLQ $63, Y1, Y3
VPSLLQ $63, Y3, Y3
VPUNPCKHQDQ Y3, Y3, Y3
VPXOR Y2, Y3, Y3
MOVQ CX, DX
SHRQ $4, DX
ANDQ $1, DX
NEGQ DX
MOVQ DX, X1
VPBROADCASTB X1, Y2
VPXOR Y3, Y8, Y3
VPAND Y3, Y2, Y4
VPXOR Y4, Y0, Y8
VMOVDQA Y3, Y0
// 5 bit
VPSLLQ $1, Y0, Y1
VPALIGNR $8, Y1, Y0, Y2
VPSRLQ $63, Y2, Y2
VPXOR Y1, Y2, Y2
VPSRLQ $63, Y1, Y3
VPSLLQ $63, Y3, Y3
VPUNPCKHQDQ Y3, Y3, Y3
VPXOR Y2, Y3, Y3
MOVQ CX, DX
SHRQ $3, DX
ANDQ $1, DX
NEGQ DX
MOVQ DX, X1
VPBROADCASTB X1, Y2
VPXOR Y3, Y8, Y3
VPAND Y3, Y2, Y4
VPXOR Y4, Y0, Y8
VMOVDQA Y3, Y0
// 6 bit
VPSLLQ $1, Y0, Y1
VPALIGNR $8, Y1, Y0, Y2
VPSRLQ $63, Y2, Y2
VPXOR Y1, Y2, Y2
VPSRLQ $63, Y1, Y3
VPSLLQ $63, Y3, Y3
VPUNPCKHQDQ Y3, Y3, Y3
VPXOR Y2, Y3, Y3
MOVQ CX, DX
SHRQ $2, DX
ANDQ $1, DX
NEGQ DX
MOVQ DX, X1
VPBROADCASTB X1, Y2
VPXOR Y3, Y8, Y3
VPAND Y3, Y2, Y4
VPXOR Y4, Y0, Y8
VMOVDQA Y3, Y0
// 7 bit
VPSLLQ $1, Y0, Y1
VPALIGNR $8, Y1, Y0, Y2
VPSRLQ $63, Y2, Y2
VPXOR Y1, Y2, Y2
VPSRLQ $63, Y1, Y3
VPSLLQ $63, Y3, Y3
VPUNPCKHQDQ Y3, Y3, Y3
VPXOR Y2, Y3, Y3
MOVQ CX, DX
SHRQ $1, DX
ANDQ $1, DX
NEGQ DX
MOVQ DX, X1
VPBROADCASTB X1, Y2
VPXOR Y3, Y8, Y3
VPAND Y3, Y2, Y4
VPXOR Y4, Y0, Y8
VMOVDQA Y3, Y0
// 8 bit
VPSLLQ $1, Y0, Y1
VPALIGNR $8, Y1, Y0, Y2
VPSRLQ $63, Y2, Y2
VPXOR Y1, Y2, Y2
VPSRLQ $63, Y1, Y3
VPSLLQ $63, Y3, Y3
VPUNPCKHQDQ Y3, Y3, Y3
VPXOR Y2, Y3, Y3
MOVQ CX, DX
ANDQ $1, DX
NEGQ DX
MOVQ DX, X1
VPBROADCASTB X1, Y2
VPXOR Y3, Y8, Y3
VPAND Y3, Y2, Y4
VPXOR Y4, Y0, Y8
VMOVDQA Y8, (BX)
VMOVDQA Y3, (AX)
VMOVDQA Y0, (AX)
RET

View file

@ -6,6 +6,8 @@ package tz
import (
"errors"
"hash"
"github.com/nspcc-dev/tzhash/internal/cpuid"
)
type Implementation int
@ -22,9 +24,10 @@ const (
)
var (
hasAVX bool
hasAVX2 bool
hasOSXSAVE bool
hasAVX = cpuid.HasAVX()
// Having AVX2 does not guarantee
// that AVX is also present.
hasAVX2 = cpuid.HasAVX2() && hasAVX
)
func (impl Implementation) String() string {

View file

@ -17,8 +17,6 @@ var id = sl2{
var mul func(a, b, c *sl2, x *[4]gf127.GF127)
func init() {
setFeatures()
if hasAVX {
mul = mulSL2AVX
} else {