forked from TrueCloudLab/tzhash
d891a9c591
Provide default implementations in gf127 package and all optimizations in subpackages. This way it will be easier to use from a client.
35 lines
732 B
ArmAsm
35 lines
732 B
ArmAsm
#include "textflag.h"
|
|
|
|
// func Mul10x2(a, b *[4]uint64)
//
// Treats *a as two independent 128-bit values, one per 128-bit lane, each
// stored as a (low, high) pair of uint64. For every lane it shifts the value
// left by one bit and folds the bit that lands in position 127 back into
// bits 0 and 63, clearing bit 127. In polynomial terms this is a*x reduced
// modulo x^127 + x^63 + 1. The 32-byte result is written to *b.
//
// a and b may point to the same memory: the whole input is loaded before the
// single store. Unaligned loads/stores are used because Go does not guarantee
// 32-byte alignment for a [4]uint64.
//
// Clobbers: AX, Y0-Y3.
TEXT ·Mul10x2(SB),NOSPLIT,$0
    MOVQ a+0(FP), AX
    VMOVDQU (AX), Y0            // Y0 = a (unaligned-safe load)
    VPSLLQ $1, Y0, Y1           // Y1 = every qword of a shifted left by 1
    VPALIGNR $8, Y1, Y0, Y2     // per lane: Y2 = (Y1.high, Y0.low)
    VPSRLQ $63, Y2, Y2          // low qword: bit 126 of a; high qword: carry out of a.low
    VPXOR Y1, Y2, Y2            // apply the carry and the x^0 reduction term
    VPSRLQ $63, Y1, Y3
    VPSLLQ $63, Y3, Y3          // Y3 = bit 63 of every shifted qword, kept in place
    VPUNPCKHQDQ Y3, Y3, Y3      // per lane: copy the high qword's bit into both qwords
    VPXOR Y2, Y3, Y3            // low: add the x^63 term; high: clear bit 127
    MOVQ b+8(FP), AX
    VMOVDQU Y3, (AX)            // *b = result (unaligned-safe store)
    VZEROUPPER                  // leave no dirty upper YMM state for SSE code that runs next
    RET
|
|
|
|
|
|
// func Mul11x2(a, b *[4]uint64)
//
// Treats *a as two independent 128-bit values, one per 128-bit lane, each
// stored as a (low, high) pair of uint64. For every lane it computes the
// value shifted left by one bit, with the bit that lands in position 127
// folded back into bits 0 and 63 (a*x reduced modulo x^127 + x^63 + 1), and
// then XORs the original value into it, giving a*(x+1). The 32-byte result is
// written to *b.
//
// a and b may point to the same memory: the whole input is loaded before the
// single store. Unaligned loads/stores are used because Go does not guarantee
// 32-byte alignment for a [4]uint64.
//
// Clobbers: AX, Y0-Y3.
TEXT ·Mul11x2(SB),NOSPLIT,$0
    MOVQ a+0(FP), AX
    VMOVDQU (AX), Y0            // Y0 = a (unaligned-safe load)
    VPSLLQ $1, Y0, Y1           // Y1 = every qword of a shifted left by 1
    VPALIGNR $8, Y1, Y0, Y2     // per lane: Y2 = (Y1.high, Y0.low)
    VPSRLQ $63, Y2, Y2          // low qword: bit 126 of a; high qword: carry out of a.low
    VPXOR Y1, Y2, Y2            // apply the carry and the x^0 reduction term
    VPSRLQ $63, Y1, Y3
    VPSLLQ $63, Y3, Y3          // Y3 = bit 63 of every shifted qword, kept in place
    VPUNPCKHQDQ Y3, Y3, Y3      // per lane: copy the high qword's bit into both qwords
    VPXOR Y2, Y3, Y3            // low: add the x^63 term; high: clear bit 127
    VPXOR Y0, Y3, Y3            // a*x + a
    MOVQ b+8(FP), AX
    VMOVDQU Y3, (AX)            // *b = result (unaligned-safe store)
    VZEROUPPER                  // leave no dirty upper YMM state for SSE code that runs next
    RET
|