Use unaligned move in AVX2 implementation

Signed-off-by: Evgenii Stratonikov <evgeniy@nspcc.ru>
This commit is contained in:
Evgenii Stratonikov 2021-12-29 09:48:28 +03:00 committed by Alex Vanin
parent c8a32b25ec
commit bbbcf3fa5c

View file

@ -18,14 +18,14 @@
VPXOR Y3, Y8, Y3 \ VPXOR Y3, Y8, Y3 \
VPAND Y3, Y2, Y4 \ VPAND Y3, Y2, Y4 \
VPXOR Y4, Y0, Y8 \ VPXOR Y4, Y0, Y8 \
VMOVDQA Y3, Y0 VMOVDQU Y3, Y0
// func mulByteRightx2(c00c10, c01c11 *[4]uint64, b byte) // func mulByteRightx2(c00c10, c01c11 *[4]uint64, b byte)
TEXT ·mulByteRightx2(SB),NOSPLIT,$0 TEXT ·mulByteRightx2(SB),NOSPLIT,$0
MOVQ c00c10+0(FP), AX MOVQ c00c10+0(FP), AX
VMOVDQA (AX), Y0 VMOVDQU (AX), Y0
MOVQ c01c11+8(FP), BX MOVQ c01c11+8(FP), BX
VMOVDQA (BX), Y8 VMOVDQU (BX), Y8
VPXOR Y13, Y13, Y13 // Y13 = 0x0000... VPXOR Y13, Y13, Y13 // Y13 = 0x0000...
VPCMPEQB Y12, Y12, Y12 // Y12 = 0xFFFF... VPCMPEQB Y12, Y12, Y12 // Y12 = 0xFFFF...
@ -43,17 +43,17 @@ TEXT ·mulByteRightx2(SB),NOSPLIT,$0
mulBit($1) mulBit($1)
mulBit($0) mulBit($0)
VMOVDQA Y8, (BX) VMOVDQU Y8, (BX)
VMOVDQA Y0, (AX) VMOVDQU Y0, (AX)
RET RET
// func mulBitRightx2(c00c10, c01c11 *[4]uint64, e *[2]uint64) // func mulBitRightx2(c00c10, c01c11 *[4]uint64, e *[2]uint64)
TEXT ·mulBitRightx2(SB),NOSPLIT,$0 TEXT ·mulBitRightx2(SB),NOSPLIT,$0
MOVQ c00c10+0(FP), AX MOVQ c00c10+0(FP), AX
VMOVDQA (AX), Y0 VMOVDQU (AX), Y0
MOVQ c01c11+8(FP), BX MOVQ c01c11+8(FP), BX
VMOVDQA (BX), Y8 VMOVDQU (BX), Y8
VPSLLQ $1, Y0, Y1 VPSLLQ $1, Y0, Y1
VPALIGNR $8, Y1, Y0, Y2 VPALIGNR $8, Y1, Y0, Y2
@ -70,6 +70,6 @@ TEXT ·mulBitRightx2(SB),NOSPLIT,$0
VPXOR Y3, Y8, Y3 VPXOR Y3, Y8, Y3
VPAND Y3, Y2, Y4 VPAND Y3, Y2, Y4
VPXOR Y4, Y0, Y8 VPXOR Y4, Y0, Y8
VMOVDQA Y8, (BX) VMOVDQU Y8, (BX)
VMOVDQA Y3, (AX) VMOVDQU Y3, (AX)
RET RET