From 1d4e7550fc1f01e4a4d4e76c661b89e55d1a0f19 Mon Sep 17 00:00:00 2001 From: Evgenii Stratonikov Date: Thu, 10 Oct 2019 11:29:40 +0300 Subject: [PATCH] Use macros in AVX hash implementation --- tz/avx_amd64.s | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/tz/avx_amd64.s b/tz/avx_amd64.s index 063a154..97e41b9 100644 --- a/tz/avx_amd64.s +++ b/tz/avx_amd64.s @@ -1,5 +1,17 @@ #include "textflag.h" +// mul2 multiplies FROM by 2, stores the result in TO +// and uses R2 and R3 for internal computations. +#define mul2(FROM, TO, R2, R3) \ + VPSLLQ $1, FROM, TO \ + VPALIGNR $8, TO, FROM, R2 \ + PSRLQ $63, R2 \ + MOVUPD ·x127x63(SB), R3 \ + ANDPD TO, R3 \ + VPUNPCKHQDQ R3, R3, R3 \ + XORPD R2, TO \ + XORPD R3, TO + // func mulBitRight(c00, c01, c10, c11, e *[2]uint64) TEXT ·mulBitRight(SB),NOSPLIT,$0 MOVQ c00+0(FP), AX @@ -14,14 +26,7 @@ TEXT ·mulBitRight(SB),NOSPLIT,$0 MOVUPD (DX), X3 // c00 *= 2 - VPSLLQ $1, X0, X5 - VPALIGNR $8, X5, X0, X6 - PSRLQ $63, X6 - MOVUPD ·x127x63(SB), X7 - ANDPD X5, X7 - VPUNPCKHQDQ X7, X7, X7 - XORPD X6, X5 - XORPD X7, X5 + mul2(X0, X5, X6, X7) MOVUPD X5, X0 // c00 += c01 @@ -29,14 +34,7 @@ TEXT ·mulBitRight(SB),NOSPLIT,$0 MOVUPD X0, (AX) // c10 *= 2 - VPSLLQ $1, X2, X5 - VPALIGNR $8, X5, X2, X6 - PSRLQ $63, X6 - MOVUPD ·x127x63(SB), X7 - ANDPD X5, X7 - VPUNPCKHQDQ X7, X7, X7 - XORPD X6, X5 - XORPD X7, X5 + mul2(X2, X5, X6, X7) MOVUPD X5, X2 // c10 += c11