distribution/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s
Sebastiaan van Stijn 79976446f7
vendor: github.com/klauspost/compress v1.17.4
newer versions continue to include performance improvements, so it's good
to stay up-to-date.

full diff: https://github.com/klauspost/compress/compare/v1.16.5...v1.17.4

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2023-12-01 10:33:39 +01:00

68 lines
1.2 KiB
ArmAsm

// Copied from S2 implementation.
//go:build !appengine && !noasm && gc && !noasm
#include "textflag.h"
// func matchLen(a []byte, b []byte) int
// Requires: BMI
TEXT ·matchLen(SB), NOSPLIT, $0-56
MOVQ a_base+0(FP), AX
MOVQ b_base+24(FP), CX
MOVQ a_len+8(FP), DX
// matchLen
XORL SI, SI
CMPL DX, $0x08
JB matchlen_match4_standalone
matchlen_loopback_standalone:
MOVQ (AX)(SI*1), BX
XORQ (CX)(SI*1), BX
TESTQ BX, BX
JZ matchlen_loop_standalone
#ifdef GOAMD64_v3
TZCNTQ BX, BX
#else
BSFQ BX, BX
#endif
SARQ $0x03, BX
LEAL (SI)(BX*1), SI
JMP gen_match_len_end
matchlen_loop_standalone:
LEAL -8(DX), DX
LEAL 8(SI), SI
CMPL DX, $0x08
JAE matchlen_loopback_standalone
matchlen_match4_standalone:
CMPL DX, $0x04
JB matchlen_match2_standalone
MOVL (AX)(SI*1), BX
CMPL (CX)(SI*1), BX
JNE matchlen_match2_standalone
LEAL -4(DX), DX
LEAL 4(SI), SI
matchlen_match2_standalone:
CMPL DX, $0x02
JB matchlen_match1_standalone
MOVW (AX)(SI*1), BX
CMPW (CX)(SI*1), BX
JNE matchlen_match1_standalone
LEAL -2(DX), DX
LEAL 2(SI), SI
matchlen_match1_standalone:
CMPL DX, $0x01
JB gen_match_len_end
MOVB (AX)(SI*1), BL
CMPB (CX)(SI*1), BL
JNE gen_match_len_end
INCL SI
gen_match_len_end:
MOVQ SI, ret+48(FP)
RET