Merge pull request #643 from restic/update-poly1305

Update golang.org/x/crypto/poly1305
This commit is contained in:
Alexander Neumann 2016-10-14 15:51:57 +02:00
commit 50212805aa
10 changed files with 531 additions and 931 deletions

2
vendor/manifest vendored
View file

@ -85,7 +85,7 @@
{
"importpath": "golang.org/x/crypto/poly1305",
"repository": "https://go.googlesource.com/crypto",
"revision": "81372b2fc2f10bef2a7f338da115c315a56b2726",
"revision": "5f31782cfb2b6373211f8f9fbf31283fa234b570",
"branch": "master",
"path": "/poly1305"
},

View file

@ -1,45 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This code was translated into a form compatible with 6a from the public
// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
// +build amd64,!gccgo,!appengine
DATA ·SCALE(SB)/8, $0x37F4000000000000
GLOBL ·SCALE(SB), 8, $8
DATA ·TWO32(SB)/8, $0x41F0000000000000
GLOBL ·TWO32(SB), 8, $8
DATA ·TWO64(SB)/8, $0x43F0000000000000
GLOBL ·TWO64(SB), 8, $8
DATA ·TWO96(SB)/8, $0x45F0000000000000
GLOBL ·TWO96(SB), 8, $8
DATA ·ALPHA32(SB)/8, $0x45E8000000000000
GLOBL ·ALPHA32(SB), 8, $8
DATA ·ALPHA64(SB)/8, $0x47E8000000000000
GLOBL ·ALPHA64(SB), 8, $8
DATA ·ALPHA96(SB)/8, $0x49E8000000000000
GLOBL ·ALPHA96(SB), 8, $8
DATA ·ALPHA130(SB)/8, $0x4C08000000000000
GLOBL ·ALPHA130(SB), 8, $8
DATA ·DOFFSET0(SB)/8, $0x4330000000000000
GLOBL ·DOFFSET0(SB), 8, $8
DATA ·DOFFSET1(SB)/8, $0x4530000000000000
GLOBL ·DOFFSET1(SB), 8, $8
DATA ·DOFFSET2(SB)/8, $0x4730000000000000
GLOBL ·DOFFSET2(SB), 8, $8
DATA ·DOFFSET3(SB)/8, $0x4930000000000000
GLOBL ·DOFFSET3(SB), 8, $8
DATA ·DOFFSET3MINUSTWO128(SB)/8, $0x492FFFFE00000000
GLOBL ·DOFFSET3MINUSTWO128(SB), 8, $8
DATA ·HOFFSET0(SB)/8, $0x43300001FFFFFFFB
GLOBL ·HOFFSET0(SB), 8, $8
DATA ·HOFFSET1(SB)/8, $0x45300001FFFFFFFE
GLOBL ·HOFFSET1(SB), 8, $8
DATA ·HOFFSET2(SB)/8, $0x47300001FFFFFFFE
GLOBL ·HOFFSET2(SB), 8, $8
DATA ·HOFFSET3(SB)/8, $0x49300003FFFFFFFE
GLOBL ·HOFFSET3(SB), 8, $8
DATA ·ROUNDING(SB)/2, $0x137f
GLOBL ·ROUNDING(SB), 8, $2

View file

@ -1,497 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This code was translated into a form compatible with 6a from the public
// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
// +build amd64,!gccgo,!appengine
// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]key)
TEXT ·poly1305(SB),0,$224-32
MOVQ out+0(FP),DI
MOVQ m+8(FP),SI
MOVQ mlen+16(FP),DX
MOVQ key+24(FP),CX
MOVQ SP,R11
MOVQ $31,R9
NOTQ R9
ANDQ R9,SP
ADDQ $32,SP
MOVQ R11,32(SP)
MOVQ R12,40(SP)
MOVQ R13,48(SP)
MOVQ R14,56(SP)
MOVQ R15,64(SP)
MOVQ BX,72(SP)
MOVQ BP,80(SP)
FLDCW ·ROUNDING(SB)
MOVL 0(CX),R8
MOVL 4(CX),R9
MOVL 8(CX),AX
MOVL 12(CX),R10
MOVQ DI,88(SP)
MOVQ CX,96(SP)
MOVL $0X43300000,108(SP)
MOVL $0X45300000,116(SP)
MOVL $0X47300000,124(SP)
MOVL $0X49300000,132(SP)
ANDL $0X0FFFFFFF,R8
ANDL $0X0FFFFFFC,R9
ANDL $0X0FFFFFFC,AX
ANDL $0X0FFFFFFC,R10
MOVL R8,104(SP)
MOVL R9,112(SP)
MOVL AX,120(SP)
MOVL R10,128(SP)
FMOVD 104(SP), F0
FSUBD ·DOFFSET0(SB), F0
FMOVD 112(SP), F0
FSUBD ·DOFFSET1(SB), F0
FMOVD 120(SP), F0
FSUBD ·DOFFSET2(SB), F0
FMOVD 128(SP), F0
FSUBD ·DOFFSET3(SB), F0
FXCHD F0, F3
FMOVDP F0, 136(SP)
FXCHD F0, F1
FMOVD F0, 144(SP)
FMULD ·SCALE(SB), F0
FMOVDP F0, 152(SP)
FMOVD F0, 160(SP)
FMULD ·SCALE(SB), F0
FMOVDP F0, 168(SP)
FMOVD F0, 176(SP)
FMULD ·SCALE(SB), F0
FMOVDP F0, 184(SP)
FLDZ
FLDZ
FLDZ
FLDZ
CMPQ DX,$16
JB ADDATMOST15BYTES
INITIALATLEAST16BYTES:
MOVL 12(SI),DI
MOVL 8(SI),CX
MOVL 4(SI),R8
MOVL 0(SI),R9
MOVL DI,128(SP)
MOVL CX,120(SP)
MOVL R8,112(SP)
MOVL R9,104(SP)
ADDQ $16,SI
SUBQ $16,DX
FXCHD F0, F3
FADDD 128(SP), F0
FSUBD ·DOFFSET3MINUSTWO128(SB), F0
FXCHD F0, F1
FADDD 112(SP), F0
FSUBD ·DOFFSET1(SB), F0
FXCHD F0, F2
FADDD 120(SP), F0
FSUBD ·DOFFSET2(SB), F0
FXCHD F0, F3
FADDD 104(SP), F0
FSUBD ·DOFFSET0(SB), F0
CMPQ DX,$16
JB MULTIPLYADDATMOST15BYTES
MULTIPLYADDATLEAST16BYTES:
MOVL 12(SI),DI
MOVL 8(SI),CX
MOVL 4(SI),R8
MOVL 0(SI),R9
MOVL DI,128(SP)
MOVL CX,120(SP)
MOVL R8,112(SP)
MOVL R9,104(SP)
ADDQ $16,SI
SUBQ $16,DX
FMOVD ·ALPHA130(SB), F0
FADDD F2,F0
FSUBD ·ALPHA130(SB), F0
FSUBD F0,F2
FMULD ·SCALE(SB), F0
FMOVD ·ALPHA32(SB), F0
FADDD F2,F0
FSUBD ·ALPHA32(SB), F0
FSUBD F0,F2
FXCHD F0, F2
FADDDP F0,F1
FMOVD ·ALPHA64(SB), F0
FADDD F4,F0
FSUBD ·ALPHA64(SB), F0
FSUBD F0,F4
FMOVD ·ALPHA96(SB), F0
FADDD F6,F0
FSUBD ·ALPHA96(SB), F0
FSUBD F0,F6
FXCHD F0, F6
FADDDP F0,F1
FXCHD F0, F3
FADDDP F0,F5
FXCHD F0, F3
FADDDP F0,F1
FMOVD 176(SP), F0
FMULD F3,F0
FMOVD 160(SP), F0
FMULD F4,F0
FMOVD 144(SP), F0
FMULD F5,F0
FMOVD 136(SP), F0
FMULDP F0,F6
FMOVD 160(SP), F0
FMULD F4,F0
FADDDP F0,F3
FMOVD 144(SP), F0
FMULD F4,F0
FADDDP F0,F2
FMOVD 136(SP), F0
FMULD F4,F0
FADDDP F0,F1
FMOVD 184(SP), F0
FMULDP F0,F4
FXCHD F0, F3
FADDDP F0,F5
FMOVD 144(SP), F0
FMULD F4,F0
FADDDP F0,F2
FMOVD 136(SP), F0
FMULD F4,F0
FADDDP F0,F1
FMOVD 184(SP), F0
FMULD F4,F0
FADDDP F0,F3
FMOVD 168(SP), F0
FMULDP F0,F4
FXCHD F0, F3
FADDDP F0,F4
FMOVD 136(SP), F0
FMULD F5,F0
FADDDP F0,F1
FXCHD F0, F3
FMOVD 184(SP), F0
FMULD F5,F0
FADDDP F0,F3
FXCHD F0, F1
FMOVD 168(SP), F0
FMULD F5,F0
FADDDP F0,F1
FMOVD 152(SP), F0
FMULDP F0,F5
FXCHD F0, F4
FADDDP F0,F1
CMPQ DX,$16
FXCHD F0, F2
FMOVD 128(SP), F0
FSUBD ·DOFFSET3MINUSTWO128(SB), F0
FADDDP F0,F1
FXCHD F0, F1
FMOVD 120(SP), F0
FSUBD ·DOFFSET2(SB), F0
FADDDP F0,F1
FXCHD F0, F3
FMOVD 112(SP), F0
FSUBD ·DOFFSET1(SB), F0
FADDDP F0,F1
FXCHD F0, F2
FMOVD 104(SP), F0
FSUBD ·DOFFSET0(SB), F0
FADDDP F0,F1
JAE MULTIPLYADDATLEAST16BYTES
MULTIPLYADDATMOST15BYTES:
FMOVD ·ALPHA130(SB), F0
FADDD F2,F0
FSUBD ·ALPHA130(SB), F0
FSUBD F0,F2
FMULD ·SCALE(SB), F0
FMOVD ·ALPHA32(SB), F0
FADDD F2,F0
FSUBD ·ALPHA32(SB), F0
FSUBD F0,F2
FMOVD ·ALPHA64(SB), F0
FADDD F5,F0
FSUBD ·ALPHA64(SB), F0
FSUBD F0,F5
FMOVD ·ALPHA96(SB), F0
FADDD F7,F0
FSUBD ·ALPHA96(SB), F0
FSUBD F0,F7
FXCHD F0, F7
FADDDP F0,F1
FXCHD F0, F5
FADDDP F0,F1
FXCHD F0, F3
FADDDP F0,F5
FADDDP F0,F1
FMOVD 176(SP), F0
FMULD F1,F0
FMOVD 160(SP), F0
FMULD F2,F0
FMOVD 144(SP), F0
FMULD F3,F0
FMOVD 136(SP), F0
FMULDP F0,F4
FMOVD 160(SP), F0
FMULD F5,F0
FADDDP F0,F3
FMOVD 144(SP), F0
FMULD F5,F0
FADDDP F0,F2
FMOVD 136(SP), F0
FMULD F5,F0
FADDDP F0,F1
FMOVD 184(SP), F0
FMULDP F0,F5
FXCHD F0, F4
FADDDP F0,F3
FMOVD 144(SP), F0
FMULD F5,F0
FADDDP F0,F2
FMOVD 136(SP), F0
FMULD F5,F0
FADDDP F0,F1
FMOVD 184(SP), F0
FMULD F5,F0
FADDDP F0,F4
FMOVD 168(SP), F0
FMULDP F0,F5
FXCHD F0, F4
FADDDP F0,F2
FMOVD 136(SP), F0
FMULD F5,F0
FADDDP F0,F1
FMOVD 184(SP), F0
FMULD F5,F0
FADDDP F0,F4
FMOVD 168(SP), F0
FMULD F5,F0
FADDDP F0,F3
FMOVD 152(SP), F0
FMULDP F0,F5
FXCHD F0, F4
FADDDP F0,F1
ADDATMOST15BYTES:
CMPQ DX,$0
JE NOMOREBYTES
MOVL $0,0(SP)
MOVL $0, 4 (SP)
MOVL $0, 8 (SP)
MOVL $0, 12 (SP)
LEAQ 0(SP),DI
MOVQ DX,CX
REP; MOVSB
MOVB $1,0(DI)
MOVL 12 (SP),DI
MOVL 8 (SP),SI
MOVL 4 (SP),DX
MOVL 0(SP),CX
MOVL DI,128(SP)
MOVL SI,120(SP)
MOVL DX,112(SP)
MOVL CX,104(SP)
FXCHD F0, F3
FADDD 128(SP), F0
FSUBD ·DOFFSET3(SB), F0
FXCHD F0, F2
FADDD 120(SP), F0
FSUBD ·DOFFSET2(SB), F0
FXCHD F0, F1
FADDD 112(SP), F0
FSUBD ·DOFFSET1(SB), F0
FXCHD F0, F3
FADDD 104(SP), F0
FSUBD ·DOFFSET0(SB), F0
FMOVD ·ALPHA130(SB), F0
FADDD F3,F0
FSUBD ·ALPHA130(SB), F0
FSUBD F0,F3
FMULD ·SCALE(SB), F0
FMOVD ·ALPHA32(SB), F0
FADDD F2,F0
FSUBD ·ALPHA32(SB), F0
FSUBD F0,F2
FMOVD ·ALPHA64(SB), F0
FADDD F6,F0
FSUBD ·ALPHA64(SB), F0
FSUBD F0,F6
FMOVD ·ALPHA96(SB), F0
FADDD F5,F0
FSUBD ·ALPHA96(SB), F0
FSUBD F0,F5
FXCHD F0, F4
FADDDP F0,F3
FXCHD F0, F6
FADDDP F0,F1
FXCHD F0, F3
FADDDP F0,F5
FXCHD F0, F3
FADDDP F0,F1
FMOVD 176(SP), F0
FMULD F3,F0
FMOVD 160(SP), F0
FMULD F4,F0
FMOVD 144(SP), F0
FMULD F5,F0
FMOVD 136(SP), F0
FMULDP F0,F6
FMOVD 160(SP), F0
FMULD F5,F0
FADDDP F0,F3
FMOVD 144(SP), F0
FMULD F5,F0
FADDDP F0,F2
FMOVD 136(SP), F0
FMULD F5,F0
FADDDP F0,F1
FMOVD 184(SP), F0
FMULDP F0,F5
FXCHD F0, F4
FADDDP F0,F5
FMOVD 144(SP), F0
FMULD F6,F0
FADDDP F0,F2
FMOVD 136(SP), F0
FMULD F6,F0
FADDDP F0,F1
FMOVD 184(SP), F0
FMULD F6,F0
FADDDP F0,F4
FMOVD 168(SP), F0
FMULDP F0,F6
FXCHD F0, F5
FADDDP F0,F4
FMOVD 136(SP), F0
FMULD F2,F0
FADDDP F0,F1
FMOVD 184(SP), F0
FMULD F2,F0
FADDDP F0,F5
FMOVD 168(SP), F0
FMULD F2,F0
FADDDP F0,F3
FMOVD 152(SP), F0
FMULDP F0,F2
FXCHD F0, F1
FADDDP F0,F3
FXCHD F0, F3
FXCHD F0, F2
NOMOREBYTES:
MOVL $0,R10
FMOVD ·ALPHA130(SB), F0
FADDD F4,F0
FSUBD ·ALPHA130(SB), F0
FSUBD F0,F4
FMULD ·SCALE(SB), F0
FMOVD ·ALPHA32(SB), F0
FADDD F2,F0
FSUBD ·ALPHA32(SB), F0
FSUBD F0,F2
FMOVD ·ALPHA64(SB), F0
FADDD F4,F0
FSUBD ·ALPHA64(SB), F0
FSUBD F0,F4
FMOVD ·ALPHA96(SB), F0
FADDD F6,F0
FSUBD ·ALPHA96(SB), F0
FXCHD F0, F6
FSUBD F6,F0
FXCHD F0, F4
FADDDP F0,F3
FXCHD F0, F4
FADDDP F0,F1
FXCHD F0, F2
FADDDP F0,F3
FXCHD F0, F4
FADDDP F0,F3
FXCHD F0, F3
FADDD ·HOFFSET0(SB), F0
FXCHD F0, F3
FADDD ·HOFFSET1(SB), F0
FXCHD F0, F1
FADDD ·HOFFSET2(SB), F0
FXCHD F0, F2
FADDD ·HOFFSET3(SB), F0
FXCHD F0, F3
FMOVDP F0, 104(SP)
FMOVDP F0, 112(SP)
FMOVDP F0, 120(SP)
FMOVDP F0, 128(SP)
MOVL 108(SP),DI
ANDL $63,DI
MOVL 116(SP),SI
ANDL $63,SI
MOVL 124(SP),DX
ANDL $63,DX
MOVL 132(SP),CX
ANDL $63,CX
MOVL 112(SP),R8
ADDL DI,R8
MOVQ R8,112(SP)
MOVL 120(SP),DI
ADCL SI,DI
MOVQ DI,120(SP)
MOVL 128(SP),DI
ADCL DX,DI
MOVQ DI,128(SP)
MOVL R10,DI
ADCL CX,DI
MOVQ DI,136(SP)
MOVQ $5,DI
MOVL 104(SP),SI
ADDL SI,DI
MOVQ DI,104(SP)
MOVL R10,DI
MOVQ 112(SP),DX
ADCL DX,DI
MOVQ DI,112(SP)
MOVL R10,DI
MOVQ 120(SP),CX
ADCL CX,DI
MOVQ DI,120(SP)
MOVL R10,DI
MOVQ 128(SP),R8
ADCL R8,DI
MOVQ DI,128(SP)
MOVQ $0XFFFFFFFC,DI
MOVQ 136(SP),R9
ADCL R9,DI
SARL $16,DI
MOVQ DI,R9
XORL $0XFFFFFFFF,R9
ANDQ DI,SI
MOVQ 104(SP),AX
ANDQ R9,AX
ORQ AX,SI
ANDQ DI,DX
MOVQ 112(SP),AX
ANDQ R9,AX
ORQ AX,DX
ANDQ DI,CX
MOVQ 120(SP),AX
ANDQ R9,AX
ORQ AX,CX
ANDQ DI,R8
MOVQ 128(SP),DI
ANDQ R9,DI
ORQ DI,R8
MOVQ 88(SP),DI
MOVQ 96(SP),R9
ADDL 16(R9),SI
ADCL 20(R9),DX
ADCL 24(R9),CX
ADCL 28(R9),R8
MOVL SI,0(DI)
MOVL DX,4(DI)
MOVL CX,8(DI)
MOVL R8,12(DI)
MOVQ 32(SP),R11
MOVQ 40(SP),R12
MOVQ 48(SP),R13
MOVQ 56(SP),R14
MOVQ 64(SP),R15
MOVQ 72(SP),BX
MOVQ 80(SP),BP
MOVQ R11,SP
RET

View file

@ -1,379 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This code was translated into a form compatible with 5a from the public
// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305.
// +build arm,!gccgo,!appengine
DATA poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff
DATA poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03
DATA poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff
DATA poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff
DATA poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff
GLOBL poly1305_init_constants_armv6<>(SB), 8, $20
// Warning: the linker may use R11 to synthesize certain instructions. Please
// take care and verify that no synthetic instructions use it.
TEXT poly1305_init_ext_armv6<>(SB),4,$-4
MOVM.DB.W [R4-R11], (R13)
MOVM.IA.W (R1), [R2-R5]
MOVW $poly1305_init_constants_armv6<>(SB), R7
MOVW R2, R8
MOVW R2>>26, R9
MOVW R3>>20, g
MOVW R4>>14, R11
MOVW R5>>8, R12
ORR R3<<6, R9, R9
ORR R4<<12, g, g
ORR R5<<18, R11, R11
MOVM.IA (R7), [R2-R6]
AND R8, R2, R2
AND R9, R3, R3
AND g, R4, R4
AND R11, R5, R5
AND R12, R6, R6
MOVM.IA.W [R2-R6], (R0)
EOR R2, R2, R2
EOR R3, R3, R3
EOR R4, R4, R4
EOR R5, R5, R5
EOR R6, R6, R6
MOVM.IA.W [R2-R6], (R0)
MOVM.IA.W (R1), [R2-R5]
MOVM.IA [R2-R6], (R0)
MOVM.IA.W (R13), [R4-R11]
RET
#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \
MOVBU (offset+0)(Rsrc), Rtmp; \
MOVBU Rtmp, (offset+0)(Rdst); \
MOVBU (offset+1)(Rsrc), Rtmp; \
MOVBU Rtmp, (offset+1)(Rdst); \
MOVBU (offset+2)(Rsrc), Rtmp; \
MOVBU Rtmp, (offset+2)(Rdst); \
MOVBU (offset+3)(Rsrc), Rtmp; \
MOVBU Rtmp, (offset+3)(Rdst)
TEXT poly1305_blocks_armv6<>(SB),4,$-4
MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13)
SUB $128, R13
MOVW R0, 36(R13)
MOVW R1, 40(R13)
MOVW R2, 44(R13)
MOVW R1, R14
MOVW R2, R12
MOVW 56(R0), R8
WORD $0xe1180008 // TST R8, R8 not working see issue 5921
EOR R6, R6, R6
MOVW.EQ $(1<<24), R6
MOVW R6, 32(R13)
ADD $64, R13, g
MOVM.IA (R0), [R0-R9]
MOVM.IA [R0-R4], (g)
CMP $16, R12
BLO poly1305_blocks_armv6_done
poly1305_blocks_armv6_mainloop:
WORD $0xe31e0003 // TST R14, #3 not working see issue 5921
BEQ poly1305_blocks_armv6_mainloop_aligned
ADD $48, R13, g
MOVW_UNALIGNED(R14, g, R0, 0)
MOVW_UNALIGNED(R14, g, R0, 4)
MOVW_UNALIGNED(R14, g, R0, 8)
MOVW_UNALIGNED(R14, g, R0, 12)
MOVM.IA (g), [R0-R3]
ADD $16, R14
B poly1305_blocks_armv6_mainloop_loaded
poly1305_blocks_armv6_mainloop_aligned:
MOVM.IA.W (R14), [R0-R3]
poly1305_blocks_armv6_mainloop_loaded:
MOVW R0>>26, g
MOVW R1>>20, R11
MOVW R2>>14, R12
MOVW R14, 40(R13)
MOVW R3>>8, R4
ORR R1<<6, g, g
ORR R2<<12, R11, R11
ORR R3<<18, R12, R12
BIC $0xfc000000, R0, R0
BIC $0xfc000000, g, g
MOVW 32(R13), R3
BIC $0xfc000000, R11, R11
BIC $0xfc000000, R12, R12
ADD R0, R5, R5
ADD g, R6, R6
ORR R3, R4, R4
ADD R11, R7, R7
ADD $64, R13, R14
ADD R12, R8, R8
ADD R4, R9, R9
MOVM.IA (R14), [R0-R4]
MULLU R4, R5, (R11, g)
MULLU R3, R5, (R14, R12)
MULALU R3, R6, (R11, g)
MULALU R2, R6, (R14, R12)
MULALU R2, R7, (R11, g)
MULALU R1, R7, (R14, R12)
ADD R4<<2, R4, R4
ADD R3<<2, R3, R3
MULALU R1, R8, (R11, g)
MULALU R0, R8, (R14, R12)
MULALU R0, R9, (R11, g)
MULALU R4, R9, (R14, R12)
MOVW g, 24(R13)
MOVW R11, 28(R13)
MOVW R12, 16(R13)
MOVW R14, 20(R13)
MULLU R2, R5, (R11, g)
MULLU R1, R5, (R14, R12)
MULALU R1, R6, (R11, g)
MULALU R0, R6, (R14, R12)
MULALU R0, R7, (R11, g)
MULALU R4, R7, (R14, R12)
ADD R2<<2, R2, R2
ADD R1<<2, R1, R1
MULALU R4, R8, (R11, g)
MULALU R3, R8, (R14, R12)
MULALU R3, R9, (R11, g)
MULALU R2, R9, (R14, R12)
MOVW g, 8(R13)
MOVW R11, 12(R13)
MOVW R12, 0(R13)
MOVW R14, w+4(SP)
MULLU R0, R5, (R11, g)
MULALU R4, R6, (R11, g)
MULALU R3, R7, (R11, g)
MULALU R2, R8, (R11, g)
MULALU R1, R9, (R11, g)
MOVM.IA (R13), [R0-R7]
MOVW g>>26, R12
MOVW R4>>26, R14
ORR R11<<6, R12, R12
ORR R5<<6, R14, R14
BIC $0xfc000000, g, g
BIC $0xfc000000, R4, R4
ADD.S R12, R0, R0
ADC $0, R1, R1
ADD.S R14, R6, R6
ADC $0, R7, R7
MOVW R0>>26, R12
MOVW R6>>26, R14
ORR R1<<6, R12, R12
ORR R7<<6, R14, R14
BIC $0xfc000000, R0, R0
BIC $0xfc000000, R6, R6
ADD R14<<2, R14, R14
ADD.S R12, R2, R2
ADC $0, R3, R3
ADD R14, g, g
MOVW R2>>26, R12
MOVW g>>26, R14
ORR R3<<6, R12, R12
BIC $0xfc000000, g, R5
BIC $0xfc000000, R2, R7
ADD R12, R4, R4
ADD R14, R0, R0
MOVW R4>>26, R12
BIC $0xfc000000, R4, R8
ADD R12, R6, R9
MOVW w+44(SP), R12
MOVW w+40(SP), R14
MOVW R0, R6
CMP $32, R12
SUB $16, R12, R12
MOVW R12, 44(R13)
BHS poly1305_blocks_armv6_mainloop
poly1305_blocks_armv6_done:
MOVW 36(R13), R12
MOVW R5, 20(R12)
MOVW R6, 24(R12)
MOVW R7, 28(R12)
MOVW R8, 32(R12)
MOVW R9, 36(R12)
ADD $128, R13, R13
MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14]
RET
#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \
MOVBU.P 1(Rsrc), Rtmp; \
MOVBU.P Rtmp, 1(Rdst); \
MOVBU.P 1(Rsrc), Rtmp; \
MOVBU.P Rtmp, 1(Rdst)
#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \
MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \
MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp)
TEXT poly1305_finish_ext_armv6<>(SB),4,$-4
MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13)
SUB $16, R13, R13
MOVW R0, R5
MOVW R1, R6
MOVW R2, R7
MOVW R3, R8
AND.S R2, R2, R2
BEQ poly1305_finish_ext_armv6_noremaining
EOR R0, R0
MOVW R13, R9
MOVW R0, 0(R13)
MOVW R0, 4(R13)
MOVW R0, 8(R13)
MOVW R0, 12(R13)
WORD $0xe3110003 // TST R1, #3 not working see issue 5921
BEQ poly1305_finish_ext_armv6_aligned
WORD $0xe3120008 // TST R2, #8 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip8
MOVWP_UNALIGNED(R1, R9, g)
MOVWP_UNALIGNED(R1, R9, g)
poly1305_finish_ext_armv6_skip8:
WORD $0xe3120004 // TST $4, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip4
MOVWP_UNALIGNED(R1, R9, g)
poly1305_finish_ext_armv6_skip4:
WORD $0xe3120002 // TST $2, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip2
MOVHUP_UNALIGNED(R1, R9, g)
B poly1305_finish_ext_armv6_skip2
poly1305_finish_ext_armv6_aligned:
WORD $0xe3120008 // TST R2, #8 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip8_aligned
MOVM.IA.W (R1), [g-R11]
MOVM.IA.W [g-R11], (R9)
poly1305_finish_ext_armv6_skip8_aligned:
WORD $0xe3120004 // TST $4, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip4_aligned
MOVW.P 4(R1), g
MOVW.P g, 4(R9)
poly1305_finish_ext_armv6_skip4_aligned:
WORD $0xe3120002 // TST $2, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip2
MOVHU.P 2(R1), g
MOVH.P g, 2(R9)
poly1305_finish_ext_armv6_skip2:
WORD $0xe3120001 // TST $1, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip1
MOVBU.P 1(R1), g
MOVBU.P g, 1(R9)
poly1305_finish_ext_armv6_skip1:
MOVW $1, R11
MOVBU R11, 0(R9)
MOVW R11, 56(R5)
MOVW R5, R0
MOVW R13, R1
MOVW $16, R2
BL poly1305_blocks_armv6<>(SB)
poly1305_finish_ext_armv6_noremaining:
MOVW 20(R5), R0
MOVW 24(R5), R1
MOVW 28(R5), R2
MOVW 32(R5), R3
MOVW 36(R5), R4
MOVW R4>>26, R12
BIC $0xfc000000, R4, R4
ADD R12<<2, R12, R12
ADD R12, R0, R0
MOVW R0>>26, R12
BIC $0xfc000000, R0, R0
ADD R12, R1, R1
MOVW R1>>26, R12
BIC $0xfc000000, R1, R1
ADD R12, R2, R2
MOVW R2>>26, R12
BIC $0xfc000000, R2, R2
ADD R12, R3, R3
MOVW R3>>26, R12
BIC $0xfc000000, R3, R3
ADD R12, R4, R4
ADD $5, R0, R6
MOVW R6>>26, R12
BIC $0xfc000000, R6, R6
ADD R12, R1, R7
MOVW R7>>26, R12
BIC $0xfc000000, R7, R7
ADD R12, R2, g
MOVW g>>26, R12
BIC $0xfc000000, g, g
ADD R12, R3, R11
MOVW $-(1<<26), R12
ADD R11>>26, R12, R12
BIC $0xfc000000, R11, R11
ADD R12, R4, R14
MOVW R14>>31, R12
SUB $1, R12
AND R12, R6, R6
AND R12, R7, R7
AND R12, g, g
AND R12, R11, R11
AND R12, R14, R14
MVN R12, R12
AND R12, R0, R0
AND R12, R1, R1
AND R12, R2, R2
AND R12, R3, R3
AND R12, R4, R4
ORR R6, R0, R0
ORR R7, R1, R1
ORR g, R2, R2
ORR R11, R3, R3
ORR R14, R4, R4
ORR R1<<26, R0, R0
MOVW R1>>6, R1
ORR R2<<20, R1, R1
MOVW R2>>12, R2
ORR R3<<14, R2, R2
MOVW R3>>18, R3
ORR R4<<8, R3, R3
MOVW 40(R5), R6
MOVW 44(R5), R7
MOVW 48(R5), g
MOVW 52(R5), R11
ADD.S R6, R0, R0
ADC.S R7, R1, R1
ADC.S g, R2, R2
ADC.S R11, R3, R3
MOVM.IA [R0-R3], (R8)
MOVW R5, R12
EOR R0, R0, R0
EOR R1, R1, R1
EOR R2, R2, R2
EOR R3, R3, R3
EOR R4, R4, R4
EOR R5, R5, R5
EOR R6, R6, R6
EOR R7, R7, R7
MOVM.IA.W [R0-R7], (R12)
MOVM.IA [R0-R7], (R12)
ADD $16, R13, R13
MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14]
RET
// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]key)
TEXT ·poly1305_auth_armv6(SB),0,$280-16
MOVW out+0(FP), R4
MOVW m+4(FP), R5
MOVW mlen+8(FP), R6
MOVW key+12(FP), R7
MOVW R13, R8
BIC $63, R13
SUB $64, R13, R13
MOVW R13, R0
MOVW R7, R1
BL poly1305_init_ext_armv6<>(SB)
BIC.S $15, R6, R2
BEQ poly1305_auth_armv6_noblocks
MOVW R13, R0
MOVW R5, R1
ADD R2, R5, R5
SUB R2, R6, R6
BL poly1305_blocks_armv6<>(SB)
poly1305_auth_armv6_noblocks:
MOVW R13, R0
MOVW R5, R1
MOVW R6, R2
MOVW R4, R3
BL poly1305_finish_ext_armv6<>(SB)
MOVW R8, R13
RET

View file

@ -33,6 +33,12 @@ var testData = []struct {
make([]byte, 32),
make([]byte, 16),
},
{
// This test triggers an edge-case. See https://go-review.googlesource.com/#/c/30101/.
[]byte{0x81, 0xd8, 0xb2, 0xe4, 0x6a, 0x25, 0x21, 0x3b, 0x58, 0xfe, 0xe4, 0x21, 0x3a, 0x2a, 0x28, 0xe9, 0x21, 0xc1, 0x2a, 0x96, 0x32, 0x51, 0x6d, 0x3b, 0x73, 0x27, 0x27, 0x27, 0xbe, 0xcf, 0x21, 0x29},
[]byte{0x3b, 0x3a, 0x29, 0xe9, 0x3b, 0x21, 0x3a, 0x5c, 0x5c, 0x3b, 0x3b, 0x05, 0x3a, 0x3a, 0x8c, 0x0d},
[]byte{0x6d, 0xc1, 0x8b, 0x8c, 0x34, 0x4c, 0xd7, 0x99, 0x27, 0x11, 0x8b, 0xbe, 0x84, 0xb7, 0xf3, 0x14},
},
}
func testSum(t *testing.T, unaligned bool) {

View file

@ -2,14 +2,12 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64,!gccgo,!appengine
// +build amd64,!gccgo,!appengine,go1.7
package poly1305
// This function is implemented in poly1305_amd64.s
// This function is implemented in sum_amd64.s
//go:noescape
func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
// Sum generates an authenticator for m using a one-time key and puts the

View file

@ -0,0 +1,125 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64,!gccgo,!appengine,go1.7
#include "textflag.h"
#define POLY1305_ADD(msg, h0, h1, h2) \
ADDQ 0(msg), h0; \
ADCQ 8(msg), h1; \
ADCQ $1, h2; \
LEAQ 16(msg), msg
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
MOVQ r0, AX; \
MULQ h0; \
MOVQ AX, t0; \
MOVQ DX, t1; \
MOVQ r0, AX; \
MULQ h1; \
ADDQ AX, t1; \
ADCQ $0, DX; \
MOVQ r0, t2; \
IMULQ h2, t2; \
ADDQ DX, t2; \
\
MOVQ r1, AX; \
MULQ h0; \
ADDQ AX, t1; \
ADCQ $0, DX; \
MOVQ DX, h0; \
MOVQ r1, t3; \
IMULQ h2, t3; \
MOVQ r1, AX; \
MULQ h1; \
ADDQ AX, t2; \
ADCQ DX, t3; \
ADDQ h0, t2; \
ADCQ $0, t3; \
\
MOVQ t0, h0; \
MOVQ t1, h1; \
MOVQ t2, h2; \
ANDQ $3, h2; \
MOVQ t2, t0; \
ANDQ $0xFFFFFFFFFFFFFFFC, t0; \
ADDQ t0, h0; \
ADCQ t3, h1; \
ADCQ $0, h2; \
SHRQ $2, t3, t2; \
SHRQ $2, t3; \
ADDQ t2, h0; \
ADCQ t3, h1; \
ADCQ $0, h2
DATA poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
DATA poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
GLOBL poly1305Mask<>(SB), RODATA, $16
// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]key)
TEXT ·poly1305(SB), $0-32
MOVQ out+0(FP), DI
MOVQ m+8(FP), SI
MOVQ mlen+16(FP), R15
MOVQ key+24(FP), AX
MOVQ 0(AX), R11
MOVQ 8(AX), R12
ANDQ poly1305Mask<>(SB), R11 // r0
ANDQ poly1305Mask<>+8(SB), R12 // r1
XORQ R8, R8 // h0
XORQ R9, R9 // h1
XORQ R10, R10 // h2
CMPQ R15, $16
JB bytes_between_0_and_15
loop:
POLY1305_ADD(SI, R8, R9, R10)
multiply:
POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
SUBQ $16, R15
CMPQ R15, $16
JAE loop
bytes_between_0_and_15:
TESTQ R15, R15
JZ done
MOVQ $1, BX
XORQ CX, CX
XORQ R13, R13
ADDQ R15, SI
flush_buffer:
SHLQ $8, BX, CX
SHLQ $8, BX
MOVB -1(SI), R13
XORQ R13, BX
DECQ SI
DECQ R15
JNZ flush_buffer
ADDQ BX, R8
ADCQ CX, R9
ADCQ $0, R10
MOVQ $16, R15
JMP multiply
done:
MOVQ R8, AX
MOVQ R9, BX
SUBQ $0xFFFFFFFFFFFFFFFB, AX
SBBQ $0xFFFFFFFFFFFFFFFF, BX
SBBQ $3, R10
CMOVQCS R8, AX
CMOVQCS R9, BX
MOVQ key+24(FP), R8
ADDQ 16(R8), AX
ADCQ 24(R8), BX
MOVQ AX, 0(DI)
MOVQ BX, 8(DI)
RET

View file

@ -2,14 +2,12 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build arm,!gccgo,!appengine
// +build arm,!gccgo,!appengine,!nacl
package poly1305
// This function is implemented in poly1305_arm.s
// This function is implemented in sum_arm.s
//go:noescape
func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte)
// Sum generates an authenticator for m using a one-time key and puts the

View file

@ -0,0 +1,394 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// This code was translated into a form compatible with 5a from the public
// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305.
// +build arm,!gccgo,!appengine,!nacl
DATA poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff
DATA poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03
DATA poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff
DATA poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff
DATA poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff
GLOBL poly1305_init_constants_armv6<>(SB), 8, $20
// Warning: the linker may use R11 to synthesize certain instructions. Please
// take care and verify that no synthetic instructions use it.
TEXT poly1305_init_ext_armv6<>(SB), NOSPLIT|NOFRAME, $0
MOVM.DB.W [R4-R11], (R13)
MOVM.IA.W (R1), [R2-R5]
MOVW $poly1305_init_constants_armv6<>(SB), R7
MOVW R2, R8
MOVW R2>>26, R9
MOVW R3>>20, g
MOVW R4>>14, R11
MOVW R5>>8, R12
ORR R3<<6, R9, R9
ORR R4<<12, g, g
ORR R5<<18, R11, R11
MOVM.IA (R7), [R2-R6]
AND R8, R2, R2
AND R9, R3, R3
AND g, R4, R4
AND R11, R5, R5
AND R12, R6, R6
MOVM.IA.W [R2-R6], (R0)
EOR R2, R2, R2
EOR R3, R3, R3
EOR R4, R4, R4
EOR R5, R5, R5
EOR R6, R6, R6
MOVM.IA.W [R2-R6], (R0)
MOVM.IA.W (R1), [R2-R5]
MOVM.IA [R2-R6], (R0)
MOVM.IA.W (R13), [R4-R11]
RET
#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \
MOVBU (offset+0)(Rsrc), Rtmp; \
MOVBU Rtmp, (offset+0)(Rdst); \
MOVBU (offset+1)(Rsrc), Rtmp; \
MOVBU Rtmp, (offset+1)(Rdst); \
MOVBU (offset+2)(Rsrc), Rtmp; \
MOVBU Rtmp, (offset+2)(Rdst); \
MOVBU (offset+3)(Rsrc), Rtmp; \
MOVBU Rtmp, (offset+3)(Rdst)
TEXT poly1305_blocks_armv6<>(SB), NOSPLIT|NOFRAME, $0
MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13)
SUB $128, R13
MOVW R0, 36(R13)
MOVW R1, 40(R13)
MOVW R2, 44(R13)
MOVW R1, R14
MOVW R2, R12
MOVW 56(R0), R8
WORD $0xe1180008 // TST R8, R8 not working see issue 5921
EOR R6, R6, R6
MOVW.EQ $(1<<24), R6
MOVW R6, 32(R13)
ADD $64, R13, g
MOVM.IA (R0), [R0-R9]
MOVM.IA [R0-R4], (g)
CMP $16, R12
BLO poly1305_blocks_armv6_done
poly1305_blocks_armv6_mainloop:
WORD $0xe31e0003 // TST R14, #3 not working see issue 5921
BEQ poly1305_blocks_armv6_mainloop_aligned
ADD $48, R13, g
MOVW_UNALIGNED(R14, g, R0, 0)
MOVW_UNALIGNED(R14, g, R0, 4)
MOVW_UNALIGNED(R14, g, R0, 8)
MOVW_UNALIGNED(R14, g, R0, 12)
MOVM.IA (g), [R0-R3]
ADD $16, R14
B poly1305_blocks_armv6_mainloop_loaded
poly1305_blocks_armv6_mainloop_aligned:
MOVM.IA.W (R14), [R0-R3]
poly1305_blocks_armv6_mainloop_loaded:
MOVW R0>>26, g
MOVW R1>>20, R11
MOVW R2>>14, R12
MOVW R14, 40(R13)
MOVW R3>>8, R4
ORR R1<<6, g, g
ORR R2<<12, R11, R11
ORR R3<<18, R12, R12
BIC $0xfc000000, R0, R0
BIC $0xfc000000, g, g
MOVW 32(R13), R3
BIC $0xfc000000, R11, R11
BIC $0xfc000000, R12, R12
ADD R0, R5, R5
ADD g, R6, R6
ORR R3, R4, R4
ADD R11, R7, R7
ADD $64, R13, R14
ADD R12, R8, R8
ADD R4, R9, R9
MOVM.IA (R14), [R0-R4]
MULLU R4, R5, (R11, g)
MULLU R3, R5, (R14, R12)
MULALU R3, R6, (R11, g)
MULALU R2, R6, (R14, R12)
MULALU R2, R7, (R11, g)
MULALU R1, R7, (R14, R12)
ADD R4<<2, R4, R4
ADD R3<<2, R3, R3
MULALU R1, R8, (R11, g)
MULALU R0, R8, (R14, R12)
MULALU R0, R9, (R11, g)
MULALU R4, R9, (R14, R12)
MOVW g, 24(R13)
MOVW R11, 28(R13)
MOVW R12, 16(R13)
MOVW R14, 20(R13)
MULLU R2, R5, (R11, g)
MULLU R1, R5, (R14, R12)
MULALU R1, R6, (R11, g)
MULALU R0, R6, (R14, R12)
MULALU R0, R7, (R11, g)
MULALU R4, R7, (R14, R12)
ADD R2<<2, R2, R2
ADD R1<<2, R1, R1
MULALU R4, R8, (R11, g)
MULALU R3, R8, (R14, R12)
MULALU R3, R9, (R11, g)
MULALU R2, R9, (R14, R12)
MOVW g, 8(R13)
MOVW R11, 12(R13)
MOVW R12, 0(R13)
MOVW R14, w+4(SP)
MULLU R0, R5, (R11, g)
MULALU R4, R6, (R11, g)
MULALU R3, R7, (R11, g)
MULALU R2, R8, (R11, g)
MULALU R1, R9, (R11, g)
MOVM.IA (R13), [R0-R7]
MOVW g>>26, R12
MOVW R4>>26, R14
ORR R11<<6, R12, R12
ORR R5<<6, R14, R14
BIC $0xfc000000, g, g
BIC $0xfc000000, R4, R4
ADD.S R12, R0, R0
ADC $0, R1, R1
ADD.S R14, R6, R6
ADC $0, R7, R7
MOVW R0>>26, R12
MOVW R6>>26, R14
ORR R1<<6, R12, R12
ORR R7<<6, R14, R14
BIC $0xfc000000, R0, R0
BIC $0xfc000000, R6, R6
ADD R14<<2, R14, R14
ADD.S R12, R2, R2
ADC $0, R3, R3
ADD R14, g, g
MOVW R2>>26, R12
MOVW g>>26, R14
ORR R3<<6, R12, R12
BIC $0xfc000000, g, R5
BIC $0xfc000000, R2, R7
ADD R12, R4, R4
ADD R14, R0, R0
MOVW R4>>26, R12
BIC $0xfc000000, R4, R8
ADD R12, R6, R9
MOVW w+44(SP), R12
MOVW w+40(SP), R14
MOVW R0, R6
CMP $32, R12
SUB $16, R12, R12
MOVW R12, 44(R13)
BHS poly1305_blocks_armv6_mainloop
poly1305_blocks_armv6_done:
MOVW 36(R13), R12
MOVW R5, 20(R12)
MOVW R6, 24(R12)
MOVW R7, 28(R12)
MOVW R8, 32(R12)
MOVW R9, 36(R12)
ADD $128, R13, R13
MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14]
RET
#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \
MOVBU.P 1(Rsrc), Rtmp; \
MOVBU.P Rtmp, 1(Rdst); \
MOVBU.P 1(Rsrc), Rtmp; \
MOVBU.P Rtmp, 1(Rdst)
#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \
MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \
MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp)
TEXT poly1305_finish_ext_armv6<>(SB), NOSPLIT | NOFRAME, $0
MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13)
SUB $16, R13, R13
MOVW R0, R5
MOVW R1, R6
MOVW R2, R7
MOVW R3, R8
AND.S R2, R2, R2
BEQ poly1305_finish_ext_armv6_noremaining
EOR R0, R0
MOVW R13, R9
MOVW R0, 0(R13)
MOVW R0, 4(R13)
MOVW R0, 8(R13)
MOVW R0, 12(R13)
WORD $0xe3110003 // TST R1, #3 not working see issue 5921
BEQ poly1305_finish_ext_armv6_aligned
WORD $0xe3120008 // TST R2, #8 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip8
MOVWP_UNALIGNED(R1, R9, g)
MOVWP_UNALIGNED(R1, R9, g)
poly1305_finish_ext_armv6_skip8:
WORD $0xe3120004 // TST $4, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip4
MOVWP_UNALIGNED(R1, R9, g)
poly1305_finish_ext_armv6_skip4:
WORD $0xe3120002 // TST $2, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip2
MOVHUP_UNALIGNED(R1, R9, g)
B poly1305_finish_ext_armv6_skip2
poly1305_finish_ext_armv6_aligned:
WORD $0xe3120008 // TST R2, #8 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip8_aligned
MOVM.IA.W (R1), [g-R11]
MOVM.IA.W [g-R11], (R9)
poly1305_finish_ext_armv6_skip8_aligned:
WORD $0xe3120004 // TST $4, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip4_aligned
MOVW.P 4(R1), g
MOVW.P g, 4(R9)
poly1305_finish_ext_armv6_skip4_aligned:
WORD $0xe3120002 // TST $2, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip2
MOVHU.P 2(R1), g
MOVH.P g, 2(R9)
poly1305_finish_ext_armv6_skip2:
WORD $0xe3120001 // TST $1, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip1
MOVBU.P 1(R1), g
MOVBU.P g, 1(R9)
poly1305_finish_ext_armv6_skip1:
MOVW $1, R11
MOVBU R11, 0(R9)
MOVW R11, 56(R5)
MOVW R5, R0
MOVW R13, R1
MOVW $16, R2
BL poly1305_blocks_armv6<>(SB)
poly1305_finish_ext_armv6_noremaining:
MOVW 20(R5), R0
MOVW 24(R5), R1
MOVW 28(R5), R2
MOVW 32(R5), R3
MOVW 36(R5), R4
MOVW R4>>26, R12
BIC $0xfc000000, R4, R4
ADD R12<<2, R12, R12
ADD R12, R0, R0
MOVW R0>>26, R12
BIC $0xfc000000, R0, R0
ADD R12, R1, R1
MOVW R1>>26, R12
BIC $0xfc000000, R1, R1
ADD R12, R2, R2
MOVW R2>>26, R12
BIC $0xfc000000, R2, R2
ADD R12, R3, R3
MOVW R3>>26, R12
BIC $0xfc000000, R3, R3
ADD R12, R4, R4
ADD $5, R0, R6
MOVW R6>>26, R12
BIC $0xfc000000, R6, R6
ADD R12, R1, R7
MOVW R7>>26, R12
BIC $0xfc000000, R7, R7
ADD R12, R2, g
MOVW g>>26, R12
BIC $0xfc000000, g, g
ADD R12, R3, R11
MOVW $-(1<<26), R12
ADD R11>>26, R12, R12
BIC $0xfc000000, R11, R11
ADD R12, R4, R14
MOVW R14>>31, R12
SUB $1, R12
AND R12, R6, R6
AND R12, R7, R7
AND R12, g, g
AND R12, R11, R11
AND R12, R14, R14
MVN R12, R12
AND R12, R0, R0
AND R12, R1, R1
AND R12, R2, R2
AND R12, R3, R3
AND R12, R4, R4
ORR R6, R0, R0
ORR R7, R1, R1
ORR g, R2, R2
ORR R11, R3, R3
ORR R14, R4, R4
ORR R1<<26, R0, R0
MOVW R1>>6, R1
ORR R2<<20, R1, R1
MOVW R2>>12, R2
ORR R3<<14, R2, R2
MOVW R3>>18, R3
ORR R4<<8, R3, R3
MOVW 40(R5), R6
MOVW 44(R5), R7
MOVW 48(R5), g
MOVW 52(R5), R11
ADD.S R6, R0, R0
ADC.S R7, R1, R1
ADC.S g, R2, R2
ADC.S R11, R3, R3
MOVM.IA [R0-R3], (R8)
MOVW R5, R12
EOR R0, R0, R0
EOR R1, R1, R1
EOR R2, R2, R2
EOR R3, R3, R3
EOR R4, R4, R4
EOR R5, R5, R5
EOR R6, R6, R6
EOR R7, R7, R7
MOVM.IA.W [R0-R7], (R12)
MOVM.IA [R0-R7], (R12)
ADD $16, R13, R13
MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14]
RET
// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]key)
TEXT ·poly1305_auth_armv6(SB), $280-16
MOVW out+0(FP), R4
MOVW m+4(FP), R5
MOVW mlen+8(FP), R6
MOVW key+12(FP), R7
MOVW R13, R8
BIC $63, R13
SUB $64, R13, R13
MOVW R13, R0
MOVW R7, R1
BL poly1305_init_ext_armv6<>(SB)
BIC.S $15, R6, R2
BEQ poly1305_auth_armv6_noblocks
MOVW R13, R0
MOVW R5, R1
ADD R2, R5, R5
SUB R2, R6, R6
BL poly1305_blocks_armv6<>(SB)
poly1305_auth_armv6_noblocks:
MOVW R13, R0
MOVW R5, R1
MOVW R6, R2
MOVW R4, R3
BL poly1305_finish_ext_armv6<>(SB)
MOVW R8, R13
RET

View file

@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64,!arm gccgo appengine
// +build !amd64,!arm gccgo appengine !go1.7
package poly1305