vendor: add uncommitted files from previous change

2020-01-11 17:56:14 +00:00 · 2020-01-11 17:56:14 +00:00 · e0d41da3e3
commit e0d41da3e3
parent 92662baceb
11 changed files with 1518 additions and 0 deletions
--- a/vendor/golang.org/x/crypto/chacha20/chacha_arm64.go
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_arm64.go
@ -0,0 +1,17 @@
 // Copyright 2018 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // +build go1.11
 // +build !gccgo,!appengine
 package chacha20
 // bufSize is the granularity, in bytes, at which XORKeyStream feeds data to
 // xorKeyStreamBlocks. The arm64 assembly handles 256 bytes (four 64-byte
 // ChaCha20 blocks) per loop iteration.
 const bufSize = 256
 // xorKeyStreamVX XORs src with ChaCha20 key stream and writes the result to
 // dst. It is implemented in chacha_arm64.s, which consumes src in 256-byte
 // steps and adds 4 to *counter after each step.
 //go:noescape
 func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
 // xorKeyStreamBlocks XORs src with key stream into dst using the arm64 vector
 // assembly. XORKeyStream only calls it with lengths that are a non-zero
 // multiple of bufSize.
 //
 // The receiver is named s to match the other Cipher methods in this package.
 func (s *Cipher) xorKeyStreamBlocks(dst, src []byte) {
 	xorKeyStreamVX(dst, src, &s.key, &s.nonce, &s.counter)
 }
--- a/vendor/golang.org/x/crypto/chacha20/chacha_arm64.s
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_arm64.s
@ -0,0 +1,308 @@
 // Copyright 2018 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // +build go1.11
 // +build !gccgo,!appengine
 #include "textflag.h"
 // NUM_ROUNDS is the iteration count of the "chacha" loop below. Each
 // iteration performs one column round followed by one diagonal round, so 10
 // iterations give the 20 rounds of ChaCha20.
 #define NUM_ROUNDS 10
 // func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
 //
 // Each pass of "loop" computes four ChaCha20 blocks in parallel: state word i
 // of all four blocks lives in the four 32-bit lanes of Vi. The pass XORs 256
 // bytes of src into dst and then adds 4 to *counter. The loop body always runs
 // at least once, so src_len is expected to be a non-zero multiple of 256.
 TEXT ·xorKeyStreamVX(SB), NOSPLIT, $0
 	// R1 = dst pointer, R2 = src pointer, R3 = len(src),
 	// R4 = key, R6 = nonce, R7 = counter
 	MOVD	dst+0(FP), R1
 	MOVD	src+24(FP), R2
 	MOVD	src_len+32(FP), R3
 	MOVD	key+48(FP), R4
 	MOVD	nonce+56(FP), R6
 	MOVD	counter+64(FP), R7
 	MOVD	$·constants(SB), R10
 	MOVD	$·incRotMatrix(SB), R11
 	// R20 = current counter value
 	MOVW	(R7), R20
 	// R13 = src_len rounded down to a multiple of 256
 	AND	$~255, R3, R13
 	ADD	R2, R13, R12 // R12 for block end
 	// R13 = src_len % 256 (not read again in this routine)
 	AND	$255, R3, R13
 loop:
 	MOVD	$NUM_ROUNDS, R21
 	// V30 = per-lane counter offsets {0, 1, 2, 3}, V31 = VTBL byte-index table
 	// used for the rotate-left-by-8 steps
 	VLD1	(R11), [V30.S4, V31.S4]
 	// load constants
 	// VLD4R (R10), [V0.S4, V1.S4, V2.S4, V3.S4]
 	WORD	$0x4D60E940
 	// load keys
 	// VLD4R 16(R4), [V4.S4, V5.S4, V6.S4, V7.S4]
 	WORD	$0x4DFFE884
 	// VLD4R 16(R4), [V8.S4, V9.S4, V10.S4, V11.S4]
 	WORD	$0x4DFFE888
 	// undo the two 16-byte advances of R4 made by the key loads
 	SUB	$32, R4
 	// load counter + nonce
 	// VLD1R (R7), [V12.S4]
 	WORD	$0x4D40C8EC
 	// VLD3R (R6), [V13.S4, V14.S4, V15.S4]
 	WORD	$0x4D40E8CD
 	// update counter
 	VADD	V30.S4, V12.S4, V12.S4
 chacha:
 	// Column round.
 	// V0..V3 += V4..V7
 	// V12..V15 <<<= ((V12..V15 XOR V0..V3), 16)
 	VADD	V0.S4, V4.S4, V0.S4
 	VADD	V1.S4, V5.S4, V1.S4
 	VADD	V2.S4, V6.S4, V2.S4
 	VADD	V3.S4, V7.S4, V3.S4
 	VEOR	V12.B16, V0.B16, V12.B16
 	VEOR	V13.B16, V1.B16, V13.B16
 	VEOR	V14.B16, V2.B16, V14.B16
 	VEOR	V15.B16, V3.B16, V15.B16
 	// swapping the 16-bit halves of each word is a rotation by 16
 	VREV32	V12.H8, V12.H8
 	VREV32	V13.H8, V13.H8
 	VREV32	V14.H8, V14.H8
 	VREV32	V15.H8, V15.H8
 	// V8..V11 += V12..V15
 	// V4..V7 <<<= ((V4..V7 XOR V8..V11), 12)
 	VADD	V8.S4, V12.S4, V8.S4
 	VADD	V9.S4, V13.S4, V9.S4
 	VADD	V10.S4, V14.S4, V10.S4
 	VADD	V11.S4, V15.S4, V11.S4
 	VEOR	V8.B16, V4.B16, V16.B16
 	VEOR	V9.B16, V5.B16, V17.B16
 	VEOR	V10.B16, V6.B16, V18.B16
 	VEOR	V11.B16, V7.B16, V19.B16
 	// rotate = shift left, then shift-right-and-insert the bits that fell off
 	VSHL	$12, V16.S4, V4.S4
 	VSHL	$12, V17.S4, V5.S4
 	VSHL	$12, V18.S4, V6.S4
 	VSHL	$12, V19.S4, V7.S4
 	VSRI	$20, V16.S4, V4.S4
 	VSRI	$20, V17.S4, V5.S4
 	VSRI	$20, V18.S4, V6.S4
 	VSRI	$20, V19.S4, V7.S4
 	// V0..V3 += V4..V7
 	// V12..V15 <<<= ((V12..V15 XOR V0..V3), 8)
 	VADD	V0.S4, V4.S4, V0.S4
 	VADD	V1.S4, V5.S4, V1.S4
 	VADD	V2.S4, V6.S4, V2.S4
 	VADD	V3.S4, V7.S4, V3.S4
 	VEOR	V12.B16, V0.B16, V12.B16
 	VEOR	V13.B16, V1.B16, V13.B16
 	VEOR	V14.B16, V2.B16, V14.B16
 	VEOR	V15.B16, V3.B16, V15.B16
 	// byte shuffle through the table in V31 implements the rotation by 8
 	VTBL	V31.B16, [V12.B16], V12.B16
 	VTBL	V31.B16, [V13.B16], V13.B16
 	VTBL	V31.B16, [V14.B16], V14.B16
 	VTBL	V31.B16, [V15.B16], V15.B16
 	// V8..V11 += V12..V15
 	// V4..V7 <<<= ((V4..V7 XOR V8..V11), 7)
 	VADD	V12.S4, V8.S4, V8.S4
 	VADD	V13.S4, V9.S4, V9.S4
 	VADD	V14.S4, V10.S4, V10.S4
 	VADD	V15.S4, V11.S4, V11.S4
 	VEOR	V8.B16, V4.B16, V16.B16
 	VEOR	V9.B16, V5.B16, V17.B16
 	VEOR	V10.B16, V6.B16, V18.B16
 	VEOR	V11.B16, V7.B16, V19.B16
 	VSHL	$7, V16.S4, V4.S4
 	VSHL	$7, V17.S4, V5.S4
 	VSHL	$7, V18.S4, V6.S4
 	VSHL	$7, V19.S4, V7.S4
 	VSRI	$25, V16.S4, V4.S4
 	VSRI	$25, V17.S4, V5.S4
 	VSRI	$25, V18.S4, V6.S4
 	VSRI	$25, V19.S4, V7.S4
 	// Diagonal round: same steps as above with the register groups rotated.
 	// V0..V3 += V5..V7, V4
 	// V15,V12-V14 <<<= ((V15,V12-V14 XOR V0..V3), 16)
 	VADD	V0.S4, V5.S4, V0.S4
 	VADD	V1.S4, V6.S4, V1.S4
 	VADD	V2.S4, V7.S4, V2.S4
 	VADD	V3.S4, V4.S4, V3.S4
 	VEOR	V15.B16, V0.B16, V15.B16
 	VEOR	V12.B16, V1.B16, V12.B16
 	VEOR	V13.B16, V2.B16, V13.B16
 	VEOR	V14.B16, V3.B16, V14.B16
 	VREV32	V12.H8, V12.H8
 	VREV32	V13.H8, V13.H8
 	VREV32	V14.H8, V14.H8
 	VREV32	V15.H8, V15.H8
 	// V10 += V15; V5 <<<= ((V10 XOR V5), 12)
 	// ...
 	VADD	V15.S4, V10.S4, V10.S4
 	VADD	V12.S4, V11.S4, V11.S4
 	VADD	V13.S4, V8.S4, V8.S4
 	VADD	V14.S4, V9.S4, V9.S4
 	VEOR	V10.B16, V5.B16, V16.B16
 	VEOR	V11.B16, V6.B16, V17.B16
 	VEOR	V8.B16, V7.B16, V18.B16
 	VEOR	V9.B16, V4.B16, V19.B16
 	VSHL	$12, V16.S4, V5.S4
 	VSHL	$12, V17.S4, V6.S4
 	VSHL	$12, V18.S4, V7.S4
 	VSHL	$12, V19.S4, V4.S4
 	VSRI	$20, V16.S4, V5.S4
 	VSRI	$20, V17.S4, V6.S4
 	VSRI	$20, V18.S4, V7.S4
 	VSRI	$20, V19.S4, V4.S4
 	// V0 += V5; V15 <<<= ((V0 XOR V15), 8)
 	// ...
 	VADD	V5.S4, V0.S4, V0.S4
 	VADD	V6.S4, V1.S4, V1.S4
 	VADD	V7.S4, V2.S4, V2.S4
 	VADD	V4.S4, V3.S4, V3.S4
 	VEOR	V0.B16, V15.B16, V15.B16
 	VEOR	V1.B16, V12.B16, V12.B16
 	VEOR	V2.B16, V13.B16, V13.B16
 	VEOR	V3.B16, V14.B16, V14.B16
 	VTBL	V31.B16, [V12.B16], V12.B16
 	VTBL	V31.B16, [V13.B16], V13.B16
 	VTBL	V31.B16, [V14.B16], V14.B16
 	VTBL	V31.B16, [V15.B16], V15.B16
 	// V10 += V15; V5 <<<= ((V10 XOR V5), 7)
 	// ...
 	VADD	V15.S4, V10.S4, V10.S4
 	VADD	V12.S4, V11.S4, V11.S4
 	VADD	V13.S4, V8.S4, V8.S4
 	VADD	V14.S4, V9.S4, V9.S4
 	VEOR	V10.B16, V5.B16, V16.B16
 	VEOR	V11.B16, V6.B16, V17.B16
 	VEOR	V8.B16, V7.B16, V18.B16
 	VEOR	V9.B16, V4.B16, V19.B16
 	VSHL	$7, V16.S4, V5.S4
 	VSHL	$7, V17.S4, V6.S4
 	VSHL	$7, V18.S4, V7.S4
 	VSHL	$7, V19.S4, V4.S4
 	VSRI	$25, V16.S4, V5.S4
 	VSRI	$25, V17.S4, V6.S4
 	VSRI	$25, V18.S4, V7.S4
 	VSRI	$25, V19.S4, V4.S4
 	SUB	$1, R21
 	CBNZ	R21, chacha
 	// Reload the initial state into V16..V31 and add it to the working state.
 	// V30 (the lane offsets) is consumed before it is overwritten below.
 	// VLD4R (R10), [V16.S4, V17.S4, V18.S4, V19.S4]
 	WORD	$0x4D60E950
 	// VLD4R 16(R4), [V20.S4, V21.S4, V22.S4, V23.S4]
 	WORD	$0x4DFFE894
 	VADD	V30.S4, V12.S4, V12.S4
 	VADD	V16.S4, V0.S4, V0.S4
 	VADD	V17.S4, V1.S4, V1.S4
 	VADD	V18.S4, V2.S4, V2.S4
 	VADD	V19.S4, V3.S4, V3.S4
 	// VLD4R 16(R4), [V24.S4, V25.S4, V26.S4, V27.S4]
 	WORD	$0x4DFFE898
 	// restore R4
 	SUB	$32, R4
 	// load counter + nonce
 	// VLD1R (R7), [V28.S4]
 	WORD	$0x4D40C8FC
 	// VLD3R (R6), [V29.S4, V30.S4, V31.S4]
 	WORD	$0x4D40E8DD
 	VADD	V20.S4, V4.S4, V4.S4
 	VADD	V21.S4, V5.S4, V5.S4
 	VADD	V22.S4, V6.S4, V6.S4
 	VADD	V23.S4, V7.S4, V7.S4
 	VADD	V24.S4, V8.S4, V8.S4
 	VADD	V25.S4, V9.S4, V9.S4
 	VADD	V26.S4, V10.S4, V10.S4
 	VADD	V27.S4, V11.S4, V11.S4
 	VADD	V28.S4, V12.S4, V12.S4
 	VADD	V29.S4, V13.S4, V13.S4
 	VADD	V30.S4, V14.S4, V14.S4
 	VADD	V31.S4, V15.S4, V15.S4
 	// Transpose from "one state word per register" to "consecutive key stream
 	// bytes per register": first interleave 32-bit lanes, then 64-bit lanes.
 	VZIP1	V1.S4, V0.S4, V16.S4
 	VZIP2	V1.S4, V0.S4, V17.S4
 	VZIP1	V3.S4, V2.S4, V18.S4
 	VZIP2	V3.S4, V2.S4, V19.S4
 	VZIP1	V5.S4, V4.S4, V20.S4
 	VZIP2	V5.S4, V4.S4, V21.S4
 	VZIP1	V7.S4, V6.S4, V22.S4
 	VZIP2	V7.S4, V6.S4, V23.S4
 	VZIP1	V9.S4, V8.S4, V24.S4
 	VZIP2	V9.S4, V8.S4, V25.S4
 	VZIP1	V11.S4, V10.S4, V26.S4
 	VZIP2	V11.S4, V10.S4, V27.S4
 	VZIP1	V13.S4, V12.S4, V28.S4
 	VZIP2	V13.S4, V12.S4, V29.S4
 	VZIP1	V15.S4, V14.S4, V30.S4
 	VZIP2	V15.S4, V14.S4, V31.S4
 	// The 64-byte source loads (post-incrementing R2) are interleaved with the
 	// second transpose stage, reusing V16..V31 as each group becomes free.
 	VZIP1	V18.D2, V16.D2, V0.D2
 	VZIP2	V18.D2, V16.D2, V4.D2
 	VZIP1	V19.D2, V17.D2, V8.D2
 	VZIP2	V19.D2, V17.D2, V12.D2
 	VLD1.P	64(R2), [V16.B16, V17.B16, V18.B16, V19.B16]
 	VZIP1	V22.D2, V20.D2, V1.D2
 	VZIP2	V22.D2, V20.D2, V5.D2
 	VZIP1	V23.D2, V21.D2, V9.D2
 	VZIP2	V23.D2, V21.D2, V13.D2
 	VLD1.P	64(R2), [V20.B16, V21.B16, V22.B16, V23.B16]
 	VZIP1	V26.D2, V24.D2, V2.D2
 	VZIP2	V26.D2, V24.D2, V6.D2
 	VZIP1	V27.D2, V25.D2, V10.D2
 	VZIP2	V27.D2, V25.D2, V14.D2
 	VLD1.P	64(R2), [V24.B16, V25.B16, V26.B16, V27.B16]
 	VZIP1	V30.D2, V28.D2, V3.D2
 	VZIP2	V30.D2, V28.D2, V7.D2
 	VZIP1	V31.D2, V29.D2, V11.D2
 	VZIP2	V31.D2, V29.D2, V15.D2
 	VLD1.P	64(R2), [V28.B16, V29.B16, V30.B16, V31.B16]
 	// XOR key stream into the source data and store 4 x 64 bytes to dst
 	// (post-incrementing R1).
 	VEOR	V0.B16, V16.B16, V16.B16
 	VEOR	V1.B16, V17.B16, V17.B16
 	VEOR	V2.B16, V18.B16, V18.B16
 	VEOR	V3.B16, V19.B16, V19.B16
 	VST1.P	[V16.B16, V17.B16, V18.B16, V19.B16], 64(R1)
 	VEOR	V4.B16, V20.B16, V20.B16
 	VEOR	V5.B16, V21.B16, V21.B16
 	VEOR	V6.B16, V22.B16, V22.B16
 	VEOR	V7.B16, V23.B16, V23.B16
 	VST1.P	[V20.B16, V21.B16, V22.B16, V23.B16], 64(R1)
 	VEOR	V8.B16, V24.B16, V24.B16
 	VEOR	V9.B16, V25.B16, V25.B16
 	VEOR	V10.B16, V26.B16, V26.B16
 	VEOR	V11.B16, V27.B16, V27.B16
 	VST1.P	[V24.B16, V25.B16, V26.B16, V27.B16], 64(R1)
 	VEOR	V12.B16, V28.B16, V28.B16
 	VEOR	V13.B16, V29.B16, V29.B16
 	VEOR	V14.B16, V30.B16, V30.B16
 	VEOR	V15.B16, V31.B16, V31.B16
 	VST1.P	[V28.B16, V29.B16, V30.B16, V31.B16], 64(R1)
 	// four blocks were consumed
 	ADD	$4, R20
 	MOVW	R20, (R7) // update counter
 	// keep going while the source pointer R2 is still below the end R12
 	CMP	R2, R12
 	BGT	loop
 	RET
 // ·constants holds the four ChaCha20 constant words ("expand 32-byte k" as
 // little-endian uint32s). The symbol is declared as 32 bytes; only the first
 // 16 are given values here.
 DATA	·constants+0x00(SB)/4, $0x61707865
 DATA	·constants+0x04(SB)/4, $0x3320646e
 DATA	·constants+0x08(SB)/4, $0x79622d32
 DATA	·constants+0x0c(SB)/4, $0x6b206574
 GLOBL	·constants(SB), NOPTR|RODATA, $32
 // ·incRotMatrix is loaded as two vectors at the top of each loop iteration:
 // bytes 0x00-0x0f are the per-lane block counter offsets {0, 1, 2, 3}, and
 // bytes 0x10-0x1f are the VTBL byte indices that rotate every 32-bit lane
 // left by 8 bits.
 DATA	·incRotMatrix+0x00(SB)/4, $0x00000000
 DATA	·incRotMatrix+0x04(SB)/4, $0x00000001
 DATA	·incRotMatrix+0x08(SB)/4, $0x00000002
 DATA	·incRotMatrix+0x0c(SB)/4, $0x00000003
 DATA	·incRotMatrix+0x10(SB)/4, $0x02010003
 DATA	·incRotMatrix+0x14(SB)/4, $0x06050407
 DATA	·incRotMatrix+0x18(SB)/4, $0x0A09080B
 DATA	·incRotMatrix+0x1c(SB)/4, $0x0E0D0C0F
 GLOBL	·incRotMatrix(SB), NOPTR|RODATA, $32
--- a/vendor/golang.org/x/crypto/chacha20/chacha_generic.go
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_generic.go
@ -0,0 +1,364 @@
 // Copyright 2016 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // Package chacha20 implements the ChaCha20 and XChaCha20 encryption algorithms
 // as specified in RFC 8439 and draft-irtf-cfrg-xchacha-01.
 package chacha20
 import (
 	"crypto/cipher"
 	"encoding/binary"
 	"errors"
 	"math/bits"
 	"golang.org/x/crypto/internal/subtle"
 )
 const (
 	// KeySize is the size of the key used by this cipher, in bytes.
 	KeySize = 32
 	// NonceSize is the size of the nonce used with the standard variant of this
 	// cipher, in bytes.
 	//
 	// Note that this is too short to be safely generated at random if the same
 	// key is reused more than 2³² times.
 	NonceSize = 12
 	// NonceSizeX is the size of the nonce used with the XChaCha20 variant of
 	// this cipher, in bytes. Passing a nonce of this length to
 	// NewUnauthenticatedCipher selects the XChaCha20 construction.
 	NonceSizeX = 24
 )
 // Cipher is a stateful instance of ChaCha20 or XChaCha20 using a particular key
 // and nonce. A *Cipher implements the cipher.Stream interface.
 type Cipher struct {
 	// The ChaCha20 state is 16 words: 4 constant, 8 of key, 1 of counter
 	// (incremented after each block), and 3 of nonce.
 	//
 	// The field order matters: the ppc64le assembly is handed only &counter
 	// and reads the counter and the three nonce words with a single 16-byte
 	// vector load, so nonce must directly follow counter.
 	key     [8]uint32
 	counter uint32
 	nonce   [3]uint32
 	// The last len bytes of buf are leftover key stream bytes from the previous
 	// XORKeyStream invocation. The size of buf depends on how many blocks are
 	// computed at a time.
 	buf [bufSize]byte
 	len int
 	// The counter-independent results of the first round are cached after they
 	// are computed the first time. Only xorKeyStreamBlocksGeneric reads and
 	// writes these fields.
 	precompDone      bool
 	p1, p5, p9, p13  uint32
 	p2, p6, p10, p14 uint32
 	p3, p7, p11, p15 uint32
 }
 // Compile-time check that *Cipher satisfies cipher.Stream.
 var _ cipher.Stream = (*Cipher)(nil)
 // NewUnauthenticatedCipher returns a ChaCha20 stream cipher for the given
 // 32-byte key. A 12-byte nonce selects standard ChaCha20 and a 24-byte nonce
 // selects the XChaCha20 construction; any other key or nonce length yields an
 // error.
 //
 // ChaCha20, like every stream cipher, provides no authentication: an attacker
 // can silently tamper with the plaintext. It is therefore better suited as a
 // building block than as a standalone encryption mechanism. Consider package
 // golang.org/x/crypto/chacha20poly1305 instead.
 func NewUnauthenticatedCipher(key, nonce []byte) (*Cipher, error) {
 	// Keep this wrapper tiny so that it can be inlined: the Cipher is then
 	// allocated in the caller's frame and, depending on how the result is
 	// used, need not escape to the heap.
 	return newUnauthenticatedCipher(&Cipher{}, key, nonce)
 }
 // newUnauthenticatedCipher validates key and nonce and stores them in c as
 // little-endian 32-bit words. It returns c on success, and a nil *Cipher with
 // an error when the key is not KeySize bytes or the nonce is neither NonceSize
 // nor NonceSizeX bytes. c is left untouched on error.
 func newUnauthenticatedCipher(c *Cipher, key, nonce []byte) (*Cipher, error) {
 	if len(key) != KeySize {
 		return nil, errors.New("chacha20: wrong key size")
 	}
 	switch len(nonce) {
 	case NonceSize:
 		// Standard ChaCha20: key and nonce are used as given.
 	case NonceSizeX:
 		// XChaCha20 uses the ChaCha20 core to mix 16 bytes of the nonce into a
 		// derived key, allowing it to operate on a nonce of 24 bytes. See
 		// draft-irtf-cfrg-xchacha-01, Section 2.3.
 		//
 		// The error is discarded because both lengths HChaCha20 checks are
 		// already known to be valid here.
 		subKey, _ := HChaCha20(key, nonce[0:16])
 		// The remaining 8 nonce bytes go after 4 leading zero bytes.
 		shortNonce := make([]byte, NonceSize)
 		copy(shortNonce[4:12], nonce[16:24])
 		key, nonce = subKey, shortNonce
 	default:
 		return nil, errors.New("chacha20: wrong nonce size")
 	}
 	for i := range c.key {
 		c.key[i] = binary.LittleEndian.Uint32(key[4*i : 4*i+4])
 	}
 	for i := range c.nonce {
 		c.nonce[i] = binary.LittleEndian.Uint32(nonce[4*i : 4*i+4])
 	}
 	return c, nil
 }
 // The constant first 4 words of the ChaCha20 state. Read as little-endian
 // bytes they spell the ASCII string "expand 32-byte k".
 const (
 	j0 uint32 = 0x61707865 // expa
 	j1 uint32 = 0x3320646e // nd 3
 	j2 uint32 = 0x79622d32 // 2-by
 	j3 uint32 = 0x6b206574 // te k
 )
 // blockSize is the size in bytes of one ChaCha20 key stream block (16 words of
 // 4 bytes).
 const blockSize = 64
 // quarterRound is the ChaCha20 mixing primitive. It takes four state words and
 // returns them after four add/xor/rotate steps with rotation amounts 16, 12,
 // 8 and 7. Every ChaCha20 round applies it four times, to the columns or to
 // the diagonals of the 4x4 state.
 func quarterRound(a, b, c, d uint32) (uint32, uint32, uint32, uint32) {
 	a += b
 	d = bits.RotateLeft32(d^a, 16)

 	c += d
 	b = bits.RotateLeft32(b^c, 12)

 	a += b
 	d = bits.RotateLeft32(d^a, 8)

 	c += d
 	b = bits.RotateLeft32(b^c, 7)

 	return a, b, c, d
 }
 // XORKeyStream XORs each byte in the given slice with a byte from the
 // cipher's key stream. Dst and src must overlap entirely or not at all.
 //
 // If len(dst) < len(src), XORKeyStream will panic. It is acceptable
 // to pass a dst bigger than src, and in that case, XORKeyStream will
 // only update dst[:len(src)] and will not touch the rest of dst.
 //
 // Multiple calls to XORKeyStream behave as if the concatenation of
 // the src buffers was passed in a single run. That is, Cipher
 // maintains state and does not reset at each XORKeyStream call.
 //
 // XORKeyStream also panics if dst and src overlap inexactly, or if processing
 // src would advance the 32-bit block counter to 2³² or beyond.
 func (s *Cipher) XORKeyStream(dst, src []byte) {
 	if len(src) == 0 {
 		return
 	}
 	if len(dst) < len(src) {
 		panic("chacha20: output smaller than input")
 	}
 	dst = dst[:len(src)]
 	if subtle.InexactOverlap(dst, src) {
 		panic("chacha20: invalid buffer overlap")
 	}
 	// First, drain any remaining key stream from a previous XORKeyStream.
 	if s.len != 0 {
 		// The unused key stream sits in the last s.len bytes of s.buf.
 		keyStream := s.buf[bufSize-s.len:]
 		if len(src) < len(keyStream) {
 			keyStream = keyStream[:len(src)]
 		}
 		_ = src[len(keyStream)-1] // bounds check elimination hint
 		for i, b := range keyStream {
 			dst[i] = src[i] ^ b
 		}
 		s.len -= len(keyStream)
 		src = src[len(keyStream):]
 		dst = dst[len(keyStream):]
 	}
 	// Refuse the whole request up front if it would overflow the counter.
 	// numBufs rounds up because a trailing partial buffer still costs a full
 	// bufSize worth of blocks (see the padding below).
 	const blocksPerBuf = bufSize / blockSize
 	numBufs := (uint64(len(src)) + bufSize - 1) / bufSize
 	if uint64(s.counter)+numBufs*blocksPerBuf >= 1<<32 {
 		panic("chacha20: counter overflow")
 	}
 	// xorKeyStreamBlocks implementations expect input lengths that are a
 	// multiple of bufSize. Platform-specific ones process multiple blocks at a
 	// time, so have bufSizes that are a multiple of blockSize.
 	rem := len(src) % bufSize
 	full := len(src) - rem
 	if full > 0 {
 		s.xorKeyStreamBlocks(dst[:full], src[:full])
 	}
 	// If we have a partial (multi-)block, pad it for xorKeyStreamBlocks, and
 	// keep the leftover keystream for the next XORKeyStream invocation.
 	if rem > 0 {
 		// Zero padding XORed with key stream is the key stream itself, so
 		// after this call s.buf[rem:] holds unused key stream bytes.
 		s.buf = [bufSize]byte{}
 		copy(s.buf[:], src[full:])
 		s.xorKeyStreamBlocks(s.buf[:], s.buf[:])
 		// copy returns the number of output bytes written (rem); the rest of
 		// the buffer is what remains for the next call.
 		s.len = bufSize - copy(dst[full:], s.buf[:])
 	}
 }
 // xorKeyStreamBlocksGeneric is the portable implementation of
 // xorKeyStreamBlocks. It XORs src with key stream into dst one 64-byte block
 // at a time and increments s.counter after each block. It panics if dst and
 // src differ in length, if that length is not a multiple of blockSize, or if
 // the counter wraps around to zero.
 func (s *Cipher) xorKeyStreamBlocksGeneric(dst, src []byte) {
 	if len(dst) != len(src) || len(dst)%blockSize != 0 {
 		panic("chacha20: internal error: wrong dst and/or src length")
 	}
 	// To generate each block of key stream, the initial cipher state
 	// (represented below) is passed through 20 rounds of shuffling,
 	// alternatively applying quarterRounds by columns (like 1, 5, 9, 13)
 	// or by diagonals (like 1, 6, 11, 12).
 	//
 	//      0:cccccccc   1:cccccccc   2:cccccccc   3:cccccccc
 	//      4:kkkkkkkk   5:kkkkkkkk   6:kkkkkkkk   7:kkkkkkkk
 	//      8:kkkkkkkk   9:kkkkkkkk  10:kkkkkkkk  11:kkkkkkkk
 	//     12:bbbbbbbb  13:nnnnnnnn  14:nnnnnnnn  15:nnnnnnnn
 	//
 	//            c=constant k=key b=blockcount n=nonce
 	//
 	// Word 12 gets no local (the blank identifier below) because the counter
 	// changes per block and is read from s.counter inside the loop.
 	var (
 		c0, c1, c2, c3   = j0, j1, j2, j3
 		c4, c5, c6, c7   = s.key[0], s.key[1], s.key[2], s.key[3]
 		c8, c9, c10, c11 = s.key[4], s.key[5], s.key[6], s.key[7]
 		_, c13, c14, c15 = s.counter, s.nonce[0], s.nonce[1], s.nonce[2]
 	)
 	// Three quarters of the first round don't depend on the counter, so we can
 	// calculate them here, and reuse them for multiple blocks in the loop, and
 	// for future XORKeyStream invocations.
 	if !s.precompDone {
 		s.p1, s.p5, s.p9, s.p13 = quarterRound(c1, c5, c9, c13)
 		s.p2, s.p6, s.p10, s.p14 = quarterRound(c2, c6, c10, c14)
 		s.p3, s.p7, s.p11, s.p15 = quarterRound(c3, c7, c11, c15)
 		s.precompDone = true
 	}
 	for i := 0; i < len(src); i += blockSize {
 		// The remainder of the first column round.
 		fcr0, fcr4, fcr8, fcr12 := quarterRound(c0, c4, c8, s.counter)
 		// The second diagonal round.
 		x0, x5, x10, x15 := quarterRound(fcr0, s.p5, s.p10, s.p15)
 		x1, x6, x11, x12 := quarterRound(s.p1, s.p6, s.p11, fcr12)
 		x2, x7, x8, x13 := quarterRound(s.p2, s.p7, fcr8, s.p13)
 		x3, x4, x9, x14 := quarterRound(s.p3, fcr4, s.p9, s.p14)
 		// The remaining 18 rounds. (This i shadows the block offset i of the
 		// outer loop, which is not needed until after the rounds.)
 		for i := 0; i < 9; i++ {
 			// Column round.
 			x0, x4, x8, x12 = quarterRound(x0, x4, x8, x12)
 			x1, x5, x9, x13 = quarterRound(x1, x5, x9, x13)
 			x2, x6, x10, x14 = quarterRound(x2, x6, x10, x14)
 			x3, x7, x11, x15 = quarterRound(x3, x7, x11, x15)
 			// Diagonal round.
 			x0, x5, x10, x15 = quarterRound(x0, x5, x10, x15)
 			x1, x6, x11, x12 = quarterRound(x1, x6, x11, x12)
 			x2, x7, x8, x13 = quarterRound(x2, x7, x8, x13)
 			x3, x4, x9, x14 = quarterRound(x3, x4, x9, x14)
 		}
 		// Finally, add back the initial state to generate the key stream.
 		x0 += c0
 		x1 += c1
 		x2 += c2
 		x3 += c3
 		x4 += c4
 		x5 += c5
 		x6 += c6
 		x7 += c7
 		x8 += c8
 		x9 += c9
 		x10 += c10
 		x11 += c11
 		x12 += s.counter
 		x13 += c13
 		x14 += c14
 		x15 += c15
 		// Advance to the next block. XORKeyStream rejects requests that would
 		// overflow the counter, so a wrap to zero here is an internal error.
 		s.counter += 1
 		if s.counter == 0 {
 			panic("chacha20: internal error: counter overflow")
 		}
 		in, out := src[i:], dst[i:]
 		in, out = in[:blockSize], out[:blockSize] // bounds check elimination hint
 		// XOR the key stream with the source and write out the result.
 		// NOTE(review): xor is defined outside this file; presumably it XORs
 		// 4 bytes of in with the given word and stores them to out — confirm.
 		xor(out[0:], in[0:], x0)
 		xor(out[4:], in[4:], x1)
 		xor(out[8:], in[8:], x2)
 		xor(out[12:], in[12:], x3)
 		xor(out[16:], in[16:], x4)
 		xor(out[20:], in[20:], x5)
 		xor(out[24:], in[24:], x6)
 		xor(out[28:], in[28:], x7)
 		xor(out[32:], in[32:], x8)
 		xor(out[36:], in[36:], x9)
 		xor(out[40:], in[40:], x10)
 		xor(out[44:], in[44:], x11)
 		xor(out[48:], in[48:], x12)
 		xor(out[52:], in[52:], x13)
 		xor(out[56:], in[56:], x14)
 		xor(out[60:], in[60:], x15)
 	}
 }
 // HChaCha20 derives a 32-byte key from a 32-byte key and a 16-byte nonce
 // using the ChaCha20 core. An error is returned when key or nonce has any
 // other length. It is one step of the XChaCha20 construction.
 func HChaCha20(key, nonce []byte) ([]byte, error) {
 	// Keep this wrapper tiny so that it can be inlined: the output slice is
 	// then allocated in the caller's frame and, depending on how the result
 	// is used, need not escape to the heap.
 	return hChaCha20(make([]byte, 32), key, nonce)
 }
 // hChaCha20 writes the HChaCha20 output for key and nonce into out[:32] and
 // returns out. It returns a nil slice and an error when key is not KeySize
 // bytes or nonce is not 16 bytes; out must hold at least 32 bytes.
 //
 // Unlike the ChaCha20 block function, the initial state is not added back
 // after the rounds; the output is the first and the last row of the state.
 func hChaCha20(out, key, nonce []byte) ([]byte, error) {
 	switch {
 	case len(key) != KeySize:
 		return nil, errors.New("chacha20: wrong HChaCha20 key size")
 	case len(nonce) != 16:
 		return nil, errors.New("chacha20: wrong HChaCha20 nonce size")
 	}

 	le := binary.LittleEndian

 	// Rows of the state: constants, key (two rows), nonce.
 	x0, x1, x2, x3 := j0, j1, j2, j3
 	x4, x5, x6, x7 := le.Uint32(key[0:4]), le.Uint32(key[4:8]), le.Uint32(key[8:12]), le.Uint32(key[12:16])
 	x8, x9, x10, x11 := le.Uint32(key[16:20]), le.Uint32(key[20:24]), le.Uint32(key[24:28]), le.Uint32(key[28:32])
 	x12, x13, x14, x15 := le.Uint32(nonce[0:4]), le.Uint32(nonce[4:8]), le.Uint32(nonce[8:12]), le.Uint32(nonce[12:16])

 	for round := 0; round < 10; round++ {
 		// Column round.
 		x0, x4, x8, x12 = quarterRound(x0, x4, x8, x12)
 		x1, x5, x9, x13 = quarterRound(x1, x5, x9, x13)
 		x2, x6, x10, x14 = quarterRound(x2, x6, x10, x14)
 		x3, x7, x11, x15 = quarterRound(x3, x7, x11, x15)

 		// Diagonal round.
 		x0, x5, x10, x15 = quarterRound(x0, x5, x10, x15)
 		x1, x6, x11, x12 = quarterRound(x1, x6, x11, x12)
 		x2, x7, x8, x13 = quarterRound(x2, x7, x8, x13)
 		x3, x4, x9, x14 = quarterRound(x3, x4, x9, x14)
 	}

 	_ = out[31] // bounds check elimination hint
 	for i, word := range [8]uint32{x0, x1, x2, x3, x12, x13, x14, x15} {
 		le.PutUint32(out[4*i:4*i+4], word)
 	}
 	return out, nil
 }
--- a/vendor/golang.org/x/crypto/chacha20/chacha_noasm.go
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_noasm.go
@ -0,0 +1,13 @@
 // Copyright 2018 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // +build !arm64,!s390x,!ppc64le arm64,!go1.11 gccgo appengine
 package chacha20
 // bufSize is the granularity, in bytes, at which XORKeyStream feeds data to
 // xorKeyStreamBlocks. Without assembly it is a single ChaCha20 block.
 const bufSize = blockSize
 // xorKeyStreamBlocks XORs src with key stream into dst. On platforms selected
 // by this file's build constraints it simply forwards to the portable
 // implementation.
 func (s *Cipher) xorKeyStreamBlocks(dst, src []byte) {
 	s.xorKeyStreamBlocksGeneric(dst, src)
 }
--- a/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.go
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.go
@ -0,0 +1,16 @@
 // Copyright 2019 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // +build !gccgo,!appengine
 package chacha20
 // bufSize is the granularity, in bytes, at which XORKeyStream feeds data to
 // xorKeyStreamBlocks. The ppc64le assembly computes four 64-byte ChaCha20
 // blocks per outer loop iteration.
 const bufSize = 256
 // chaCha20_ctr32_vsx XORs len bytes at inp with ChaCha20 key stream and
 // writes them to out. It is implemented in chacha_ppc64le.s.
 //
 // The assembly reads 16 bytes starting at counter (the counter word followed
 // by the three nonce words), so counter must point at Cipher.counter, which
 // is immediately followed by Cipher.nonce. On return *counter has been
 // increased by len/64.
 //go:noescape
 func chaCha20_ctr32_vsx(out, inp *byte, len int, key *[8]uint32, counter *uint32)
 // xorKeyStreamBlocks XORs src with key stream into dst using the ppc64le VSX
 // assembly. Both slices must be non-empty (their first elements are
 // addressed); XORKeyStream only calls it with lengths that are a non-zero
 // multiple of bufSize.
 //
 // The receiver is named s to match the other Cipher methods in this package.
 func (s *Cipher) xorKeyStreamBlocks(dst, src []byte) {
 	chaCha20_ctr32_vsx(&dst[0], &src[0], len(src), &s.key, &s.counter)
 }
--- a/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.s
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.s
@ -0,0 +1,449 @@
 // Copyright 2019 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // Based on CRYPTOGAMS code with the following comment:
 // # ====================================================================
 // # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 // # project. The module is, however, dual licensed under OpenSSL and
 // # CRYPTOGAMS licenses depending on where you obtain it. For further
 // # details see http://www.openssl.org/~appro/cryptogams/.
 // # ====================================================================
 // Code for the perl script that generates the ppc64 assembler
 // can be found in the cryptogams repository at the link below. It is based on
 // the original from openssl.
 // https://github.com/dot-asm/cryptogams/commit/a60f5b50ed908e91
 // The differences in this and the original implementation are
 // due to the calling conventions and initialization of constants.
 // +build !gccgo,!appengine
 #include "textflag.h"
 // Register aliases used by chaCha20_ctr32_vsx. OUT, INP, LEN, KEY and CNT
 // receive the function arguments. TMP is a scratch register in the byte-wise
 // tail loop, CONSTBASE addresses consts<>, and BLOCKS holds LEN/64.
 #define OUT  R3
 #define INP  R4
 #define LEN  R5
 #define KEY  R6
 #define CNT  R7
 #define TMP  R15
 #define CONSTBASE  R16
 #define BLOCKS R17
 // consts<> is the constant table for chaCha20_ctr32_vsx.
 //
 // 0x00: the four ChaCha20 constant words ("expand 32-byte k").
 DATA consts<>+0x00(SB)/8, $0x3320646e61707865
 DATA consts<>+0x08(SB)/8, $0x6b20657479622d32
 // 0x10-0x4f: inherited from the CRYPTOGAMS table; not referenced by the
 // routine in this file.
 DATA consts<>+0x10(SB)/8, $0x0000000000000001
 DATA consts<>+0x18(SB)/8, $0x0000000000000000
 DATA consts<>+0x20(SB)/8, $0x0000000000000004
 DATA consts<>+0x28(SB)/8, $0x0000000000000000
 DATA consts<>+0x30(SB)/8, $0x0a0b08090e0f0c0d
 DATA consts<>+0x38(SB)/8, $0x0203000106070405
 DATA consts<>+0x40(SB)/8, $0x090a0b080d0e0f0c
 DATA consts<>+0x48(SB)/8, $0x0102030005060704
 // 0x50-0x8f: each constant word replicated into all four lanes, loaded into
 // V0..V3 at the top of the outer loop.
 DATA consts<>+0x50(SB)/8, $0x6170786561707865
 DATA consts<>+0x58(SB)/8, $0x6170786561707865
 DATA consts<>+0x60(SB)/8, $0x3320646e3320646e
 DATA consts<>+0x68(SB)/8, $0x3320646e3320646e
 DATA consts<>+0x70(SB)/8, $0x79622d3279622d32
 DATA consts<>+0x78(SB)/8, $0x79622d3279622d32
 DATA consts<>+0x80(SB)/8, $0x6b2065746b206574
 DATA consts<>+0x88(SB)/8, $0x6b2065746b206574
 // 0x90: per-lane block counter offsets {0, 1, 2, 3}.
 DATA consts<>+0x90(SB)/8, $0x0000000100000000
 DATA consts<>+0x98(SB)/8, $0x0000000300000002
 GLOBL consts<>(SB), RODATA, $0xa0
 //func chaCha20_ctr32_vsx(out, inp *byte, len int, key *[8]uint32, counter *uint32)
 //
 // Each pass of loop_outer_vsx computes four ChaCha20 blocks in parallel (state
 // word i of all four blocks lives in the four lanes of Vi), then XORs them
 // into the input 64 bytes at a time. On exit *counter has been increased by
 // len/64. The 64-byte frame is scratch space for the byte-wise tail.
 TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64-40
 	MOVD out+0(FP), OUT
 	MOVD inp+8(FP), INP
 	MOVD len+16(FP), LEN
 	MOVD key+24(FP), KEY
 	MOVD counter+32(FP), CNT
 	// Addressing for constants
 	MOVD $consts<>+0x00(SB), CONSTBASE
 	// R8..R11 are fixed index offsets for the indexed vector loads/stores
 	MOVD $16, R8
 	MOVD $32, R9
 	MOVD $48, R10
 	MOVD $64, R11
 	// BLOCKS = LEN / 64
 	SRD $6, LEN, BLOCKS
 	// V16
 	LXVW4X (CONSTBASE)(R0), VS48
 	// CONSTBASE now points at the replicated constants (consts<>+0x50)
 	ADD $80,CONSTBASE
 	// Load key into V17,V18
 	LXVW4X (KEY)(R0), VS49
 	LXVW4X (KEY)(R8), VS50
 	// Load CNT, NONCE into V19
 	LXVW4X (CNT)(R0), VS51
 	// Clear V27
 	VXOR V27, V27, V27
 	// V28
 	LXVW4X (CONSTBASE)(R11), VS60
 	// splat slot from V19 -> V26
 	VSPLTW $0, V19, V26
 	// shift the counter word out of V19 and a zero word in, so V19 keeps
 	// only the nonce; the per-lane counters live in V26
 	VSLDOI $4, V19, V27, V19
 	VSLDOI $12, V27, V19, V19
 	// V26 = counter + {0, 1, 2, 3}
 	VADDUWM V26, V28, V26
 	// CTR = 10 double rounds
 	MOVD $10, R14
 	MOVD R14, CTR
 loop_outer_vsx:
 	// V0, V1, V2, V3
 	LXVW4X (R0)(CONSTBASE), VS32
 	LXVW4X (R8)(CONSTBASE), VS33
 	LXVW4X (R9)(CONSTBASE), VS34
 	LXVW4X (R10)(CONSTBASE), VS35
 	// splat values from V17, V18 into V4-V11
 	VSPLTW $0, V17, V4
 	VSPLTW $1, V17, V5
 	VSPLTW $2, V17, V6
 	VSPLTW $3, V17, V7
 	VSPLTW $0, V18, V8
 	VSPLTW $1, V18, V9
 	VSPLTW $2, V18, V10
 	VSPLTW $3, V18, V11
 	// VOR
 	VOR V26, V26, V12
 	// splat values from V19 -> V13, V14, V15
 	VSPLTW $1, V19, V13
 	VSPLTW $2, V19, V14
 	VSPLTW $3, V19, V15
 	// splat   const values
 	// (rotation amounts for VRLW, which uses the low 5 bits: -16 acts as 16)
 	VSPLTISW $-16, V27
 	VSPLTISW $12, V28
 	VSPLTISW $8, V29
 	VSPLTISW $7, V30
 loop_vsx:
 	// Column round.
 	VADDUWM V0, V4, V0
 	VADDUWM V1, V5, V1
 	VADDUWM V2, V6, V2
 	VADDUWM V3, V7, V3
 	VXOR V12, V0, V12
 	VXOR V13, V1, V13
 	VXOR V14, V2, V14
 	VXOR V15, V3, V15
 	VRLW V12, V27, V12
 	VRLW V13, V27, V13
 	VRLW V14, V27, V14
 	VRLW V15, V27, V15
 	VADDUWM V8, V12, V8
 	VADDUWM V9, V13, V9
 	VADDUWM V10, V14, V10
 	VADDUWM V11, V15, V11
 	VXOR V4, V8, V4
 	VXOR V5, V9, V5
 	VXOR V6, V10, V6
 	VXOR V7, V11, V7
 	VRLW V4, V28, V4
 	VRLW V5, V28, V5
 	VRLW V6, V28, V6
 	VRLW V7, V28, V7
 	VADDUWM V0, V4, V0
 	VADDUWM V1, V5, V1
 	VADDUWM V2, V6, V2
 	VADDUWM V3, V7, V3
 	VXOR V12, V0, V12
 	VXOR V13, V1, V13
 	VXOR V14, V2, V14
 	VXOR V15, V3, V15
 	VRLW V12, V29, V12
 	VRLW V13, V29, V13
 	VRLW V14, V29, V14
 	VRLW V15, V29, V15
 	VADDUWM V8, V12, V8
 	VADDUWM V9, V13, V9
 	VADDUWM V10, V14, V10
 	VADDUWM V11, V15, V11
 	VXOR V4, V8, V4
 	VXOR V5, V9, V5
 	VXOR V6, V10, V6
 	VXOR V7, V11, V7
 	VRLW V4, V30, V4
 	VRLW V5, V30, V5
 	VRLW V6, V30, V6
 	VRLW V7, V30, V7
 	// Diagonal round: same steps with the register groups rotated.
 	VADDUWM V0, V5, V0
 	VADDUWM V1, V6, V1
 	VADDUWM V2, V7, V2
 	VADDUWM V3, V4, V3
 	VXOR V15, V0, V15
 	VXOR V12, V1, V12
 	VXOR V13, V2, V13
 	VXOR V14, V3, V14
 	VRLW V15, V27, V15
 	VRLW V12, V27, V12
 	VRLW V13, V27, V13
 	VRLW V14, V27, V14
 	VADDUWM V10, V15, V10
 	VADDUWM V11, V12, V11
 	VADDUWM V8, V13, V8
 	VADDUWM V9, V14, V9
 	VXOR V5, V10, V5
 	VXOR V6, V11, V6
 	VXOR V7, V8, V7
 	VXOR V4, V9, V4
 	VRLW V5, V28, V5
 	VRLW V6, V28, V6
 	VRLW V7, V28, V7
 	VRLW V4, V28, V4
 	VADDUWM V0, V5, V0
 	VADDUWM V1, V6, V1
 	VADDUWM V2, V7, V2
 	VADDUWM V3, V4, V3
 	VXOR V15, V0, V15
 	VXOR V12, V1, V12
 	VXOR V13, V2, V13
 	VXOR V14, V3, V14
 	VRLW V15, V29, V15
 	VRLW V12, V29, V12
 	VRLW V13, V29, V13
 	VRLW V14, V29, V14
 	VADDUWM V10, V15, V10
 	VADDUWM V11, V12, V11
 	VADDUWM V8, V13, V8
 	VADDUWM V9, V14, V9
 	VXOR V5, V10, V5
 	VXOR V6, V11, V6
 	VXOR V7, V8, V7
 	VXOR V4, V9, V4
 	VRLW V5, V30, V5
 	VRLW V6, V30, V6
 	VRLW V7, V30, V7
 	VRLW V4, V30, V4
 	// decrement CTR and repeat while it is non-zero
 	BC   16, LT, loop_vsx
 	// add the per-lane block counters back into word 12
 	VADDUWM V12, V26, V12
 	// Transpose so that each register holds 16 consecutive key stream bytes
 	// of one block.
 	WORD $0x13600F8C		// VMRGEW V0, V1, V27
 	WORD $0x13821F8C		// VMRGEW V2, V3, V28
 	WORD $0x10000E8C		// VMRGOW V0, V1, V0
 	WORD $0x10421E8C		// VMRGOW V2, V3, V2
 	WORD $0x13A42F8C		// VMRGEW V4, V5, V29
 	WORD $0x13C63F8C		// VMRGEW V6, V7, V30
 	XXPERMDI VS32, VS34, $0, VS33
 	XXPERMDI VS32, VS34, $3, VS35
 	XXPERMDI VS59, VS60, $0, VS32
 	XXPERMDI VS59, VS60, $3, VS34
 	WORD $0x10842E8C		// VMRGOW V4, V5, V4
 	WORD $0x10C63E8C		// VMRGOW V6, V7, V6
 	WORD $0x13684F8C		// VMRGEW V8, V9, V27
 	WORD $0x138A5F8C		// VMRGEW V10, V11, V28
 	XXPERMDI VS36, VS38, $0, VS37
 	XXPERMDI VS36, VS38, $3, VS39
 	XXPERMDI VS61, VS62, $0, VS36
 	XXPERMDI VS61, VS62, $3, VS38
 	WORD $0x11084E8C		// VMRGOW V8, V9, V8
 	WORD $0x114A5E8C		// VMRGOW V10, V11, V10
 	WORD $0x13AC6F8C		// VMRGEW V12, V13, V29
 	WORD $0x13CE7F8C		// VMRGEW V14, V15, V30
 	XXPERMDI VS40, VS42, $0, VS41
 	XXPERMDI VS40, VS42, $3, VS43
 	XXPERMDI VS59, VS60, $0, VS40
 	XXPERMDI VS59, VS60, $3, VS42
 	WORD $0x118C6E8C		// VMRGOW V12, V13, V12
 	WORD $0x11CE7E8C		// VMRGOW V14, V15, V14
 	// advance the per-lane counters by 4 for the next outer iteration
 	VSPLTISW $4, V27
 	VADDUWM V26, V27, V26
 	XXPERMDI VS44, VS46, $0, VS45
 	XXPERMDI VS44, VS46, $3, VS47
 	XXPERMDI VS61, VS62, $0, VS44
 	XXPERMDI VS61, VS62, $3, VS46
 	// First block: add the initial state (V16..V19) to its four rows.
 	VADDUWM V0, V16, V0
 	VADDUWM V4, V17, V4
 	VADDUWM V8, V18, V8
 	VADDUWM V12, V19, V12
 	CMPU LEN, $64
 	BLT tail_vsx
 	// Bottom of loop
 	LXVW4X (INP)(R0), VS59
 	LXVW4X (INP)(R8), VS60
 	LXVW4X (INP)(R9), VS61
 	LXVW4X (INP)(R10), VS62
 	VXOR V27, V0, V27
 	VXOR V28, V4, V28
 	VXOR V29, V8, V29
 	VXOR V30, V12, V30
 	STXVW4X VS59, (OUT)(R0)
 	STXVW4X VS60, (OUT)(R8)
 	ADD     $64, INP
 	STXVW4X VS61, (OUT)(R9)
 	ADD     $-64, LEN
 	STXVW4X VS62, (OUT)(R10)
 	ADD     $64, OUT
 	BEQ     done_vsx
 	// Second block.
 	VADDUWM V1, V16, V0
 	VADDUWM V5, V17, V4
 	VADDUWM V9, V18, V8
 	VADDUWM V13, V19, V12
 	CMPU  LEN, $64
 	BLT   tail_vsx
 	LXVW4X (INP)(R0), VS59
 	LXVW4X (INP)(R8), VS60
 	LXVW4X (INP)(R9), VS61
 	LXVW4X (INP)(R10), VS62
 	VXOR   V27, V0, V27
 	VXOR V28, V4, V28
 	VXOR V29, V8, V29
 	VXOR V30, V12, V30
 	STXVW4X VS59, (OUT)(R0)
 	STXVW4X VS60, (OUT)(R8)
 	ADD     $64, INP
 	STXVW4X VS61, (OUT)(R9)
 	ADD     $-64, LEN
 	// The index register is the GPR R10 (offset 48), exactly as in the other
 	// three store sequences; a vector register is not a valid index here.
 	STXVW4X VS62, (OUT)(R10)
 	ADD     $64, OUT
 	BEQ     done_vsx
 	// Third block.
 	VADDUWM V2, V16, V0
 	VADDUWM V6, V17, V4
 	VADDUWM V10, V18, V8
 	VADDUWM V14, V19, V12
 	CMPU LEN, $64
 	BLT  tail_vsx
 	LXVW4X (INP)(R0), VS59
 	LXVW4X (INP)(R8), VS60
 	LXVW4X (INP)(R9), VS61
 	LXVW4X (INP)(R10), VS62
 	VXOR V27, V0, V27
 	VXOR V28, V4, V28
 	VXOR V29, V8, V29
 	VXOR V30, V12, V30
 	STXVW4X VS59, (OUT)(R0)
 	STXVW4X VS60, (OUT)(R8)
 	ADD     $64, INP
 	STXVW4X VS61, (OUT)(R9)
 	ADD     $-64, LEN
 	STXVW4X VS62, (OUT)(R10)
 	ADD     $64, OUT
 	BEQ     done_vsx
 	// Fourth block.
 	VADDUWM V3, V16, V0
 	VADDUWM V7, V17, V4
 	VADDUWM V11, V18, V8
 	VADDUWM V15, V19, V12
 	CMPU  LEN, $64
 	BLT   tail_vsx
 	LXVW4X (INP)(R0), VS59
 	LXVW4X (INP)(R8), VS60
 	LXVW4X (INP)(R9), VS61
 	LXVW4X (INP)(R10), VS62
 	VXOR V27, V0, V27
 	VXOR V28, V4, V28
 	VXOR V29, V8, V29
 	VXOR V30, V12, V30
 	STXVW4X VS59, (OUT)(R0)
 	STXVW4X VS60, (OUT)(R8)
 	ADD     $64, INP
 	STXVW4X VS61, (OUT)(R9)
 	ADD     $-64, LEN
 	STXVW4X VS62, (OUT)(R10)
 	ADD     $64, OUT
 	// reload the round count for the next group of four blocks
 	MOVD $10, R14
 	MOVD R14, CTR
 	BNE  loop_outer_vsx
 done_vsx:
 	// Increment counter by number of 64 byte blocks
 	MOVD (CNT), R14
 	ADD  BLOCKS, R14
 	MOVD R14, (CNT)
 	RET
 tail_vsx:
 	// Fewer than 64 bytes remain: spill the current key stream block to the
 	// stack frame and XOR it into the input one byte at a time.
 	ADD  $32, R1, R11
 	MOVD LEN, CTR
 	// Save values on stack to copy from
 	STXVW4X VS32, (R11)(R0)
 	STXVW4X VS36, (R11)(R8)
 	STXVW4X VS40, (R11)(R9)
 	STXVW4X VS44, (R11)(R10)
 	// bias the pointers by -1 for the pre-incrementing byte accesses below
 	ADD $-1, R11, R12
 	ADD $-1, INP
 	ADD $-1, OUT
 looptail_vsx:
 	// Copying the result to OUT
 	// in bytes.
 	MOVBZU 1(R12), KEY
 	MOVBZU 1(INP), TMP
 	XOR    KEY, TMP, KEY
 	MOVBU  KEY, 1(OUT)
 	BC     16, LT, looptail_vsx
 	// Clear the stack values
 	// (the spilled key stream is overwritten with the constants in VS48)
 	STXVW4X VS48, (R11)(R0)
 	STXVW4X VS48, (R11)(R8)
 	STXVW4X VS48, (R11)(R9)
 	STXVW4X VS48, (R11)(R10)
 	BR      done_vsx
--- a/vendor/golang.org/x/crypto/chacha20/chacha_s390x.go
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_s390x.go
@ -0,0 +1,26 @@
 // Copyright 2018 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // +build !gccgo,!appengine
 package chacha20
 import "golang.org/x/sys/cpu"
 var haveAsm = cpu.S390X.HasVX
 const bufSize = 256
 // xorKeyStreamVX is an assembly implementation of XORKeyStream. It must only
 // be called when the vector facility is available. Implementation in chacha_s390x.s.
 //go:noescape
 func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
 // xorKeyStreamBlocks XORs src with keystream into dst. It dispatches to the
 // vector assembly routine when cpu.S390X.HasVX reports the vector facility,
 // and to the generic implementation otherwise.
 func (c *Cipher) xorKeyStreamBlocks(dst, src []byte) {
 	if !cpu.S390X.HasVX {
 		c.xorKeyStreamBlocksGeneric(dst, src)
 		return
 	}
 	xorKeyStreamVX(dst, src, &c.key, &c.nonce, &c.counter)
 }
--- a/vendor/golang.org/x/crypto/chacha20/chacha_s390x.s
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_s390x.s
@ -0,0 +1,224 @@
 // Copyright 2018 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // +build !gccgo,!appengine
 #include "go_asm.h"
 #include "textflag.h"
 // This is an implementation of the ChaCha20 encryption algorithm as
 // specified in RFC 7539. It uses vector instructions to compute
 // 4 keystream blocks in parallel (256 bytes) which are then XORed
 // with the bytes in the input slice.
 // constants is a 32-byte read-only table holding two 16-byte vectors that
 // xorKeyStreamVX loads with a single VLM into BSWAP and J0.
 GLOBL ·constants<>(SB), RODATA|NOPTR, $32
 // BSWAP: swap bytes in each 4-byte element
 // (used as the VPERM mask by XORV via PERMUTE).
 DATA ·constants<>+0x00(SB)/4, $0x03020100
 DATA ·constants<>+0x04(SB)/4, $0x07060504
 DATA ·constants<>+0x08(SB)/4, $0x0b0a0908
 DATA ·constants<>+0x0c(SB)/4, $0x0f0e0d0c
 // J0: [j0, j1, j2, j3]
 // The four constant state words; read as little-endian ASCII they spell
 // "expand 32-byte k".
 DATA ·constants<>+0x10(SB)/4, $0x61707865
 DATA ·constants<>+0x14(SB)/4, $0x3320646e
 DATA ·constants<>+0x18(SB)/4, $0x79622d32
 DATA ·constants<>+0x1c(SB)/4, $0x6b206574
 // Vector register assignments used by xorKeyStreamVX.
 //   BSWAP, J0    loaded from the constants table
 //   KEY0, KEY1   the 32-byte key
 //   NONCE        the nonce vector
 //   CTR          per-block counters for the four blocks in flight
 //   M0-M3        scratch: SHUFFLE temporaries and the 64 input bytes in XORV
 //   INC          counter increment vector
 //   X0-X15       the 16 state words; each register holds the same word of
 //                four different blocks while the rounds run
 #define BSWAP V5
 #define J0    V6
 #define KEY0  V7
 #define KEY1  V8
 #define NONCE V9
 #define CTR   V10
 #define M0    V11
 #define M1    V12
 #define M2    V13
 #define M3    V14
 #define INC   V15
 #define X0    V16
 #define X1    V17
 #define X2    V18
 #define X3    V19
 #define X4    V20
 #define X5    V21
 #define X6    V22
 #define X7    V23
 #define X8    V24
 #define X9    V25
 #define X10   V26
 #define X11   V27
 #define X12   V28
 #define X13   V29
 #define X14   V30
 #define X15   V31
 // NUM_ROUNDS is the total round count; the round loop in xorKeyStreamVX
 // runs NUM_ROUNDS/2 iterations with two ROUND4 invocations each.
 #define NUM_ROUNDS 20
 // ROUND4 applies the same add/xor/rotate sequence to four independent
 // register tuples (a0..a3), (b0..b3), (c0..c3), (d0..d3), with the
 // instructions of the four tuples interleaved. For each tuple (x0, x1, x2, x3)
 // the word-wise operations are, in order:
 //
 //	x0 += x1; x2 ^= x0; x2 = rotl(x2, 16)
 //	x3 += x2; x1 ^= x3; x1 = rotl(x1, 12)
 //	x0 += x1; x2 ^= x0; x2 = rotl(x2, 8)
 //	x3 += x2; x1 ^= x3; x1 = rotl(x1, 7)
 //
 // All sixteen argument registers are updated in place.
 #define ROUND4(a0, a1, a2, a3, b0, b1, b2, b3, c0, c1, c2, c3, d0, d1, d2, d3) \
 	VAF    a1, a0, a0  \
 	VAF    b1, b0, b0  \
 	VAF    c1, c0, c0  \
 	VAF    d1, d0, d0  \
 	VX     a0, a2, a2  \
 	VX     b0, b2, b2  \
 	VX     c0, c2, c2  \
 	VX     d0, d2, d2  \
 	VERLLF $16, a2, a2 \
 	VERLLF $16, b2, b2 \
 	VERLLF $16, c2, c2 \
 	VERLLF $16, d2, d2 \
 	VAF    a2, a3, a3  \
 	VAF    b2, b3, b3  \
 	VAF    c2, c3, c3  \
 	VAF    d2, d3, d3  \
 	VX     a3, a1, a1  \
 	VX     b3, b1, b1  \
 	VX     c3, c1, c1  \
 	VX     d3, d1, d1  \
 	VERLLF $12, a1, a1 \
 	VERLLF $12, b1, b1 \
 	VERLLF $12, c1, c1 \
 	VERLLF $12, d1, d1 \
 	VAF    a1, a0, a0  \
 	VAF    b1, b0, b0  \
 	VAF    c1, c0, c0  \
 	VAF    d1, d0, d0  \
 	VX     a0, a2, a2  \
 	VX     b0, b2, b2  \
 	VX     c0, c2, c2  \
 	VX     d0, d2, d2  \
 	VERLLF $8, a2, a2  \
 	VERLLF $8, b2, b2  \
 	VERLLF $8, c2, c2  \
 	VERLLF $8, d2, d2  \
 	VAF    a2, a3, a3  \
 	VAF    b2, b3, b3  \
 	VAF    c2, c3, c3  \
 	VAF    d2, d3, d3  \
 	VX     a3, a1, a1  \
 	VX     b3, b1, b1  \
 	VX     c3, c1, c1  \
 	VX     d3, d1, d1  \
 	VERLLF $7, a1, a1  \
 	VERLLF $7, b1, b1  \
 	VERLLF $7, c1, c1  \
 	VERLLF $7, d1, d1
 // PERMUTE applies VPERM with the given mask to each of v0..v3 in place
 // (each register is used as both VPERM source operands and as the result).
 #define PERMUTE(mask, v0, v1, v2, v3) \
 	VPERM v0, v0, mask, v0 \
 	VPERM v1, v1, mask, v1 \
 	VPERM v2, v2, mask, v2 \
 	VPERM v3, v3, mask, v3
 // ADDV adds vector x word-wise (VAF) to each of v0..v3 in place.
 #define ADDV(x, v0, v1, v2, v3) \
 	VAF x, v0, v0 \
 	VAF x, v1, v1 \
 	VAF x, v2, v2 \
 	VAF x, v3, v3
 // XORV processes one 64-byte block: it loads four vectors from off(src)
 // into M0..M3, permutes the keystream registers v0..v3 with the BSWAP mask,
 // XORs them into M0..M3 and stores the four result vectors to off(dst).
 // v0..v3 are left in their permuted form and M0..M3 are clobbered.
 #define XORV(off, dst, src, v0, v1, v2, v3) \
 	VLM  off(src), M0, M3          \
 	PERMUTE(BSWAP, v0, v1, v2, v3) \
 	VX   v0, M0, M0                \
 	VX   v1, M1, M1                \
 	VX   v2, M2, M2                \
 	VX   v3, M3, M3                \
 	VSTM M0, M3, off(dst)
 // SHUFFLE transposes the 4x4 matrix of words held in a, b, c, d (one row
 // per register) in place, using t, u, v, w as temporaries: afterwards
 // register a holds element 0 of every input, b element 1, and so on.
 #define SHUFFLE(a, b, c, d, t, u, v, w) \
 	VMRHF a, c, t \ // t = {a[0], c[0], a[1], c[1]}
 	VMRHF b, d, u \ // u = {b[0], d[0], b[1], d[1]}
 	VMRLF a, c, v \ // v = {a[2], c[2], a[3], c[3]}
 	VMRLF b, d, w \ // w = {b[2], d[2], b[3], d[3]}
 	VMRHF t, u, a \ // a = {a[0], b[0], c[0], d[0]}
 	VMRLF t, u, b \ // b = {a[1], b[1], c[1], d[1]}
 	VMRHF v, w, c \ // c = {a[2], b[2], c[2], d[2]}
 	VMRLF v, w, d // d = {a[3], b[3], c[3], d[3]}
 // func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
 //
 // xorKeyStreamVX XORs src with keystream and writes the result to dst,
 // handling four 64-byte blocks (256 bytes) per pass of the outer loop.
 // Each pass subtracts 256 from len(src) and the loop exits only when the
 // remainder is exactly zero, so this appears to require len(src) to be a
 // non-zero multiple of 256 (TODO confirm against callers). Before returning,
 // element 0 of the advanced counter vector is stored back to *counter.
 TEXT ·xorKeyStreamVX(SB), NOSPLIT, $0
 	MOVD $·constants<>(SB), R1
 	MOVD dst+0(FP), R2         // R2=&dst[0]
 	LMG  src+24(FP), R3, R4    // R3=&src[0] R4=len(src)
 	MOVD key+48(FP), R5        // R5=key
 	MOVD nonce+56(FP), R6      // R6=nonce
 	MOVD counter+64(FP), R7    // R7=counter
 	// load BSWAP and J0
 	VLM (R1), BSWAP, J0
 	// setup
 	// R0 is the length operand of the VLL that loads the nonce below.
 	// NOTE(review): 95 looks like a bit count (12*8-1) rather than a byte
 	// index; confirm the intended VLL length against the instruction reference.
 	MOVD  $95, R0
 	VLM   (R5), KEY0, KEY1
 	VLL   R0, (R6), NONCE
 	// Shift NONCE right using a shift amount of 32 placed in byte 7 of M0;
 	// presumably this leaves NONCE = [0, n0, n1, n2] so it lines up with
 	// state words 12..15 (TODO confirm).
 	VZERO M0
 	VLEIB $7, $32, M0
 	VSRLB M0, NONCE, NONCE
 	// initialize counter values
 	// CTR = [ctr, ctr, ctr, ctr] + [0, 1, 2, 3]: one counter per block.
 	VLREPF (R7), CTR
 	VZERO  INC
 	VLEIF  $1, $1, INC
 	VLEIF  $2, $2, INC
 	VLEIF  $3, $3, INC
 	VAF    INC, CTR, CTR
 	// INC = [4, 4, 4, 4]: added to CTR once per pass of the outer loop.
 	VREPIF $4, INC
 chacha:
 	// Replicate each initial state word across all four elements so that
 	// register Xi holds word i of four blocks; X12 takes the per-block
 	// counters instead of a replicated word.
 	VREPF $0, J0, X0
 	VREPF $1, J0, X1
 	VREPF $2, J0, X2
 	VREPF $3, J0, X3
 	VREPF $0, KEY0, X4
 	VREPF $1, KEY0, X5
 	VREPF $2, KEY0, X6
 	VREPF $3, KEY0, X7
 	VREPF $0, KEY1, X8
 	VREPF $1, KEY1, X9
 	VREPF $2, KEY1, X10
 	VREPF $3, KEY1, X11
 	VLR   CTR, X12
 	VREPF $1, NONCE, X13
 	VREPF $2, NONCE, X14
 	VREPF $3, NONCE, X15
 	// R1 (no longer needed as the constants address) becomes the round-loop
 	// counter.
 	MOVD $(NUM_ROUNDS/2), R1
 loop:
 	// First ROUND4 works on word groups (0,4,8,12) (1,5,9,13) (2,6,10,14)
 	// (3,7,11,15); the second on (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14).
 	ROUND4(X0, X4, X12,  X8, X1, X5, X13,  X9, X2, X6, X14, X10, X3, X7, X15, X11)
 	ROUND4(X0, X5, X15, X10, X1, X6, X12, X11, X2, X7, X13, X8,  X3, X4, X14, X9)
 	ADD $-1, R1
 	BNE loop
 	// decrement length
 	ADD $-256, R4
 	// rearrange vectors
 	// Each SHUFFLE transposes four registers so that a register holds four
 	// consecutive words of a single block; ADDV then adds the matching
 	// initial state words. The per-block counters differ per element, so
 	// they are added to X12 before the transpose.
 	SHUFFLE(X0, X1, X2, X3, M0, M1, M2, M3)
 	ADDV(J0, X0, X1, X2, X3)
 	SHUFFLE(X4, X5, X6, X7, M0, M1, M2, M3)
 	ADDV(KEY0, X4, X5, X6, X7)
 	SHUFFLE(X8, X9, X10, X11, M0, M1, M2, M3)
 	ADDV(KEY1, X8, X9, X10, X11)
 	VAF CTR, X12, X12
 	SHUFFLE(X12, X13, X14, X15, M0, M1, M2, M3)
 	ADDV(NONCE, X12, X13, X14, X15)
 	// increment counters
 	VAF INC, CTR, CTR
 	// xor keystream with plaintext
 	// Each XORV handles one 64-byte block at the given offset.
 	XORV(0*64, R2, R3, X0, X4,  X8, X12)
 	XORV(1*64, R2, R3, X1, X5,  X9, X13)
 	XORV(2*64, R2, R3, X2, X6, X10, X14)
 	XORV(3*64, R2, R3, X3, X7, X11, X15)
 	// increment pointers
 	MOVD $256(R2), R2
 	MOVD $256(R3), R3
 	// Repeat until the remaining length is exactly zero.
 	CMPBNE  R4, $0, chacha
 	// Write element 0 of CTR (the next unused counter value) back to *counter.
 	VSTEF $0, CTR, (R7)
 	RET
--- a/vendor/golang.org/x/crypto/chacha20/xor.go
+++ b/vendor/golang.org/x/crypto/chacha20/xor.go
@ -0,0 +1,41 @@
 // Copyright 2018 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 package chacha20
 import "runtime"
 // unaligned reports whether the target architecture has fast unaligned
 // 32-bit little endian loads and stores.
 const unaligned = runtime.GOARCH == "386" ||
 	runtime.GOARCH == "amd64" ||
 	runtime.GOARCH == "arm64" ||
 	runtime.GOARCH == "ppc64le" ||
 	runtime.GOARCH == "s390x"

 // xor treats the first four bytes of src as a little endian uint32, XORs
 // that value with u, and stores the result into the first four bytes of dst
 // in little endian byte order. It panics if either slice is shorter than
 // four bytes.
 func xor(dst, src []byte, u uint32) {
 	_, _ = src[3], dst[3] // eliminate bounds checks

 	if !unaligned {
 		// Byte-at-a-time path for the remaining architectures.
 		dst[0] = src[0] ^ byte(u)
 		dst[1] = src[1] ^ byte(u>>8)
 		dst[2] = src[2] ^ byte(u>>16)
 		dst[3] = src[3] ^ byte(u>>24)
 		return
 	}

 	// The compiler should turn the load and the stores below into single
 	// 32-bit unaligned little endian accesses.
 	// TODO: delete once the compiler does a reliably good job with the
 	// byte-wise code above. See issue #25111 for more details.
 	word := uint32(src[0]) | uint32(src[1])<<8 | uint32(src[2])<<16 | uint32(src[3])<<24
 	word ^= u
 	dst[0] = byte(word)
 	dst[1] = byte(word >> 8)
 	dst[2] = byte(word >> 16)
 	dst[3] = byte(word >> 24)
 }
--- a/vendor/golang.org/x/crypto/poly1305/bits_compat.go
+++ b/vendor/golang.org/x/crypto/poly1305/bits_compat.go
@ -0,0 +1,39 @@
 // Copyright 2019 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // +build !go1.13
 package poly1305
 // Generic fallbacks for the math/bits intrinsics, copied from
 // src/math/bits/bits.go. They were added in Go 1.12, but Add64 and Sub64 had
 // variable time fallbacks until Go 1.13.
 // bitsAdd64 returns x + y + carry (wrapping modulo 2^64) together with the
 // carry out of bit 63 as 0 or 1. It is computed with bitwise operations
 // only, without branching on the operands.
 func bitsAdd64(x, y, carry uint64) (sum, carryOut uint64) {
 	total := x + y + carry
 	// Bit 63 is set when both inputs have it set, or when either input has
 	// it set and the sum does not.
 	carryBits := (x & y) | ((x | y) &^ total)
 	return total, carryBits >> 63
 }
 // bitsSub64 returns x - y - borrow (wrapping modulo 2^64) together with the
 // borrow out of bit 63 as 0 or 1. It is computed with bitwise operations
 // only, without branching on the operands.
 func bitsSub64(x, y, borrow uint64) (diff, borrowOut uint64) {
 	result := x - y - borrow
 	// Bit 63 is set when x lacks it and y has it, or when x and y agree on
 	// it and the difference has it set.
 	borrowBits := (^x & y) | (^(x ^ y) & result)
 	return result, borrowBits >> 63
 }
 // bitsMul64 returns the full 128-bit product of x and y as (hi, lo), with
 // hi holding the upper 64 bits and lo the lower 64 bits. The upper half is
 // assembled from 32-bit partial products.
 func bitsMul64(x, y uint64) (hi, lo uint64) {
 	const lowHalf = 1<<32 - 1
 	xLo, xHi := x&lowHalf, x>>32
 	yLo, yHi := y&lowHalf, y>>32

 	// cross accumulates xHi*yLo plus the carry out of the lowest partial
 	// product; mid adds the other cross term to its low 32 bits.
 	cross := xHi*yLo + (xLo*yLo)>>32
 	mid := cross&lowHalf + xLo*yHi

 	return xHi*yHi + cross>>32 + mid>>32, x * y
 }
--- a/vendor/golang.org/x/crypto/poly1305/bits_go1.13.go
+++ b/vendor/golang.org/x/crypto/poly1305/bits_go1.13.go
@ -0,0 +1,21 @@
 // Copyright 2019 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // +build go1.13
 package poly1305
 import "math/bits"
 // bitsAdd64 returns the sum x + y + carry and the carry-out by delegating
 // to bits.Add64.
 func bitsAdd64(x, y, carry uint64) (sum, carryOut uint64) {
 	sum, carryOut = bits.Add64(x, y, carry)
 	return
 }
 // bitsSub64 returns the difference x - y - borrow and the borrow-out by
 // delegating to bits.Sub64.
 func bitsSub64(x, y, borrow uint64) (diff, borrowOut uint64) {
 	diff, borrowOut = bits.Sub64(x, y, borrow)
 	return
 }
 // bitsMul64 returns the 128-bit product of x and y as (hi, lo) by
 // delegating to bits.Mul64.
 func bitsMul64(x, y uint64) (hi, lo uint64) {
 	hi, lo = bits.Mul64(x, y)
 	return
 }