a4f1f3d4e8
This fixes a regression in the rclone tests from the v1.0.6 upgrade of uplink. The failure was due to an improperly converted error resulting in the wrong type of error.
500 lines
20 KiB
Go
500 lines
20 KiB
Go
//+build !noasm,!appengine
|
|
|
|
/*
|
|
* Minio Cloud Storage, (C) 2017 Minio, Inc.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package sha256
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"errors"
|
|
"hash"
|
|
"sort"
|
|
"sync/atomic"
|
|
"time"
|
|
)
|
|
|
|
// sha256X16Avx512 is the assembly routine behind blockAvx512; it has no Go
// body. It receives the interleaved digest state for 16 lanes, a scratch
// area, the round constant table, one mask value per round and the 16 input
// slices.
//
//go:noescape
func sha256X16Avx512(digests, scratch *[512]byte, table *[512]uint64, mask []uint64, inputs [16][]byte)
|
|
|
|
// Avx512ServerUID - uid base that deliberately does not start at 0 but at
// the next multiple of 16, so that it can be differentiated from the default
// initialization value of 0.
const Avx512ServerUID = 0x10
|
|
|
|
var uidCounter uint64
|
|
|
|
// NewAvx512 - initialize sha256 Avx512 implementation.
|
|
func NewAvx512(a512srv *Avx512Server) hash.Hash {
|
|
uid := atomic.AddUint64(&uidCounter, 1)
|
|
return &Avx512Digest{uid: uid, a512srv: a512srv}
|
|
}
|
|
|
|
// Avx512Digest - Type for computing SHA256 using Avx512
|
|
type Avx512Digest struct {
|
|
uid uint64
|
|
a512srv *Avx512Server
|
|
x [chunk]byte
|
|
nx int
|
|
len uint64
|
|
final bool
|
|
result [Size]byte
|
|
}
|
|
|
|
// Size - Return size of checksum
|
|
func (d *Avx512Digest) Size() int { return Size }
|
|
|
|
// BlockSize - Return blocksize of checksum
|
|
func (d Avx512Digest) BlockSize() int { return BlockSize }
|
|
|
|
// Reset - reset sha digest to its initial values
|
|
func (d *Avx512Digest) Reset() {
|
|
d.a512srv.blocksCh <- blockInput{uid: d.uid, reset: true}
|
|
d.nx = 0
|
|
d.len = 0
|
|
d.final = false
|
|
}
|
|
|
|
// Write to digest
|
|
func (d *Avx512Digest) Write(p []byte) (nn int, err error) {
|
|
|
|
if d.final {
|
|
return 0, errors.New("Avx512Digest already finalized. Reset first before writing again")
|
|
}
|
|
|
|
nn = len(p)
|
|
d.len += uint64(nn)
|
|
if d.nx > 0 {
|
|
n := copy(d.x[d.nx:], p)
|
|
d.nx += n
|
|
if d.nx == chunk {
|
|
d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: d.x[:]}
|
|
d.nx = 0
|
|
}
|
|
p = p[n:]
|
|
}
|
|
if len(p) >= chunk {
|
|
n := len(p) &^ (chunk - 1)
|
|
d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: p[:n]}
|
|
p = p[n:]
|
|
}
|
|
if len(p) > 0 {
|
|
d.nx = copy(d.x[:], p)
|
|
}
|
|
return
|
|
}
|
|
|
|
// Sum - Return sha256 sum in bytes
|
|
func (d *Avx512Digest) Sum(in []byte) (result []byte) {
|
|
|
|
if d.final {
|
|
return append(in, d.result[:]...)
|
|
}
|
|
|
|
trail := make([]byte, 0, 128)
|
|
trail = append(trail, d.x[:d.nx]...)
|
|
|
|
len := d.len
|
|
// Padding. Add a 1 bit and 0 bits until 56 bytes mod 64.
|
|
var tmp [64]byte
|
|
tmp[0] = 0x80
|
|
if len%64 < 56 {
|
|
trail = append(trail, tmp[0:56-len%64]...)
|
|
} else {
|
|
trail = append(trail, tmp[0:64+56-len%64]...)
|
|
}
|
|
d.nx = 0
|
|
|
|
// Length in bits.
|
|
len <<= 3
|
|
for i := uint(0); i < 8; i++ {
|
|
tmp[i] = byte(len >> (56 - 8*i))
|
|
}
|
|
trail = append(trail, tmp[0:8]...)
|
|
|
|
sumCh := make(chan [Size]byte)
|
|
d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: trail, final: true, sumCh: sumCh}
|
|
d.result = <-sumCh
|
|
d.final = true
|
|
return append(in, d.result[:]...)
|
|
}
|
|
|
|
// table is the round constant table handed to sha256X16Avx512. Each of the
// 64 32-bit SHA256 round constants is duplicated into both halves of a
// uint64, and that value is repeated eight times in a row, which gives
// 64*8 = 512 entries.
var table = func() (t [512]uint64) {
	roundConstants := [64]uint32{
		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
	}
	for i, k := range roundConstants {
		wide := uint64(k)<<32 | uint64(k)
		for j := 0; j < 8; j++ {
			t[i*8+j] = wide
		}
	}
	return t
}()
|
|
|
|
// Interface function to assembly ode
|
|
func blockAvx512(digests *[512]byte, input [16][]byte, mask []uint64) [16][Size]byte {
|
|
|
|
scratch := [512]byte{}
|
|
sha256X16Avx512(digests, &scratch, &table, mask, input)
|
|
|
|
output := [16][Size]byte{}
|
|
for i := 0; i < 16; i++ {
|
|
output[i] = getDigest(i, digests[:])
|
|
}
|
|
|
|
return output
|
|
}
|
|
|
|
func getDigest(index int, state []byte) (sum [Size]byte) {
|
|
for j := 0; j < 16; j += 2 {
|
|
for i := index*4 + j*Size; i < index*4+(j+1)*Size; i += Size {
|
|
binary.BigEndian.PutUint32(sum[j*2:], binary.LittleEndian.Uint32(state[i:i+4]))
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
// Message to send across input channel
|
|
type blockInput struct {
|
|
uid uint64
|
|
msg []byte
|
|
reset bool
|
|
final bool
|
|
sumCh chan [Size]byte
|
|
}
|
|
|
|
// Avx512Server - Type to implement 16x parallel handling of SHA256 invocations
|
|
type Avx512Server struct {
|
|
blocksCh chan blockInput // Input channel
|
|
totalIn int // Total number of inputs waiting to be processed
|
|
lanes [16]Avx512LaneInfo // Array with info per lane (out of 16)
|
|
digests map[uint64][Size]byte // Map of uids to (interim) digest results
|
|
}
|
|
|
|
// Avx512LaneInfo - Info for each lane
|
|
type Avx512LaneInfo struct {
|
|
uid uint64 // unique identification for this SHA processing
|
|
block []byte // input block to be processed
|
|
outputCh chan [Size]byte // channel for output result
|
|
}
|
|
|
|
// NewAvx512Server - Create new object for parallel processing handling
|
|
func NewAvx512Server() *Avx512Server {
|
|
a512srv := &Avx512Server{}
|
|
a512srv.digests = make(map[uint64][Size]byte)
|
|
a512srv.blocksCh = make(chan blockInput)
|
|
|
|
// Start a single thread for reading from the input channel
|
|
go a512srv.Process()
|
|
return a512srv
|
|
}
|
|
|
|
// Process - Sole handler for reading from the input channel
|
|
func (a512srv *Avx512Server) Process() {
|
|
for {
|
|
select {
|
|
case block := <-a512srv.blocksCh:
|
|
if block.reset {
|
|
a512srv.reset(block.uid)
|
|
continue
|
|
}
|
|
index := block.uid & 0xf
|
|
// fmt.Println("Adding message:", block.uid, index)
|
|
|
|
if a512srv.lanes[index].block != nil { // If slot is already filled, process all inputs
|
|
//fmt.Println("Invoking Blocks()")
|
|
a512srv.blocks()
|
|
}
|
|
a512srv.totalIn++
|
|
a512srv.lanes[index] = Avx512LaneInfo{uid: block.uid, block: block.msg}
|
|
if block.final {
|
|
a512srv.lanes[index].outputCh = block.sumCh
|
|
}
|
|
if a512srv.totalIn == len(a512srv.lanes) {
|
|
// fmt.Println("Invoking Blocks() while FULL: ")
|
|
a512srv.blocks()
|
|
}
|
|
|
|
// TODO: test with larger timeout
|
|
case <-time.After(1 * time.Microsecond):
|
|
for _, lane := range a512srv.lanes {
|
|
if lane.block != nil { // check if there is any input to process
|
|
// fmt.Println("Invoking Blocks() on TIMEOUT: ")
|
|
a512srv.blocks()
|
|
break // we are done
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Do a reset for this calculation
|
|
func (a512srv *Avx512Server) reset(uid uint64) {
|
|
|
|
// Check if there is a message still waiting to be processed (and remove if so)
|
|
for i, lane := range a512srv.lanes {
|
|
if lane.uid == uid {
|
|
if lane.block != nil {
|
|
a512srv.lanes[i] = Avx512LaneInfo{} // clear message
|
|
a512srv.totalIn--
|
|
}
|
|
}
|
|
}
|
|
|
|
// Delete entry from hash map
|
|
delete(a512srv.digests, uid)
|
|
}
|
|
|
|
// Invoke assembly and send results back
|
|
func (a512srv *Avx512Server) blocks() {
|
|
|
|
inputs := [16][]byte{}
|
|
for i := range inputs {
|
|
inputs[i] = a512srv.lanes[i].block
|
|
}
|
|
|
|
mask := expandMask(genMask(inputs))
|
|
outputs := blockAvx512(a512srv.getDigests(), inputs, mask)
|
|
|
|
a512srv.totalIn = 0
|
|
for i := 0; i < len(outputs); i++ {
|
|
uid, outputCh := a512srv.lanes[i].uid, a512srv.lanes[i].outputCh
|
|
a512srv.digests[uid] = outputs[i]
|
|
a512srv.lanes[i] = Avx512LaneInfo{}
|
|
|
|
if outputCh != nil {
|
|
// Send back result
|
|
outputCh <- outputs[i]
|
|
delete(a512srv.digests, uid) // Delete entry from hashmap
|
|
}
|
|
}
|
|
}
|
|
|
|
func (a512srv *Avx512Server) Write(uid uint64, p []byte) (nn int, err error) {
|
|
a512srv.blocksCh <- blockInput{uid: uid, msg: p}
|
|
return len(p), nil
|
|
}
|
|
|
|
// Sum - return sha256 sum in bytes for a given sum id.
|
|
func (a512srv *Avx512Server) Sum(uid uint64, p []byte) [32]byte {
|
|
sumCh := make(chan [32]byte)
|
|
a512srv.blocksCh <- blockInput{uid: uid, msg: p, final: true, sumCh: sumCh}
|
|
return <-sumCh
|
|
}
|
|
|
|
func (a512srv *Avx512Server) getDigests() *[512]byte {
|
|
digests := [512]byte{}
|
|
for i, lane := range a512srv.lanes {
|
|
a, ok := a512srv.digests[lane.uid]
|
|
if ok {
|
|
binary.BigEndian.PutUint32(digests[(i+0*16)*4:], binary.LittleEndian.Uint32(a[0:4]))
|
|
binary.BigEndian.PutUint32(digests[(i+1*16)*4:], binary.LittleEndian.Uint32(a[4:8]))
|
|
binary.BigEndian.PutUint32(digests[(i+2*16)*4:], binary.LittleEndian.Uint32(a[8:12]))
|
|
binary.BigEndian.PutUint32(digests[(i+3*16)*4:], binary.LittleEndian.Uint32(a[12:16]))
|
|
binary.BigEndian.PutUint32(digests[(i+4*16)*4:], binary.LittleEndian.Uint32(a[16:20]))
|
|
binary.BigEndian.PutUint32(digests[(i+5*16)*4:], binary.LittleEndian.Uint32(a[20:24]))
|
|
binary.BigEndian.PutUint32(digests[(i+6*16)*4:], binary.LittleEndian.Uint32(a[24:28]))
|
|
binary.BigEndian.PutUint32(digests[(i+7*16)*4:], binary.LittleEndian.Uint32(a[28:32]))
|
|
} else {
|
|
binary.LittleEndian.PutUint32(digests[(i+0*16)*4:], init0)
|
|
binary.LittleEndian.PutUint32(digests[(i+1*16)*4:], init1)
|
|
binary.LittleEndian.PutUint32(digests[(i+2*16)*4:], init2)
|
|
binary.LittleEndian.PutUint32(digests[(i+3*16)*4:], init3)
|
|
binary.LittleEndian.PutUint32(digests[(i+4*16)*4:], init4)
|
|
binary.LittleEndian.PutUint32(digests[(i+5*16)*4:], init5)
|
|
binary.LittleEndian.PutUint32(digests[(i+6*16)*4:], init6)
|
|
binary.LittleEndian.PutUint32(digests[(i+7*16)*4:], init7)
|
|
}
|
|
}
|
|
return &digests
|
|
}
|
|
|
|
// lane is a helper for sorting blocks based on length: it pairs the length
// of one input with the position it came from.
type lane struct {
	len uint // length of the input in bytes
	pos uint // index of the input in the 16-element input array
}
|
|
|
|
type lanes []lane
|
|
|
|
func (lns lanes) Len() int { return len(lns) }
|
|
func (lns lanes) Swap(i, j int) { lns[i], lns[j] = lns[j], lns[i] }
|
|
func (lns lanes) Less(i, j int) bool { return lns[i].len < lns[j].len }
|
|
|
|
// maskRounds is a helper that pairs a lane mask with the number of
// consecutive rounds for which that mask applies.
type maskRounds struct {
	mask   uint64 // one bit per lane; genMask clears a lane's bit once its input is used up
	rounds uint64 // number of rounds (64 bytes of input each) the mask stays in effect
}
|
|
|
|
func genMask(input [16][]byte) [16]maskRounds {
|
|
|
|
// Sort on blocks length small to large
|
|
var sorted [16]lane
|
|
for c, inpt := range input {
|
|
sorted[c] = lane{uint(len(inpt)), uint(c)}
|
|
}
|
|
sort.Sort(lanes(sorted[:]))
|
|
|
|
// Create mask array including 'rounds' between masks
|
|
m, round, index := uint64(0xffff), uint64(0), 0
|
|
var mr [16]maskRounds
|
|
for _, s := range sorted {
|
|
if s.len > 0 {
|
|
if uint64(s.len)>>6 > round {
|
|
mr[index] = maskRounds{m, (uint64(s.len) >> 6) - round}
|
|
index++
|
|
}
|
|
round = uint64(s.len) >> 6
|
|
}
|
|
m = m & ^(1 << uint(s.pos))
|
|
}
|
|
|
|
return mr
|
|
}
|
|
|
|
// TODO: remove function
|
|
func expandMask(mr [16]maskRounds) []uint64 {
|
|
size := uint64(0)
|
|
for _, r := range mr {
|
|
size += r.rounds
|
|
}
|
|
result, index := make([]uint64, size), 0
|
|
for _, r := range mr {
|
|
for j := uint64(0); j < r.rounds; j++ {
|
|
result[index] = r.mask
|
|
index++
|
|
}
|
|
}
|
|
return result
|
|
}
|