diff options
Diffstat (limited to 'vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.go')
| -rw-r--r-- | vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.go | 501 |
1 files changed, 501 insertions, 0 deletions
diff --git a/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.go b/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.go new file mode 100644 index 0000000..4b9473a --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.go | |||
| @@ -0,0 +1,501 @@ | |||
| 1 | //go:build !noasm && !appengine && gc | ||
| 2 | // +build !noasm,!appengine,gc | ||
| 3 | |||
| 4 | /* | ||
| 5 | * Minio Cloud Storage, (C) 2017 Minio, Inc. | ||
| 6 | * | ||
| 7 | * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 8 | * you may not use this file except in compliance with the License. | ||
| 9 | * You may obtain a copy of the License at | ||
| 10 | * | ||
| 11 | * http://www.apache.org/licenses/LICENSE-2.0 | ||
| 12 | * | ||
| 13 | * Unless required by applicable law or agreed to in writing, software | ||
| 14 | * distributed under the License is distributed on an "AS IS" BASIS, | ||
| 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 16 | * See the License for the specific language governing permissions and | ||
| 17 | * limitations under the License. | ||
| 18 | */ | ||
| 19 | |||
| 20 | package sha256 | ||
| 21 | |||
| 22 | import ( | ||
| 23 | "encoding/binary" | ||
| 24 | "errors" | ||
| 25 | "hash" | ||
| 26 | "sort" | ||
| 27 | "sync/atomic" | ||
| 28 | "time" | ||
| 29 | ) | ||
| 30 | |||
// sha256X16Avx512 has no Go body in this file; it is the routine that
// blockAvx512 calls to hash up to 16 inputs at once. It receives the digest
// state for all 16 lanes (blockAvx512 reads the results back out of digests
// after the call), a 512-byte scratch buffer, the round-constant table, the
// expanded per-round lane masks, and one input slice per lane.
//
//go:noescape
func sha256X16Avx512(digests *[512]byte, scratch *[512]byte, table *[512]uint64, mask []uint64, inputs [16][]byte)
| 33 | |||
// Avx512ServerUID - Do not start at 0 but next multiple of 16 so as to be able to
// differentiate with default initialisation value of 0.
//
// NOTE(review): this constant is not referenced anywhere in the visible code;
// NewAvx512 numbers digests from uidCounter instead — confirm whether other
// files use it.
const Avx512ServerUID = 16

// uidCounter is incremented atomically by NewAvx512 so that every digest gets
// its own uid.
var uidCounter uint64
| 39 | |||
| 40 | // NewAvx512 - initialize sha256 Avx512 implementation. | ||
| 41 | func NewAvx512(a512srv *Avx512Server) hash.Hash { | ||
| 42 | uid := atomic.AddUint64(&uidCounter, 1) | ||
| 43 | return &Avx512Digest{uid: uid, a512srv: a512srv} | ||
| 44 | } | ||
| 45 | |||
// Avx512Digest - Type for computing SHA256 using Avx512
type Avx512Digest struct {
	uid     uint64        // id under which this digest's blocks are sent to the server
	a512srv *Avx512Server // server whose blocksCh receives this digest's blocks
	x       [chunk]byte   // buffered input that does not yet fill a whole chunk
	nx      int           // number of valid bytes in x
	len     uint64        // total number of bytes written so far
	final   bool          // set by Sum; Write returns an error while it is set
	result  [Size]byte    // checksum cached by Sum
}
| 56 | |||
| 57 | // Size - Return size of checksum | ||
| 58 | func (d *Avx512Digest) Size() int { return Size } | ||
| 59 | |||
| 60 | // BlockSize - Return blocksize of checksum | ||
| 61 | func (d Avx512Digest) BlockSize() int { return BlockSize } | ||
| 62 | |||
| 63 | // Reset - reset sha digest to its initial values | ||
| 64 | func (d *Avx512Digest) Reset() { | ||
| 65 | d.a512srv.blocksCh <- blockInput{uid: d.uid, reset: true} | ||
| 66 | d.nx = 0 | ||
| 67 | d.len = 0 | ||
| 68 | d.final = false | ||
| 69 | } | ||
| 70 | |||
| 71 | // Write to digest | ||
| 72 | func (d *Avx512Digest) Write(p []byte) (nn int, err error) { | ||
| 73 | |||
| 74 | if d.final { | ||
| 75 | return 0, errors.New("Avx512Digest already finalized. Reset first before writing again") | ||
| 76 | } | ||
| 77 | |||
| 78 | nn = len(p) | ||
| 79 | d.len += uint64(nn) | ||
| 80 | if d.nx > 0 { | ||
| 81 | n := copy(d.x[d.nx:], p) | ||
| 82 | d.nx += n | ||
| 83 | if d.nx == chunk { | ||
| 84 | d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: d.x[:]} | ||
| 85 | d.nx = 0 | ||
| 86 | } | ||
| 87 | p = p[n:] | ||
| 88 | } | ||
| 89 | if len(p) >= chunk { | ||
| 90 | n := len(p) &^ (chunk - 1) | ||
| 91 | d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: p[:n]} | ||
| 92 | p = p[n:] | ||
| 93 | } | ||
| 94 | if len(p) > 0 { | ||
| 95 | d.nx = copy(d.x[:], p) | ||
| 96 | } | ||
| 97 | return | ||
| 98 | } | ||
| 99 | |||
| 100 | // Sum - Return sha256 sum in bytes | ||
| 101 | func (d *Avx512Digest) Sum(in []byte) (result []byte) { | ||
| 102 | |||
| 103 | if d.final { | ||
| 104 | return append(in, d.result[:]...) | ||
| 105 | } | ||
| 106 | |||
| 107 | trail := make([]byte, 0, 128) | ||
| 108 | trail = append(trail, d.x[:d.nx]...) | ||
| 109 | |||
| 110 | len := d.len | ||
| 111 | // Padding. Add a 1 bit and 0 bits until 56 bytes mod 64. | ||
| 112 | var tmp [64]byte | ||
| 113 | tmp[0] = 0x80 | ||
| 114 | if len%64 < 56 { | ||
| 115 | trail = append(trail, tmp[0:56-len%64]...) | ||
| 116 | } else { | ||
| 117 | trail = append(trail, tmp[0:64+56-len%64]...) | ||
| 118 | } | ||
| 119 | d.nx = 0 | ||
| 120 | |||
| 121 | // Length in bits. | ||
| 122 | len <<= 3 | ||
| 123 | for i := uint(0); i < 8; i++ { | ||
| 124 | tmp[i] = byte(len >> (56 - 8*i)) | ||
| 125 | } | ||
| 126 | trail = append(trail, tmp[0:8]...) | ||
| 127 | |||
| 128 | sumCh := make(chan [Size]byte) | ||
| 129 | d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: trail, final: true, sumCh: sumCh} | ||
| 130 | d.result = <-sumCh | ||
| 131 | d.final = true | ||
| 132 | return append(in, d.result[:]...) | ||
| 133 | } | ||
| 134 | |||
// table holds the 64 SHA-256 round constants in the layout handed to
// sha256X16Avx512: each 32-bit constant is duplicated into both halves of a
// uint64 and that uint64 is repeated 8 times, giving 64 x 8 = 512 entries.
var table = func() (t [512]uint64) {
	k := [64]uint32{
		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
		0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
		0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
		0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
		0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
		0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
		0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
		0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
		0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
	}
	for i, c := range k {
		wide := uint64(c)<<32 | uint64(c)
		row := t[i*8 : i*8+8]
		for j := range row {
			row[j] = wide
		}
	}
	return t
}()
| 264 | |||
| 265 | // Interface function to assembly ode | ||
| 266 | func blockAvx512(digests *[512]byte, input [16][]byte, mask []uint64) [16][Size]byte { | ||
| 267 | |||
| 268 | scratch := [512]byte{} | ||
| 269 | sha256X16Avx512(digests, &scratch, &table, mask, input) | ||
| 270 | |||
| 271 | output := [16][Size]byte{} | ||
| 272 | for i := 0; i < 16; i++ { | ||
| 273 | output[i] = getDigest(i, digests[:]) | ||
| 274 | } | ||
| 275 | |||
| 276 | return output | ||
| 277 | } | ||
| 278 | |||
| 279 | func getDigest(index int, state []byte) (sum [Size]byte) { | ||
| 280 | for j := 0; j < 16; j += 2 { | ||
| 281 | for i := index*4 + j*Size; i < index*4+(j+1)*Size; i += Size { | ||
| 282 | binary.BigEndian.PutUint32(sum[j*2:], binary.LittleEndian.Uint32(state[i:i+4])) | ||
| 283 | } | ||
| 284 | } | ||
| 285 | return | ||
| 286 | } | ||
| 287 | |||
// Message to send across input channel
type blockInput struct {
	uid   uint64          // id of the digest this message belongs to
	msg   []byte          // data to hash; Process stores it as the lane's block
	reset bool            // when set, Process only calls reset(uid) and ignores the other fields
	final bool            // when set, Process registers sumCh as the lane's output channel
	sumCh chan [Size]byte // channel the digest is sent back on (used only when final is set)
}
| 296 | |||
// Avx512Server - Type to implement 16x parallel handling of SHA256 invocations
//
// Inputs arrive over blocksCh and are read by Process. The remaining fields
// are accessed without any locking by Process and the helpers it calls
// (reset, blocks, getDigests).
type Avx512Server struct {
	blocksCh chan blockInput       // Input channel
	totalIn  int                   // Total number of inputs waiting to be processed
	lanes    [16]Avx512LaneInfo    // Array with info per lane (out of 16); Process picks the lane as uid & 0xf
	digests  map[uint64][Size]byte // Map of uids to (interim) digest results
}
| 304 | |||
// Avx512LaneInfo - Info for each lane
type Avx512LaneInfo struct {
	uid      uint64          // unique identification for this SHA processing
	block    []byte          // input block to be processed (nil when the lane holds no input)
	outputCh chan [Size]byte // channel for output result; only set when the queued block is a final one
}
| 311 | |||
| 312 | // NewAvx512Server - Create new object for parallel processing handling | ||
| 313 | func NewAvx512Server() *Avx512Server { | ||
| 314 | a512srv := &Avx512Server{} | ||
| 315 | a512srv.digests = make(map[uint64][Size]byte) | ||
| 316 | a512srv.blocksCh = make(chan blockInput) | ||
| 317 | |||
| 318 | // Start a single thread for reading from the input channel | ||
| 319 | go a512srv.Process() | ||
| 320 | return a512srv | ||
| 321 | } | ||
| 322 | |||
| 323 | // Process - Sole handler for reading from the input channel | ||
| 324 | func (a512srv *Avx512Server) Process() { | ||
| 325 | for { | ||
| 326 | select { | ||
| 327 | case block := <-a512srv.blocksCh: | ||
| 328 | if block.reset { | ||
| 329 | a512srv.reset(block.uid) | ||
| 330 | continue | ||
| 331 | } | ||
| 332 | index := block.uid & 0xf | ||
| 333 | // fmt.Println("Adding message:", block.uid, index) | ||
| 334 | |||
| 335 | if a512srv.lanes[index].block != nil { // If slot is already filled, process all inputs | ||
| 336 | //fmt.Println("Invoking Blocks()") | ||
| 337 | a512srv.blocks() | ||
| 338 | } | ||
| 339 | a512srv.totalIn++ | ||
| 340 | a512srv.lanes[index] = Avx512LaneInfo{uid: block.uid, block: block.msg} | ||
| 341 | if block.final { | ||
| 342 | a512srv.lanes[index].outputCh = block.sumCh | ||
| 343 | } | ||
| 344 | if a512srv.totalIn == len(a512srv.lanes) { | ||
| 345 | // fmt.Println("Invoking Blocks() while FULL: ") | ||
| 346 | a512srv.blocks() | ||
| 347 | } | ||
| 348 | |||
| 349 | // TODO: test with larger timeout | ||
| 350 | case <-time.After(1 * time.Microsecond): | ||
| 351 | for _, lane := range a512srv.lanes { | ||
| 352 | if lane.block != nil { // check if there is any input to process | ||
| 353 | // fmt.Println("Invoking Blocks() on TIMEOUT: ") | ||
| 354 | a512srv.blocks() | ||
| 355 | break // we are done | ||
| 356 | } | ||
| 357 | } | ||
| 358 | } | ||
| 359 | } | ||
| 360 | } | ||
| 361 | |||
| 362 | // Do a reset for this calculation | ||
| 363 | func (a512srv *Avx512Server) reset(uid uint64) { | ||
| 364 | |||
| 365 | // Check if there is a message still waiting to be processed (and remove if so) | ||
| 366 | for i, lane := range a512srv.lanes { | ||
| 367 | if lane.uid == uid { | ||
| 368 | if lane.block != nil { | ||
| 369 | a512srv.lanes[i] = Avx512LaneInfo{} // clear message | ||
| 370 | a512srv.totalIn-- | ||
| 371 | } | ||
| 372 | } | ||
| 373 | } | ||
| 374 | |||
| 375 | // Delete entry from hash map | ||
| 376 | delete(a512srv.digests, uid) | ||
| 377 | } | ||
| 378 | |||
| 379 | // Invoke assembly and send results back | ||
| 380 | func (a512srv *Avx512Server) blocks() { | ||
| 381 | |||
| 382 | inputs := [16][]byte{} | ||
| 383 | for i := range inputs { | ||
| 384 | inputs[i] = a512srv.lanes[i].block | ||
| 385 | } | ||
| 386 | |||
| 387 | mask := expandMask(genMask(inputs)) | ||
| 388 | outputs := blockAvx512(a512srv.getDigests(), inputs, mask) | ||
| 389 | |||
| 390 | a512srv.totalIn = 0 | ||
| 391 | for i := 0; i < len(outputs); i++ { | ||
| 392 | uid, outputCh := a512srv.lanes[i].uid, a512srv.lanes[i].outputCh | ||
| 393 | a512srv.digests[uid] = outputs[i] | ||
| 394 | a512srv.lanes[i] = Avx512LaneInfo{} | ||
| 395 | |||
| 396 | if outputCh != nil { | ||
| 397 | // Send back result | ||
| 398 | outputCh <- outputs[i] | ||
| 399 | delete(a512srv.digests, uid) // Delete entry from hashmap | ||
| 400 | } | ||
| 401 | } | ||
| 402 | } | ||
| 403 | |||
| 404 | func (a512srv *Avx512Server) Write(uid uint64, p []byte) (nn int, err error) { | ||
| 405 | a512srv.blocksCh <- blockInput{uid: uid, msg: p} | ||
| 406 | return len(p), nil | ||
| 407 | } | ||
| 408 | |||
| 409 | // Sum - return sha256 sum in bytes for a given sum id. | ||
| 410 | func (a512srv *Avx512Server) Sum(uid uint64, p []byte) [32]byte { | ||
| 411 | sumCh := make(chan [32]byte) | ||
| 412 | a512srv.blocksCh <- blockInput{uid: uid, msg: p, final: true, sumCh: sumCh} | ||
| 413 | return <-sumCh | ||
| 414 | } | ||
| 415 | |||
| 416 | func (a512srv *Avx512Server) getDigests() *[512]byte { | ||
| 417 | digests := [512]byte{} | ||
| 418 | for i, lane := range a512srv.lanes { | ||
| 419 | a, ok := a512srv.digests[lane.uid] | ||
| 420 | if ok { | ||
| 421 | binary.BigEndian.PutUint32(digests[(i+0*16)*4:], binary.LittleEndian.Uint32(a[0:4])) | ||
| 422 | binary.BigEndian.PutUint32(digests[(i+1*16)*4:], binary.LittleEndian.Uint32(a[4:8])) | ||
| 423 | binary.BigEndian.PutUint32(digests[(i+2*16)*4:], binary.LittleEndian.Uint32(a[8:12])) | ||
| 424 | binary.BigEndian.PutUint32(digests[(i+3*16)*4:], binary.LittleEndian.Uint32(a[12:16])) | ||
| 425 | binary.BigEndian.PutUint32(digests[(i+4*16)*4:], binary.LittleEndian.Uint32(a[16:20])) | ||
| 426 | binary.BigEndian.PutUint32(digests[(i+5*16)*4:], binary.LittleEndian.Uint32(a[20:24])) | ||
| 427 | binary.BigEndian.PutUint32(digests[(i+6*16)*4:], binary.LittleEndian.Uint32(a[24:28])) | ||
| 428 | binary.BigEndian.PutUint32(digests[(i+7*16)*4:], binary.LittleEndian.Uint32(a[28:32])) | ||
| 429 | } else { | ||
| 430 | binary.LittleEndian.PutUint32(digests[(i+0*16)*4:], init0) | ||
| 431 | binary.LittleEndian.PutUint32(digests[(i+1*16)*4:], init1) | ||
| 432 | binary.LittleEndian.PutUint32(digests[(i+2*16)*4:], init2) | ||
| 433 | binary.LittleEndian.PutUint32(digests[(i+3*16)*4:], init3) | ||
| 434 | binary.LittleEndian.PutUint32(digests[(i+4*16)*4:], init4) | ||
| 435 | binary.LittleEndian.PutUint32(digests[(i+5*16)*4:], init5) | ||
| 436 | binary.LittleEndian.PutUint32(digests[(i+6*16)*4:], init6) | ||
| 437 | binary.LittleEndian.PutUint32(digests[(i+7*16)*4:], init7) | ||
| 438 | } | ||
| 439 | } | ||
| 440 | return &digests | ||
| 441 | } | ||
| 442 | |||
// lane pairs the length of an input with the position (lane number) it came
// from, so that inputs can be sorted on length without losing their position.
type lane struct {
	len uint
	pos uint
}

// lanes implements sort.Interface, ordering lanes by ascending length.
type lanes []lane

// Len returns the number of lanes.
func (lns lanes) Len() int {
	return len(lns)
}

// Swap exchanges the lanes at i and j.
func (lns lanes) Swap(i, j int) {
	tmp := lns[i]
	lns[i] = lns[j]
	lns[j] = tmp
}

// Less reports whether lane i is strictly shorter than lane j.
func (lns lanes) Less(i, j int) bool {
	a, b := lns[i], lns[j]
	return a.len < b.len
}
| 454 | |||
// Helper struct pairing a lane mask with the number of consecutive rounds it
// applies to; produced by genMask and flattened by expandMask.
type maskRounds struct {
	mask   uint64 // one bit per lane position; genMask starts from 0xffff and clears bits as lanes run out
	rounds uint64 // number of rounds (64-byte steps of input) for which mask applies
}
| 460 | |||
| 461 | func genMask(input [16][]byte) [16]maskRounds { | ||
| 462 | |||
| 463 | // Sort on blocks length small to large | ||
| 464 | var sorted [16]lane | ||
| 465 | for c, inpt := range input { | ||
| 466 | sorted[c] = lane{uint(len(inpt)), uint(c)} | ||
| 467 | } | ||
| 468 | sort.Sort(lanes(sorted[:])) | ||
| 469 | |||
| 470 | // Create mask array including 'rounds' between masks | ||
| 471 | m, round, index := uint64(0xffff), uint64(0), 0 | ||
| 472 | var mr [16]maskRounds | ||
| 473 | for _, s := range sorted { | ||
| 474 | if s.len > 0 { | ||
| 475 | if uint64(s.len)>>6 > round { | ||
| 476 | mr[index] = maskRounds{m, (uint64(s.len) >> 6) - round} | ||
| 477 | index++ | ||
| 478 | } | ||
| 479 | round = uint64(s.len) >> 6 | ||
| 480 | } | ||
| 481 | m = m & ^(1 << uint(s.pos)) | ||
| 482 | } | ||
| 483 | |||
| 484 | return mr | ||
| 485 | } | ||
| 486 | |||
| 487 | // TODO: remove function | ||
| 488 | func expandMask(mr [16]maskRounds) []uint64 { | ||
| 489 | size := uint64(0) | ||
| 490 | for _, r := range mr { | ||
| 491 | size += r.rounds | ||
| 492 | } | ||
| 493 | result, index := make([]uint64, size), 0 | ||
| 494 | for _, r := range mr { | ||
| 495 | for j := uint64(0); j < r.rounds; j++ { | ||
| 496 | result[index] = r.mask | ||
| 497 | index++ | ||
| 498 | } | ||
| 499 | } | ||
| 500 | return result | ||
| 501 | } | ||