diff options
Diffstat (limited to 'vendor/github.com/minio/sha256-simd/sha256block_amd64.s')
| -rw-r--r-- | vendor/github.com/minio/sha256-simd/sha256block_amd64.s | 266 |
1 files changed, 266 insertions, 0 deletions
diff --git a/vendor/github.com/minio/sha256-simd/sha256block_amd64.s b/vendor/github.com/minio/sha256-simd/sha256block_amd64.s new file mode 100644 index 0000000..c98a1d8 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256block_amd64.s | |||
| @@ -0,0 +1,266 @@ | |||
| 1 | //+build !noasm,!appengine,gc | ||
| 2 | |||
| 3 | // SHA intrinsic version of SHA256 | ||
| 4 | |||
| 5 | // Kristofer Peterson, (C) 2018. | ||
| 6 | // | ||
| 7 | // Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 8 | // you may not use this file except in compliance with the License. | ||
| 9 | // You may obtain a copy of the License at | ||
| 10 | // | ||
| 11 | // http://www.apache.org/licenses/LICENSE-2.0 | ||
| 12 | // | ||
| 13 | // Unless required by applicable law or agreed to in writing, software | ||
| 14 | // distributed under the License is distributed on an "AS IS" BASIS, | ||
| 15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 16 | // See the License for the specific language governing permissions and | ||
| 17 | // limitations under the License. | ||
| 18 | // | ||
| 19 | |||
| 20 | #include "textflag.h" | ||
| 21 | |||
| 22 | DATA K<>+0x00(SB)/4, $0x428a2f98 // K<>: the 64 SHA-256 round constants (FIPS 180-4), one 32-bit word per entry, in round order | ||
| 23 | DATA K<>+0x04(SB)/4, $0x71374491 | ||
| 24 | DATA K<>+0x08(SB)/4, $0xb5c0fbcf | ||
| 25 | DATA K<>+0x0c(SB)/4, $0xe9b5dba5 | ||
| 26 | DATA K<>+0x10(SB)/4, $0x3956c25b | ||
| 27 | DATA K<>+0x14(SB)/4, $0x59f111f1 | ||
| 28 | DATA K<>+0x18(SB)/4, $0x923f82a4 | ||
| 29 | DATA K<>+0x1c(SB)/4, $0xab1c5ed5 | ||
| 30 | DATA K<>+0x20(SB)/4, $0xd807aa98 | ||
| 31 | DATA K<>+0x24(SB)/4, $0x12835b01 | ||
| 32 | DATA K<>+0x28(SB)/4, $0x243185be | ||
| 33 | DATA K<>+0x2c(SB)/4, $0x550c7dc3 | ||
| 34 | DATA K<>+0x30(SB)/4, $0x72be5d74 | ||
| 35 | DATA K<>+0x34(SB)/4, $0x80deb1fe | ||
| 36 | DATA K<>+0x38(SB)/4, $0x9bdc06a7 | ||
| 37 | DATA K<>+0x3c(SB)/4, $0xc19bf174 | ||
| 38 | DATA K<>+0x40(SB)/4, $0xe49b69c1 | ||
| 39 | DATA K<>+0x44(SB)/4, $0xefbe4786 | ||
| 40 | DATA K<>+0x48(SB)/4, $0x0fc19dc6 | ||
| 41 | DATA K<>+0x4c(SB)/4, $0x240ca1cc | ||
| 42 | DATA K<>+0x50(SB)/4, $0x2de92c6f | ||
| 43 | DATA K<>+0x54(SB)/4, $0x4a7484aa | ||
| 44 | DATA K<>+0x58(SB)/4, $0x5cb0a9dc | ||
| 45 | DATA K<>+0x5c(SB)/4, $0x76f988da | ||
| 46 | DATA K<>+0x60(SB)/4, $0x983e5152 | ||
| 47 | DATA K<>+0x64(SB)/4, $0xa831c66d | ||
| 48 | DATA K<>+0x68(SB)/4, $0xb00327c8 | ||
| 49 | DATA K<>+0x6c(SB)/4, $0xbf597fc7 | ||
| 50 | DATA K<>+0x70(SB)/4, $0xc6e00bf3 | ||
| 51 | DATA K<>+0x74(SB)/4, $0xd5a79147 | ||
| 52 | DATA K<>+0x78(SB)/4, $0x06ca6351 | ||
| 53 | DATA K<>+0x7c(SB)/4, $0x14292967 | ||
| 54 | DATA K<>+0x80(SB)/4, $0x27b70a85 | ||
| 55 | DATA K<>+0x84(SB)/4, $0x2e1b2138 | ||
| 56 | DATA K<>+0x88(SB)/4, $0x4d2c6dfc | ||
| 57 | DATA K<>+0x8c(SB)/4, $0x53380d13 | ||
| 58 | DATA K<>+0x90(SB)/4, $0x650a7354 | ||
| 59 | DATA K<>+0x94(SB)/4, $0x766a0abb | ||
| 60 | DATA K<>+0x98(SB)/4, $0x81c2c92e | ||
| 61 | DATA K<>+0x9c(SB)/4, $0x92722c85 | ||
| 62 | DATA K<>+0xa0(SB)/4, $0xa2bfe8a1 | ||
| 63 | DATA K<>+0xa4(SB)/4, $0xa81a664b | ||
| 64 | DATA K<>+0xa8(SB)/4, $0xc24b8b70 | ||
| 65 | DATA K<>+0xac(SB)/4, $0xc76c51a3 | ||
| 66 | DATA K<>+0xb0(SB)/4, $0xd192e819 | ||
| 67 | DATA K<>+0xb4(SB)/4, $0xd6990624 | ||
| 68 | DATA K<>+0xb8(SB)/4, $0xf40e3585 | ||
| 69 | DATA K<>+0xbc(SB)/4, $0x106aa070 | ||
| 70 | DATA K<>+0xc0(SB)/4, $0x19a4c116 | ||
| 71 | DATA K<>+0xc4(SB)/4, $0x1e376c08 | ||
| 72 | DATA K<>+0xc8(SB)/4, $0x2748774c | ||
| 73 | DATA K<>+0xcc(SB)/4, $0x34b0bcb5 | ||
| 74 | DATA K<>+0xd0(SB)/4, $0x391c0cb3 | ||
| 75 | DATA K<>+0xd4(SB)/4, $0x4ed8aa4a | ||
| 76 | DATA K<>+0xd8(SB)/4, $0x5b9cca4f | ||
| 77 | DATA K<>+0xdc(SB)/4, $0x682e6ff3 | ||
| 78 | DATA K<>+0xe0(SB)/4, $0x748f82ee | ||
| 79 | DATA K<>+0xe4(SB)/4, $0x78a5636f | ||
| 80 | DATA K<>+0xe8(SB)/4, $0x84c87814 | ||
| 81 | DATA K<>+0xec(SB)/4, $0x8cc70208 | ||
| 82 | DATA K<>+0xf0(SB)/4, $0x90befffa | ||
| 83 | DATA K<>+0xf4(SB)/4, $0xa4506ceb | ||
| 84 | DATA K<>+0xf8(SB)/4, $0xbef9a3f7 | ||
| 85 | DATA K<>+0xfc(SB)/4, $0xc67178f2 | ||
| 86 | GLOBL K<>(SB), RODATA|NOPTR, $256 // 64 entries * 4 bytes; file-local, read-only, contains no pointers | ||
| 87 | |||
| 88 | DATA SHUF_MASK<>+0x00(SB)/8, $0x0405060700010203 // PSHUFB control: reverses the byte order inside each 32-bit lane (lanes 0 and 1) | ||
| 89 | DATA SHUF_MASK<>+0x08(SB)/8, $0x0c0d0e0f08090a0b // same byte reversal for lanes 2 and 3 | ||
| 90 | GLOBL SHUF_MASK<>(SB), RODATA|NOPTR, $16 // one 16-byte XMM-sized constant, loaded into X15 by blockIntelSha | ||
| 91 | |||
| 92 | // Register Usage | ||
| 93 | // BX base address of constant table (constant) | ||
| 94 | // DX hash_state (constant) | ||
| 95 | // SI hash_data.data | ||
| 96 | // DI hash_data.data + hash_data.length - 64 (constant) | ||
| 97 | // X0 scratch | ||
| 98 | // X1 scratch | ||
| 99 | // X2 working hash state // ABEF | ||
| 100 | // X3 working hash state // CDGH | ||
| 101 | // X4 first 16 bytes of block | ||
| 102 | // X5 second 16 bytes of block | ||
| 103 | // X6 third 16 bytes of block | ||
| 104 | // X7 fourth 16 bytes of block | ||
| 105 | // X12 saved hash state // ABEF | ||
| 106 | // X13 saved hash state // CDGH | ||
| 107 | // X15 data shuffle mask (constant) | ||
| 108 | |||
| 109 | TEXT ·blockIntelSha(SB), NOSPLIT, $0-32 // args: h+0(FP), message_base+8(FP), message_len+16(FP); hashes every whole 64-byte block of the message and updates the state at h in place | ||
| 110 | MOVQ h+0(FP), DX // DX = pointer to the hash state | ||
| 111 | MOVQ message_base+8(FP), SI // SI = start of the input data | ||
| 112 | MOVQ message_len+16(FP), DI // DI = input length in bytes | ||
| 113 | LEAQ -64(SI)(DI*1), DI // DI = SI + len - 64: the last address at which a full block can start | ||
| 114 | MOVOU (DX), X2 // X2 = state bytes 0-15 | ||
| 115 | MOVOU 16(DX), X1 // X1 = state bytes 16-31 | ||
| 116 | MOVO X2, X3 | ||
| 117 | PUNPCKLLQ X1, X2 // interleave the low dwords of the two state halves | ||
| 118 | PUNPCKHLQ X1, X3 // interleave the high dwords of the two state halves | ||
| 119 | PSHUFD $0x27, X2, X2 // reorder into the ABEF layout listed under Register Usage | ||
| 120 | PSHUFD $0x27, X3, X3 // reorder into the CDGH layout listed under Register Usage | ||
| 121 | MOVO SHUF_MASK<>(SB), X15 // X15 = PSHUFB mask that byte-swaps each 32-bit word | ||
| 122 | LEAQ K<>(SB), BX // BX = base of the round-constant table | ||
| 123 | |||
| 124 | JMP TEST // check the bound first so that input shorter than 64 bytes runs no rounds | ||
| 125 | |||
| 126 | LOOP: | ||
| 127 | MOVO X2, X12 // save the incoming state; it is added back after rounds 60-63 | ||
| 128 | MOVO X3, X13 | ||
| 129 | |||
| 130 | // load block and shuffle | ||
| 131 | MOVOU (SI), X4 | ||
| 132 | MOVOU 16(SI), X5 | ||
| 133 | MOVOU 32(SI), X6 | ||
| 134 | MOVOU 48(SI), X7 | ||
| 135 | PSHUFB X15, X4 // byte-swap each 32-bit message word using the mask in X15 | ||
| 136 | PSHUFB X15, X5 | ||
| 137 | PSHUFB X15, X6 | ||
| 138 | PSHUFB X15, X7 | ||
| 139 | |||
| 140 | #define ROUND456 \ | ||
| 141 | PADDL X5, X0 \ | ||
| 142 | LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 | ||
| 143 | MOVO X5, X1 \ | ||
| 144 | LONG $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1, XMM4, 4 | ||
| 145 | PADDL X1, X6 \ | ||
| 146 | LONG $0xf5cd380f \ // SHA256MSG2 XMM6, XMM5 | ||
| 147 | PSHUFD $0x4e, X0, X0 \ | ||
| 148 | LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 | ||
| 149 | LONG $0xe5cc380f // SHA256MSG1 XMM4, XMM5 | ||
| 150 | |||
| 151 | #define ROUND567 \ | ||
| 152 | PADDL X6, X0 \ | ||
| 153 | LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 | ||
| 154 | MOVO X6, X1 \ | ||
| 155 | LONG $0x0f3a0f66; WORD $0x04cd \ // PALIGNR XMM1, XMM5, 4 | ||
| 156 | PADDL X1, X7 \ | ||
| 157 | LONG $0xfecd380f \ // SHA256MSG2 XMM7, XMM6 | ||
| 158 | PSHUFD $0x4e, X0, X0 \ | ||
| 159 | LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 | ||
| 160 | LONG $0xeecc380f // SHA256MSG1 XMM5, XMM6 | ||
| 161 | |||
| 162 | #define ROUND674 \ | ||
| 163 | PADDL X7, X0 \ | ||
| 164 | LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 | ||
| 165 | MOVO X7, X1 \ | ||
| 166 | LONG $0x0f3a0f66; WORD $0x04ce \ // PALIGNR XMM1, XMM6, 4 | ||
| 167 | PADDL X1, X4 \ | ||
| 168 | LONG $0xe7cd380f \ // SHA256MSG2 XMM4, XMM7 | ||
| 169 | PSHUFD $0x4e, X0, X0 \ | ||
| 170 | LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 | ||
| 171 | LONG $0xf7cc380f // SHA256MSG1 XMM6, XMM7 | ||
| 172 | |||
| 173 | #define ROUND745 \ | ||
| 174 | PADDL X4, X0 \ | ||
| 175 | LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 | ||
| 176 | MOVO X4, X1 \ | ||
| 177 | LONG $0x0f3a0f66; WORD $0x04cf \ // PALIGNR XMM1, XMM7, 4 | ||
| 178 | PADDL X1, X5 \ | ||
| 179 | LONG $0xeccd380f \ // SHA256MSG2 XMM5, XMM4 | ||
| 180 | PSHUFD $0x4e, X0, X0 \ | ||
| 181 | LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 | ||
| 182 | LONG $0xfccc380f // SHA256MSG1 XMM7, XMM4 | ||
| 183 | |||
| 184 | // rounds 0-3 | ||
| 185 | MOVO (BX), X0 // X0 = the four round constants for this group | ||
| 186 | PADDL X4, X0 // X0 = constants + message words; SHA256RNDS2 reads X0 implicitly | ||
| 187 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | ||
| 188 | PSHUFD $0x4e, X0, X0 // swap 64-bit halves so the upper two sums feed the next two rounds | ||
| 189 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | ||
| 190 | |||
| 191 | // rounds 4-7 | ||
| 192 | MOVO 1*16(BX), X0 | ||
| 193 | PADDL X5, X0 | ||
| 194 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | ||
| 195 | PSHUFD $0x4e, X0, X0 | ||
| 196 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | ||
| 197 | LONG $0xe5cc380f // SHA256MSG1 XMM4, XMM5 | ||
| 198 | |||
| 199 | // rounds 8-11 | ||
| 200 | MOVO 2*16(BX), X0 | ||
| 201 | PADDL X6, X0 | ||
| 202 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | ||
| 203 | PSHUFD $0x4e, X0, X0 | ||
| 204 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | ||
| 205 | LONG $0xeecc380f // SHA256MSG1 XMM5, XMM6 | ||
| 206 | |||
| 207 | MOVO 3*16(BX), X0; ROUND674 // rounds 12-15 | ||
| 208 | MOVO 4*16(BX), X0; ROUND745 // rounds 16-19 | ||
| 209 | MOVO 5*16(BX), X0; ROUND456 // rounds 20-23 | ||
| 210 | MOVO 6*16(BX), X0; ROUND567 // rounds 24-27 | ||
| 211 | MOVO 7*16(BX), X0; ROUND674 // rounds 28-31 | ||
| 212 | MOVO 8*16(BX), X0; ROUND745 // rounds 32-35 | ||
| 213 | MOVO 9*16(BX), X0; ROUND456 // rounds 36-39 | ||
| 214 | MOVO 10*16(BX), X0; ROUND567 // rounds 40-43 | ||
| 215 | MOVO 11*16(BX), X0; ROUND674 // rounds 44-47 | ||
| 216 | MOVO 12*16(BX), X0; ROUND745 // rounds 48-51 | ||
| 217 | |||
| 218 | // rounds 52-55 | ||
| 219 | MOVO 13*16(BX), X0 | ||
| 220 | PADDL X5, X0 | ||
| 221 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | ||
| 222 | MOVO X5, X1 | ||
| 223 | LONG $0x0f3a0f66; WORD $0x04cc // PALIGNR XMM1, XMM4, 4 | ||
| 224 | PADDL X1, X6 | ||
| 225 | LONG $0xf5cd380f // SHA256MSG2 XMM6, XMM5 | ||
| 226 | PSHUFD $0x4e, X0, X0 | ||
| 227 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | ||
| 228 | |||
| 229 | // rounds 56-59 | ||
| 230 | MOVO 14*16(BX), X0 | ||
| 231 | PADDL X6, X0 | ||
| 232 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | ||
| 233 | MOVO X6, X1 | ||
| 234 | LONG $0x0f3a0f66; WORD $0x04cd // PALIGNR XMM1, XMM5, 4 | ||
| 235 | PADDL X1, X7 | ||
| 236 | LONG $0xfecd380f // SHA256MSG2 XMM7, XMM6 | ||
| 237 | PSHUFD $0x4e, X0, X0 | ||
| 238 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | ||
| 239 | |||
| 240 | // rounds 60-63 | ||
| 241 | MOVO 15*16(BX), X0 | ||
| 242 | PADDL X7, X0 | ||
| 243 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | ||
| 244 | PSHUFD $0x4e, X0, X0 | ||
| 245 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | ||
| 246 | |||
| 247 | PADDL X12, X2 // add the state saved at the top of the loop to the round output | ||
| 248 | PADDL X13, X3 | ||
| 249 | |||
| 250 | ADDQ $64, SI // advance to the next 64-byte block | ||
| 251 | |||
| 252 | TEST: | ||
| 253 | CMPQ SI, DI | ||
| 254 | JBE LOOP // loop while SI <= DI (unsigned), i.e. while a full block remains; trailing bytes are not read | ||
| 255 | |||
| 256 | PSHUFD $0x4e, X3, X0 // rebuild state bytes 0-15 from the ABEF/CDGH registers | ||
| 257 | LONG $0x0e3a0f66; WORD $0xf0c2 // PBLENDW XMM0, XMM2, 0xf0 | ||
| 258 | PSHUFD $0x4e, X2, X1 // rebuild state bytes 16-31 the same way | ||
| 259 | LONG $0x0e3a0f66; WORD $0x0fcb // PBLENDW XMM1, XMM3, 0x0f | ||
| 260 | PSHUFD $0x1b, X0, X0 // reverse the dword order back to the in-memory layout | ||
| 261 | PSHUFD $0x1b, X1, X1 | ||
| 262 | |||
| 263 | MOVOU X0, (DX) // write the updated state back through h | ||
| 264 | MOVOU X1, 16(DX) | ||
| 265 | |||
| 266 | RET | ||