diff options
Diffstat (limited to 'vendor/golang.org/x/crypto/argon2/blamka_amd64.s')
| -rw-r--r-- | vendor/golang.org/x/crypto/argon2/blamka_amd64.s | 243 |
1 files changed, 243 insertions, 0 deletions
diff --git a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s new file mode 100644 index 0000000..f3b653a --- /dev/null +++ b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s | |||
| @@ -0,0 +1,243 @@ | |||
| 1 | // Copyright 2017 The Go Authors. All rights reserved. | ||
| 2 | // Use of this source code is governed by a BSD-style | ||
| 3 | // license that can be found in the LICENSE file. | ||
| 4 | |||
| 5 | //go:build amd64 && gc && !purego | ||
| 6 | |||
| 7 | #include "textflag.h" | ||
| 8 | |||
// c40 and c48 are 16-byte read-only byte-shuffle masks. They are loaded by
// blamkaSSE4 and handed to HALF_ROUND, which uses them as PSHUFB operands.
// PSHUFB sets destination byte i to the source byte selected by mask byte i,
// so within each 64-bit lane:
//   c40 picks source bytes 3,4,5,6,7,0,1,2 -> rotate right by 24 bits (left by 40)
//   c48 picks source bytes 2,3,4,5,6,7,0,1 -> rotate right by 16 bits (left by 48)
// The second quadword of each mask repeats the pattern with indices 8..15
// for the upper lane.
| 9 | DATA ·c40<>+0x00(SB)/8, $0x0201000706050403 | ||
| 10 | DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b | ||
| 11 | GLOBL ·c40<>(SB), (NOPTR+RODATA), $16 | ||
| 12 | |||
| 13 | DATA ·c48<>+0x00(SB)/8, $0x0100070605040302 | ||
| 14 | DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a | ||
| 15 | GLOBL ·c48<>(SB), (NOPTR+RODATA), $16 | ||
| 16 | |||
// SHUFFLE permutes three 4-word rows, each held in a pair of XMM registers
// (notation: [low quadword, high quadword]):
//   (v2, v3) = [b0 b1] [b2 b3]  ->  [b1 b2] [b3 b0]
//   (v4, v5) = [c0 c1] [c2 c3]  ->  [c2 c3] [c0 c1]   (the two registers are swapped)
//   (v6, v7) = [d0 d1] [d2 d3]  ->  [d3 d0] [d1 d2]
// BLAMKA_ROUND_0/1 invoke it between their two HALF_ROUND calls and undo it
// afterwards with SHUFFLE_INV.
// t1 and t2 are scratch registers. t2 is only ever read through its high
// quadword (as the source of PUNPCKHQDQ), and that half is always written by
// a preceding PUNPCKLQDQ, so t2 needs no initialization.
| 17 | #define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \ | ||
| 18 | MOVO v4, t1; \ | ||
| 19 | MOVO v5, v4; \ | ||
| 20 | MOVO t1, v5; \ | ||
| 21 | MOVO v6, t1; \ | ||
| 22 | PUNPCKLQDQ v6, t2; \ | ||
| 23 | PUNPCKHQDQ v7, v6; \ | ||
| 24 | PUNPCKHQDQ t2, v6; \ | ||
| 25 | PUNPCKLQDQ v7, t2; \ | ||
| 26 | MOVO t1, v7; \ | ||
| 27 | MOVO v2, t1; \ | ||
| 28 | PUNPCKHQDQ t2, v7; \ | ||
| 29 | PUNPCKLQDQ v3, t2; \ | ||
| 30 | PUNPCKHQDQ t2, v2; \ | ||
| 31 | PUNPCKLQDQ t1, t2; \ | ||
| 32 | PUNPCKHQDQ t2, v3 | ||
| 33 | |||
// SHUFFLE_INV is the inverse permutation of SHUFFLE
// (notation: [low quadword, high quadword]):
//   (v2, v3) = [b0 b1] [b2 b3]  ->  [b3 b0] [b1 b2]
//   (v4, v5) = [c0 c1] [c2 c3]  ->  [c2 c3] [c0 c1]   (the two registers are swapped)
//   (v6, v7) = [d0 d1] [d2 d3]  ->  [d1 d2] [d3 d0]
// Applying SHUFFLE and then SHUFFLE_INV restores the original lane order.
// t1 and t2 are scratch registers; as in SHUFFLE, only the high quadword of
// t2 is ever read, and it is always written first.
| 34 | #define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \ | ||
| 35 | MOVO v4, t1; \ | ||
| 36 | MOVO v5, v4; \ | ||
| 37 | MOVO t1, v5; \ | ||
| 38 | MOVO v2, t1; \ | ||
| 39 | PUNPCKLQDQ v2, t2; \ | ||
| 40 | PUNPCKHQDQ v3, v2; \ | ||
| 41 | PUNPCKHQDQ t2, v2; \ | ||
| 42 | PUNPCKLQDQ v3, t2; \ | ||
| 43 | MOVO t1, v3; \ | ||
| 44 | MOVO v6, t1; \ | ||
| 45 | PUNPCKHQDQ t2, v3; \ | ||
| 46 | PUNPCKLQDQ v7, t2; \ | ||
| 47 | PUNPCKHQDQ t2, v6; \ | ||
| 48 | PUNPCKLQDQ t1, t2; \ | ||
| 49 | PUNPCKHQDQ t2, v7 | ||
| 50 | |||
// HALF_ROUND runs the same mixing sequence twice: first on the operand set
// (v0, v2, v4, v6), then on (v1, v3, v5, v7). Each XMM register carries two
// 64-bit lanes that are processed in parallel.
//
// For operands (a, b, c, d) the sequence is, per 64-bit lane:
//   a += b + 2*lo32(a)*lo32(b);  d ^= a;  d = swap the 32-bit halves (PSHUFD $0xB1)
//   c += d + 2*lo32(c)*lo32(d);  b ^= c;  b = PSHUFB(b, c40)
//   a += b + 2*lo32(a)*lo32(b);  d ^= a;  d = PSHUFB(d, c48)
//   c += d + 2*lo32(c)*lo32(d);  b ^= c;  b = (b << 1) ^ (b >> 63)
//
// The 2*lo32(x)*lo32(y) term is produced by PMULULQ (unsigned 32x32->64
// multiply of the low halves of each lane) into t0, which is then added twice.
// The final step builds b << 1 as b + b in t0 and XORs it with b >> 63,
// i.e. a rotate left by one bit.
// t0 is a scratch register; c40 and c48 are the PSHUFB mask operands.
| 51 | #define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, t0, c40, c48) \ | ||
| 52 | MOVO v0, t0; \ | ||
| 53 | PMULULQ v2, t0; \ | ||
| 54 | PADDQ v2, v0; \ | ||
| 55 | PADDQ t0, v0; \ | ||
| 56 | PADDQ t0, v0; \ | ||
| 57 | PXOR v0, v6; \ | ||
| 58 | PSHUFD $0xB1, v6, v6; \ | ||
| 59 | MOVO v4, t0; \ | ||
| 60 | PMULULQ v6, t0; \ | ||
| 61 | PADDQ v6, v4; \ | ||
| 62 | PADDQ t0, v4; \ | ||
| 63 | PADDQ t0, v4; \ | ||
| 64 | PXOR v4, v2; \ | ||
| 65 | PSHUFB c40, v2; \ | ||
| 66 | MOVO v0, t0; \ | ||
| 67 | PMULULQ v2, t0; \ | ||
| 68 | PADDQ v2, v0; \ | ||
| 69 | PADDQ t0, v0; \ | ||
| 70 | PADDQ t0, v0; \ | ||
| 71 | PXOR v0, v6; \ | ||
| 72 | PSHUFB c48, v6; \ | ||
| 73 | MOVO v4, t0; \ | ||
| 74 | PMULULQ v6, t0; \ | ||
| 75 | PADDQ v6, v4; \ | ||
| 76 | PADDQ t0, v4; \ | ||
| 77 | PADDQ t0, v4; \ | ||
| 78 | PXOR v4, v2; \ | ||
| 79 | MOVO v2, t0; \ | ||
| 80 | PADDQ v2, t0; \ | ||
| 81 | PSRLQ $63, v2; \ | ||
| 82 | PXOR t0, v2; \ | ||
| 83 | MOVO v1, t0; \ | ||
| 84 | PMULULQ v3, t0; \ | ||
| 85 | PADDQ v3, v1; \ | ||
| 86 | PADDQ t0, v1; \ | ||
| 87 | PADDQ t0, v1; \ | ||
| 88 | PXOR v1, v7; \ | ||
| 89 | PSHUFD $0xB1, v7, v7; \ | ||
| 90 | MOVO v5, t0; \ | ||
| 91 | PMULULQ v7, t0; \ | ||
| 92 | PADDQ v7, v5; \ | ||
| 93 | PADDQ t0, v5; \ | ||
| 94 | PADDQ t0, v5; \ | ||
| 95 | PXOR v5, v3; \ | ||
| 96 | PSHUFB c40, v3; \ | ||
| 97 | MOVO v1, t0; \ | ||
| 98 | PMULULQ v3, t0; \ | ||
| 99 | PADDQ v3, v1; \ | ||
| 100 | PADDQ t0, v1; \ | ||
| 101 | PADDQ t0, v1; \ | ||
| 102 | PXOR v1, v7; \ | ||
| 103 | PSHUFB c48, v7; \ | ||
| 104 | MOVO v5, t0; \ | ||
| 105 | PMULULQ v7, t0; \ | ||
| 106 | PADDQ v7, v5; \ | ||
| 107 | PADDQ t0, v5; \ | ||
| 108 | PADDQ t0, v5; \ | ||
| 109 | PXOR v5, v3; \ | ||
| 110 | MOVO v3, t0; \ | ||
| 111 | PADDQ v3, t0; \ | ||
| 112 | PSRLQ $63, v3; \ | ||
| 113 | PXOR t0, v3 | ||
| 114 | |||
// LOAD_MSG_0 loads 16 consecutive 64-bit words, starting at word index off
// from the address in block, into X0..X7 (two words per register) using
// unaligned 16-byte loads. Register Xi receives words off+2*i and off+2*i+1.
| 115 | #define LOAD_MSG_0(block, off) \ | ||
| 116 | MOVOU 8*(off+0)(block), X0; \ | ||
| 117 | MOVOU 8*(off+2)(block), X1; \ | ||
| 118 | MOVOU 8*(off+4)(block), X2; \ | ||
| 119 | MOVOU 8*(off+6)(block), X3; \ | ||
| 120 | MOVOU 8*(off+8)(block), X4; \ | ||
| 121 | MOVOU 8*(off+10)(block), X5; \ | ||
| 122 | MOVOU 8*(off+12)(block), X6; \ | ||
| 123 | MOVOU 8*(off+14)(block), X7 | ||
| 124 | |||
// STORE_MSG_0 writes X0..X7 back to the 16 consecutive 64-bit words starting
// at word index off from the address in block, using unaligned 16-byte
// stores. It uses the same addresses as LOAD_MSG_0: register Xi goes to
// words off+2*i and off+2*i+1.
| 125 | #define STORE_MSG_0(block, off) \ | ||
| 126 | MOVOU X0, 8*(off+0)(block); \ | ||
| 127 | MOVOU X1, 8*(off+2)(block); \ | ||
| 128 | MOVOU X2, 8*(off+4)(block); \ | ||
| 129 | MOVOU X3, 8*(off+6)(block); \ | ||
| 130 | MOVOU X4, 8*(off+8)(block); \ | ||
| 131 | MOVOU X5, 8*(off+10)(block); \ | ||
| 132 | MOVOU X6, 8*(off+12)(block); \ | ||
| 133 | MOVOU X7, 8*(off+14)(block) | ||
| 134 | |||
// LOAD_MSG_1 loads eight pairs of adjacent 64-bit words into X0..X7 with a
// stride of 16 words (128 bytes) between pairs: register Xi receives words
// off+16*i and off+16*i+1, counted from the address in block. Loads are
// unaligned 16-byte loads.
| 135 | #define LOAD_MSG_1(block, off) \ | ||
| 136 | MOVOU 8*off+0*8(block), X0; \ | ||
| 137 | MOVOU 8*off+16*8(block), X1; \ | ||
| 138 | MOVOU 8*off+32*8(block), X2; \ | ||
| 139 | MOVOU 8*off+48*8(block), X3; \ | ||
| 140 | MOVOU 8*off+64*8(block), X4; \ | ||
| 141 | MOVOU 8*off+80*8(block), X5; \ | ||
| 142 | MOVOU 8*off+96*8(block), X6; \ | ||
| 143 | MOVOU 8*off+112*8(block), X7 | ||
| 144 | |||
// STORE_MSG_1 writes X0..X7 back to the strided locations read by
// LOAD_MSG_1: register Xi goes to words off+16*i and off+16*i+1, counted
// from the address in block, using unaligned 16-byte stores.
| 145 | #define STORE_MSG_1(block, off) \ | ||
| 146 | MOVOU X0, 8*off+0*8(block); \ | ||
| 147 | MOVOU X1, 8*off+16*8(block); \ | ||
| 148 | MOVOU X2, 8*off+32*8(block); \ | ||
| 149 | MOVOU X3, 8*off+48*8(block); \ | ||
| 150 | MOVOU X4, 8*off+64*8(block); \ | ||
| 151 | MOVOU X5, 8*off+80*8(block); \ | ||
| 152 | MOVOU X6, 8*off+96*8(block); \ | ||
| 153 | MOVOU X7, 8*off+112*8(block) | ||
| 154 | |||
// BLAMKA_ROUND_0 processes 16 consecutive 64-bit words in place: it loads
// them with LOAD_MSG_0, applies HALF_ROUND, SHUFFLE, HALF_ROUND and
// SHUFFLE_INV to X0..X7, and stores the result with STORE_MSG_0.
// off is the starting word index; t0 and t1 are scratch registers; c40 and
// c48 are the PSHUFB mask operands forwarded to HALF_ROUND.
// X0..X7 are overwritten.
| 155 | #define BLAMKA_ROUND_0(block, off, t0, t1, c40, c48) \ | ||
| 156 | LOAD_MSG_0(block, off); \ | ||
| 157 | HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ | ||
| 158 | SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \ | ||
| 159 | HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ | ||
| 160 | SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \ | ||
| 161 | STORE_MSG_0(block, off) | ||
| 162 | |||
// BLAMKA_ROUND_1 is the strided counterpart of BLAMKA_ROUND_0: it loads
// eight word pairs spaced 16 words apart with LOAD_MSG_1, applies
// HALF_ROUND, SHUFFLE, HALF_ROUND and SHUFFLE_INV to X0..X7, and stores the
// result with STORE_MSG_1.
// off is the word index of the first pair; t0 and t1 are scratch registers;
// c40 and c48 are the PSHUFB mask operands forwarded to HALF_ROUND.
// X0..X7 are overwritten.
| 163 | #define BLAMKA_ROUND_1(block, off, t0, t1, c40, c48) \ | ||
| 164 | LOAD_MSG_1(block, off); \ | ||
| 165 | HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ | ||
| 166 | SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \ | ||
| 167 | HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ | ||
| 168 | SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \ | ||
| 169 | STORE_MSG_1(block, off) | ||
| 170 | |||
// blamkaSSE4 permutes, in place, the 128 64-bit words (1024 bytes) that b
// points to.
//
// Pass 1 runs BLAMKA_ROUND_0 on each of the eight runs of 16 consecutive
// words (word offsets 0, 16, ..., 112). Pass 2 runs BLAMKA_ROUND_1 on each of
// the eight strided groups (word offsets 0, 2, ..., 14; pairs 16 words apart).
//
// Register use: AX = b, X0-X7 = working state, X8/X9 = scratch,
// X10/X11 = the c40/c48 PSHUFB masks, loaded once for the whole function.
// The TEXT flag is spelled NOSPLIT (value 4 in textflag.h, which this file
// includes) instead of the bare number.
// NOTE(review): PSHUFB is an SSSE3 instruction; the Go caller is presumably
// responsible for checking CPU support before calling this - confirm.
| 171 | // func blamkaSSE4(b *block) | ||
| 172 | TEXT ·blamkaSSE4(SB), NOSPLIT, $0-8 | ||
| 173 | MOVQ b+0(FP), AX | ||
| 174 | |||
| 175 | MOVOU ·c40<>(SB), X10 | ||
| 176 | MOVOU ·c48<>(SB), X11 | ||
| 177 | |||
| 178 | BLAMKA_ROUND_0(AX, 0, X8, X9, X10, X11) | ||
| 179 | BLAMKA_ROUND_0(AX, 16, X8, X9, X10, X11) | ||
| 180 | BLAMKA_ROUND_0(AX, 32, X8, X9, X10, X11) | ||
| 181 | BLAMKA_ROUND_0(AX, 48, X8, X9, X10, X11) | ||
| 182 | BLAMKA_ROUND_0(AX, 64, X8, X9, X10, X11) | ||
| 183 | BLAMKA_ROUND_0(AX, 80, X8, X9, X10, X11) | ||
| 184 | BLAMKA_ROUND_0(AX, 96, X8, X9, X10, X11) | ||
| 185 | BLAMKA_ROUND_0(AX, 112, X8, X9, X10, X11) | ||
| 186 | |||
| 187 | BLAMKA_ROUND_1(AX, 0, X8, X9, X10, X11) | ||
| 188 | BLAMKA_ROUND_1(AX, 2, X8, X9, X10, X11) | ||
| 189 | BLAMKA_ROUND_1(AX, 4, X8, X9, X10, X11) | ||
| 190 | BLAMKA_ROUND_1(AX, 6, X8, X9, X10, X11) | ||
| 191 | BLAMKA_ROUND_1(AX, 8, X8, X9, X10, X11) | ||
| 192 | BLAMKA_ROUND_1(AX, 10, X8, X9, X10, X11) | ||
| 193 | BLAMKA_ROUND_1(AX, 12, X8, X9, X10, X11) | ||
| 194 | BLAMKA_ROUND_1(AX, 14, X8, X9, X10, X11) | ||
| 195 | RET | ||
| 196 | |||
// mixBlocksSSE2 stores a XOR b XOR c into out, over 128 64-bit words
// (1024 bytes), 16 bytes per loop iteration with unaligned loads and stores.
//
// Arguments (all *block): out at 0(FP), a at 8(FP), b at 16(FP), c at 24(FP).
// Register use: DX = out, AX = a, BX = b, CX = c, DI = remaining word count.
// DI starts at 128 and drops by 2 per iteration; JA repeats while the result
// of the subtraction is still above zero, giving 64 iterations.
//
// Changes from the previous version:
//   - the fourth argument is referenced as c+24(FP) (it was misnamed
//     a+24(FP), which disagrees with the declared parameter name at that
//     offset);
//   - the loop counter lives in DI instead of BP, because BP is the frame
//     pointer and this frameless function never saves or restores it;
//   - the TEXT flag is spelled NOSPLIT instead of the bare value 4.
| 197 | // func mixBlocksSSE2(out, a, b, c *block) | ||
| 198 | TEXT ·mixBlocksSSE2(SB), NOSPLIT, $0-32 | ||
| 199 | MOVQ out+0(FP), DX | ||
| 200 | MOVQ a+8(FP), AX | ||
| 201 | MOVQ b+16(FP), BX | ||
| 202 | MOVQ c+24(FP), CX | ||
| 203 | MOVQ $128, DI | ||
| 204 | |||
| 205 | loop: | ||
| 206 | MOVOU 0(AX), X0 | ||
| 207 | MOVOU 0(BX), X1 | ||
| 208 | MOVOU 0(CX), X2 | ||
| 209 | PXOR X1, X0 | ||
| 210 | PXOR X2, X0 | ||
| 211 | MOVOU X0, 0(DX) | ||
| 212 | ADDQ $16, AX | ||
| 213 | ADDQ $16, BX | ||
| 214 | ADDQ $16, CX | ||
| 215 | ADDQ $16, DX | ||
| 216 | SUBQ $2, DI | ||
| 217 | JA loop | ||
| 218 | RET | ||
| 219 | |||
// xorBlocksSSE2 XORs a, b and c into the existing contents of out
// (out ^= a ^ b ^ c), over 128 64-bit words (1024 bytes), 16 bytes per loop
// iteration with unaligned loads and stores.
//
// Arguments (all *block): out at 0(FP), a at 8(FP), b at 16(FP), c at 24(FP).
// Register use: DX = out, AX = a, BX = b, CX = c, DI = remaining word count.
// DI starts at 128 and drops by 2 per iteration; JA repeats while the result
// of the subtraction is still above zero, giving 64 iterations.
//
// Changes from the previous version:
//   - the fourth argument is referenced as c+24(FP) (it was misnamed
//     a+24(FP), which disagrees with the declared parameter name at that
//     offset);
//   - the loop counter lives in DI instead of BP, because BP is the frame
//     pointer and this frameless function never saves or restores it;
//   - the TEXT flag is spelled NOSPLIT instead of the bare value 4.
| 220 | // func xorBlocksSSE2(out, a, b, c *block) | ||
| 221 | TEXT ·xorBlocksSSE2(SB), NOSPLIT, $0-32 | ||
| 222 | MOVQ out+0(FP), DX | ||
| 223 | MOVQ a+8(FP), AX | ||
| 224 | MOVQ b+16(FP), BX | ||
| 225 | MOVQ c+24(FP), CX | ||
| 226 | MOVQ $128, DI | ||
| 227 | |||
| 228 | loop: | ||
| 229 | MOVOU 0(AX), X0 | ||
| 230 | MOVOU 0(BX), X1 | ||
| 231 | MOVOU 0(CX), X2 | ||
| 232 | MOVOU 0(DX), X3 | ||
| 233 | PXOR X1, X0 | ||
| 234 | PXOR X2, X0 | ||
| 235 | PXOR X3, X0 | ||
| 236 | MOVOU X0, 0(DX) | ||
| 237 | ADDQ $16, AX | ||
| 238 | ADDQ $16, BX | ||
| 239 | ADDQ $16, CX | ||
| 240 | ADDQ $16, DX | ||
| 241 | SUBQ $2, DI | ||
| 242 | JA loop | ||
| 243 | RET | ||