diff options
Diffstat (limited to 'vendor/golang.org/x/crypto/argon2/blamka_amd64.s')
-rw-r--r-- | vendor/golang.org/x/crypto/argon2/blamka_amd64.s | 243 |
1 files changed, 243 insertions, 0 deletions
diff --git a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s new file mode 100644 index 0000000..f3b653a --- /dev/null +++ b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s | |||
@@ -0,0 +1,243 @@ | |||
1 | // Copyright 2017 The Go Authors. All rights reserved. | ||
2 | // Use of this source code is governed by a BSD-style | ||
3 | // license that can be found in the LICENSE file. | ||
4 | |||
5 | //go:build amd64 && gc && !purego | ||
6 | |||
7 | #include "textflag.h" | ||
8 | |||
// c40 is a 16-byte PSHUFB control mask. Within each 64-bit lane, destination
// byte i is taken from source byte (i+3) mod 8, so shuffling with this mask
// rotates every 64-bit lane right by 24 bits (equivalently, left by 40).
9 | DATA ·c40<>+0x00(SB)/8, $0x0201000706050403 | ||
10 | DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b | ||
11 | GLOBL ·c40<>(SB), (NOPTR+RODATA), $16 | ||
12 | |||
// c48 is a 16-byte PSHUFB control mask. Within each 64-bit lane, destination
// byte i is taken from source byte (i+2) mod 8, so shuffling with this mask
// rotates every 64-bit lane right by 16 bits (equivalently, left by 48).
13 | DATA ·c48<>+0x00(SB)/8, $0x0100070605040302 | ||
14 | DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a | ||
15 | GLOBL ·c48<>(SB), (NOPTR+RODATA), $16 | ||
16 | |||
// SHUFFLE permutes the 64-bit lanes of three register pairs in place:
//   - v4 and v5 are swapped;
//   - (v2, v3), read as four lanes [v2.lo, v2.hi, v3.lo, v3.hi], becomes
//     [v2.hi, v3.lo, v3.hi, v2.lo] (rotated by one lane);
//   - (v6, v7), read as [v6.lo, v6.hi, v7.lo, v7.hi], becomes
//     [v7.hi, v6.lo, v6.hi, v7.lo] (rotated by one lane the other way).
// t1 and t2 are scratch registers and are clobbered. Only the high lane of
// t2 is ever consumed, and it is always written by a PUNPCKLQDQ first, so
// the incoming value of t2 does not matter.
// SHUFFLE_INV undoes this permutation.
17 | #define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \ | ||
18 | MOVO v4, t1; \ | ||
19 | MOVO v5, v4; \ | ||
20 | MOVO t1, v5; \ | ||
21 | MOVO v6, t1; \ | ||
22 | PUNPCKLQDQ v6, t2; \ | ||
23 | PUNPCKHQDQ v7, v6; \ | ||
24 | PUNPCKHQDQ t2, v6; \ | ||
25 | PUNPCKLQDQ v7, t2; \ | ||
26 | MOVO t1, v7; \ | ||
27 | MOVO v2, t1; \ | ||
28 | PUNPCKHQDQ t2, v7; \ | ||
29 | PUNPCKLQDQ v3, t2; \ | ||
30 | PUNPCKHQDQ t2, v2; \ | ||
31 | PUNPCKLQDQ t1, t2; \ | ||
32 | PUNPCKHQDQ t2, v3 | ||
33 | |||
// SHUFFLE_INV is the inverse of SHUFFLE. It permutes 64-bit lanes in place:
//   - v4 and v5 are swapped;
//   - (v2, v3), read as four lanes [v2.lo, v2.hi, v3.lo, v3.hi], becomes
//     [v3.hi, v2.lo, v2.hi, v3.lo];
//   - (v6, v7), read as [v6.lo, v6.hi, v7.lo, v7.hi], becomes
//     [v6.hi, v7.lo, v7.hi, v6.lo].
// t1 and t2 are scratch registers and are clobbered; the incoming value of
// t2 does not affect the result.
34 | #define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \ | ||
35 | MOVO v4, t1; \ | ||
36 | MOVO v5, v4; \ | ||
37 | MOVO t1, v5; \ | ||
38 | MOVO v2, t1; \ | ||
39 | PUNPCKLQDQ v2, t2; \ | ||
40 | PUNPCKHQDQ v3, v2; \ | ||
41 | PUNPCKHQDQ t2, v2; \ | ||
42 | PUNPCKLQDQ v3, t2; \ | ||
43 | MOVO t1, v3; \ | ||
44 | MOVO v6, t1; \ | ||
45 | PUNPCKHQDQ t2, v3; \ | ||
46 | PUNPCKLQDQ v7, t2; \ | ||
47 | PUNPCKHQDQ t2, v6; \ | ||
48 | PUNPCKLQDQ t1, t2; \ | ||
49 | PUNPCKHQDQ t2, v7 | ||
50 | |||
// HALF_ROUND applies the same mixing sequence to two independent register
// quadruples, first (a, b, c, d) = (v0, v2, v4, v6) and then
// (v1, v3, v5, v7). Every step acts on both 64-bit lanes of each register.
//
// The basic step is x = x + y + 2*lo32(x)*lo32(y): PMULULQ forms the 64-bit
// product of the low 32-bit halves of each lane, and that product is added
// twice. Per quadruple the sequence is:
//
//   a = a + b + 2*lo32(a)*lo32(b);  d ^= a;  d = rotate d by 32 (PSHUFD $0xB1)
//   c = c + d + 2*lo32(c)*lo32(d);  b ^= c;  b = rotate b right by 24 (PSHUFB c40)
//   a = a + b + 2*lo32(a)*lo32(b);  d ^= a;  d = rotate d right by 16 (PSHUFB c48)
//   c = c + d + 2*lo32(c)*lo32(d);  b ^= c;  b = rotate b left by 1
//
// The final rotate is built as (b + b) XOR (b >> 63).
//
// t0 is a scratch register and is clobbered. c40 and c48 must be registers
// holding the byte-shuffle masks used by the PSHUFB rotations (the caller in
// this file loads them from ·c40 and ·c48).
51 | #define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, t0, c40, c48) \ | ||
52 | MOVO v0, t0; \ | ||
53 | PMULULQ v2, t0; \ | ||
54 | PADDQ v2, v0; \ | ||
55 | PADDQ t0, v0; \ | ||
56 | PADDQ t0, v0; \ | ||
57 | PXOR v0, v6; \ | ||
58 | PSHUFD $0xB1, v6, v6; \ | ||
59 | MOVO v4, t0; \ | ||
60 | PMULULQ v6, t0; \ | ||
61 | PADDQ v6, v4; \ | ||
62 | PADDQ t0, v4; \ | ||
63 | PADDQ t0, v4; \ | ||
64 | PXOR v4, v2; \ | ||
65 | PSHUFB c40, v2; \ | ||
66 | MOVO v0, t0; \ | ||
67 | PMULULQ v2, t0; \ | ||
68 | PADDQ v2, v0; \ | ||
69 | PADDQ t0, v0; \ | ||
70 | PADDQ t0, v0; \ | ||
71 | PXOR v0, v6; \ | ||
72 | PSHUFB c48, v6; \ | ||
73 | MOVO v4, t0; \ | ||
74 | PMULULQ v6, t0; \ | ||
75 | PADDQ v6, v4; \ | ||
76 | PADDQ t0, v4; \ | ||
77 | PADDQ t0, v4; \ | ||
78 | PXOR v4, v2; \ | ||
79 | MOVO v2, t0; \ | ||
80 | PADDQ v2, t0; \ | ||
81 | PSRLQ $63, v2; \ | ||
82 | PXOR t0, v2; \ | ||
83 | MOVO v1, t0; \ | ||
84 | PMULULQ v3, t0; \ | ||
85 | PADDQ v3, v1; \ | ||
86 | PADDQ t0, v1; \ | ||
87 | PADDQ t0, v1; \ | ||
88 | PXOR v1, v7; \ | ||
89 | PSHUFD $0xB1, v7, v7; \ | ||
90 | MOVO v5, t0; \ | ||
91 | PMULULQ v7, t0; \ | ||
92 | PADDQ v7, v5; \ | ||
93 | PADDQ t0, v5; \ | ||
94 | PADDQ t0, v5; \ | ||
95 | PXOR v5, v3; \ | ||
96 | PSHUFB c40, v3; \ | ||
97 | MOVO v1, t0; \ | ||
98 | PMULULQ v3, t0; \ | ||
99 | PADDQ v3, v1; \ | ||
100 | PADDQ t0, v1; \ | ||
101 | PADDQ t0, v1; \ | ||
102 | PXOR v1, v7; \ | ||
103 | PSHUFB c48, v7; \ | ||
104 | MOVO v5, t0; \ | ||
105 | PMULULQ v7, t0; \ | ||
106 | PADDQ v7, v5; \ | ||
107 | PADDQ t0, v5; \ | ||
108 | PADDQ t0, v5; \ | ||
109 | PXOR v5, v3; \ | ||
110 | MOVO v3, t0; \ | ||
111 | PADDQ v3, t0; \ | ||
112 | PSRLQ $63, v3; \ | ||
113 | PXOR t0, v3 | ||
114 | |||
// LOAD_MSG_0 loads 16 consecutive 64-bit words, starting at word index off
// from the address in block, into X0..X7 (two words per register, unaligned
// loads). X(k) receives words off+2k and off+2k+1.
115 | #define LOAD_MSG_0(block, off) \ | ||
116 | MOVOU 8*(off+0)(block), X0; \ | ||
117 | MOVOU 8*(off+2)(block), X1; \ | ||
118 | MOVOU 8*(off+4)(block), X2; \ | ||
119 | MOVOU 8*(off+6)(block), X3; \ | ||
120 | MOVOU 8*(off+8)(block), X4; \ | ||
121 | MOVOU 8*(off+10)(block), X5; \ | ||
122 | MOVOU 8*(off+12)(block), X6; \ | ||
123 | MOVOU 8*(off+14)(block), X7 | ||
124 | |||
// STORE_MSG_0 is the counterpart of LOAD_MSG_0: it stores X0..X7 to the 16
// consecutive 64-bit words starting at word index off from the address in
// block (unaligned stores). X(k) is written to words off+2k and off+2k+1.
125 | #define STORE_MSG_0(block, off) \ | ||
126 | MOVOU X0, 8*(off+0)(block); \ | ||
127 | MOVOU X1, 8*(off+2)(block); \ | ||
128 | MOVOU X2, 8*(off+4)(block); \ | ||
129 | MOVOU X3, 8*(off+6)(block); \ | ||
130 | MOVOU X4, 8*(off+8)(block); \ | ||
131 | MOVOU X5, 8*(off+10)(block); \ | ||
132 | MOVOU X6, 8*(off+12)(block); \ | ||
133 | MOVOU X7, 8*(off+14)(block) | ||
134 | |||
// LOAD_MSG_1 loads eight pairs of adjacent 64-bit words into X0..X7 with a
// stride of 16 words between pairs: X(k) receives words off+16k and
// off+16k+1, counted from the address in block (unaligned loads).
135 | #define LOAD_MSG_1(block, off) \ | ||
136 | MOVOU 8*off+0*8(block), X0; \ | ||
137 | MOVOU 8*off+16*8(block), X1; \ | ||
138 | MOVOU 8*off+32*8(block), X2; \ | ||
139 | MOVOU 8*off+48*8(block), X3; \ | ||
140 | MOVOU 8*off+64*8(block), X4; \ | ||
141 | MOVOU 8*off+80*8(block), X5; \ | ||
142 | MOVOU 8*off+96*8(block), X6; \ | ||
143 | MOVOU 8*off+112*8(block), X7 | ||
144 | |||
// STORE_MSG_1 is the counterpart of LOAD_MSG_1: X(k) is stored to words
// off+16k and off+16k+1, counted from the address in block (unaligned
// stores), i.e. eight two-word pairs spaced 16 words apart.
145 | #define STORE_MSG_1(block, off) \ | ||
146 | MOVOU X0, 8*off+0*8(block); \ | ||
147 | MOVOU X1, 8*off+16*8(block); \ | ||
148 | MOVOU X2, 8*off+32*8(block); \ | ||
149 | MOVOU X3, 8*off+48*8(block); \ | ||
150 | MOVOU X4, 8*off+64*8(block); \ | ||
151 | MOVOU X5, 8*off+80*8(block); \ | ||
152 | MOVOU X6, 8*off+96*8(block); \ | ||
153 | MOVOU X7, 8*off+112*8(block) | ||
154 | |||
// BLAMKA_ROUND_0 transforms, in place, the 16 consecutive 64-bit words that
// start at word index off from the address in block. It loads them into
// X0..X7, runs HALF_ROUND, permutes lanes with SHUFFLE, runs HALF_ROUND
// again, restores lane order with SHUFFLE_INV, and stores the result back.
// Clobbers X0..X7, t0 and t1; c40 and c48 are the PSHUFB mask registers
// passed through to HALF_ROUND.
155 | #define BLAMKA_ROUND_0(block, off, t0, t1, c40, c48) \ | ||
156 | LOAD_MSG_0(block, off); \ | ||
157 | HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ | ||
158 | SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \ | ||
159 | HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ | ||
160 | SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \ | ||
161 | STORE_MSG_0(block, off) | ||
162 | |||
// BLAMKA_ROUND_1 performs the same HALF_ROUND / SHUFFLE / HALF_ROUND /
// SHUFFLE_INV sequence as BLAMKA_ROUND_0, but on a strided selection of
// words: the eight two-word pairs at word indices off+16k, off+16k+1
// (k = 0..7) from the address in block, as loaded by LOAD_MSG_1 and written
// back by STORE_MSG_1. Clobbers X0..X7, t0 and t1.
163 | #define BLAMKA_ROUND_1(block, off, t0, t1, c40, c48) \ | ||
164 | LOAD_MSG_1(block, off); \ | ||
165 | HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ | ||
166 | SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \ | ||
167 | HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ | ||
168 | SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \ | ||
169 | STORE_MSG_1(block, off) | ||
170 | |||
171 | // func blamkaSSE4(b *block) | ||
//
// blamkaSSE4 permutes the 128 64-bit words addressed by b in place.
// The first pass applies BLAMKA_ROUND_0 to each run of 16 consecutive words
// (word offsets 0, 16, ..., 112). The second pass applies BLAMKA_ROUND_1 to
// the strided word pairs starting at offsets 0, 2, ..., 14, so together the
// two passes cover every word of the block twice.
//
// Register use: AX = b, X0..X7 = working state, X8/X9 = scratch,
// X10/X11 = PSHUFB masks loaded from ·c40 and ·c48.
// The text flag is spelled NOSPLIT (from textflag.h) instead of the raw
// value 4.
172 | TEXT ·blamkaSSE4(SB), NOSPLIT, $0-8 | ||
173 | MOVQ b+0(FP), AX | ||
174 | |||
175 | MOVOU ·c40<>(SB), X10 | ||
176 | MOVOU ·c48<>(SB), X11 | ||
177 | |||
178 | BLAMKA_ROUND_0(AX, 0, X8, X9, X10, X11) | ||
179 | BLAMKA_ROUND_0(AX, 16, X8, X9, X10, X11) | ||
180 | BLAMKA_ROUND_0(AX, 32, X8, X9, X10, X11) | ||
181 | BLAMKA_ROUND_0(AX, 48, X8, X9, X10, X11) | ||
182 | BLAMKA_ROUND_0(AX, 64, X8, X9, X10, X11) | ||
183 | BLAMKA_ROUND_0(AX, 80, X8, X9, X10, X11) | ||
184 | BLAMKA_ROUND_0(AX, 96, X8, X9, X10, X11) | ||
185 | BLAMKA_ROUND_0(AX, 112, X8, X9, X10, X11) | ||
186 | |||
187 | BLAMKA_ROUND_1(AX, 0, X8, X9, X10, X11) | ||
188 | BLAMKA_ROUND_1(AX, 2, X8, X9, X10, X11) | ||
189 | BLAMKA_ROUND_1(AX, 4, X8, X9, X10, X11) | ||
190 | BLAMKA_ROUND_1(AX, 6, X8, X9, X10, X11) | ||
191 | BLAMKA_ROUND_1(AX, 8, X8, X9, X10, X11) | ||
192 | BLAMKA_ROUND_1(AX, 10, X8, X9, X10, X11) | ||
193 | BLAMKA_ROUND_1(AX, 12, X8, X9, X10, X11) | ||
194 | BLAMKA_ROUND_1(AX, 14, X8, X9, X10, X11) | ||
195 | RET | ||
196 | |||
197 | // func mixBlocksSSE2(out, a, b, c *block) | ||
//
// mixBlocksSSE2 computes out = a XOR b XOR c over 128 64-bit words
// (1024 bytes), 16 bytes per iteration. The previous contents of out are
// overwritten, not read.
//
// Register use: DX = out, AX = a, BX = b, CX = c, DI = remaining word count.
// DI is used for the counter rather than BP: this function has no frame
// ($0), so nothing saves or restores BP, and writing it would clobber the
// caller's frame pointer. The fourth argument is referenced by its declared
// name c so the operand matches the Go prototype.
198 | TEXT ·mixBlocksSSE2(SB), NOSPLIT, $0-32 | ||
199 | MOVQ out+0(FP), DX | ||
200 | MOVQ a+8(FP), AX | ||
201 | MOVQ b+16(FP), BX | ||
202 | MOVQ c+24(FP), CX | ||
203 | MOVQ $128, DI | ||
204 | |||
205 | loop: | ||
206 | MOVOU 0(AX), X0 | ||
207 | MOVOU 0(BX), X1 | ||
208 | MOVOU 0(CX), X2 | ||
209 | PXOR X1, X0 | ||
210 | PXOR X2, X0 | ||
211 | MOVOU X0, 0(DX) | ||
212 | ADDQ $16, AX | ||
213 | ADDQ $16, BX | ||
214 | ADDQ $16, CX | ||
215 | ADDQ $16, DX | ||
// Two 64-bit words were consumed; JA repeats while the count is still > 0.
216 | SUBQ $2, DI | ||
217 | JA loop | ||
218 | RET | ||
219 | |||
220 | // func xorBlocksSSE2(out, a, b, c *block) | ||
//
// xorBlocksSSE2 computes out = out XOR a XOR b XOR c over 128 64-bit words
// (1024 bytes), 16 bytes per iteration. Unlike mixBlocksSSE2, the existing
// contents of out are read and folded into the result.
//
// Register use: DX = out, AX = a, BX = b, CX = c, DI = remaining word count.
// DI is used for the counter rather than BP: this function has no frame
// ($0), so nothing saves or restores BP, and writing it would clobber the
// caller's frame pointer. The fourth argument is referenced by its declared
// name c so the operand matches the Go prototype.
221 | TEXT ·xorBlocksSSE2(SB), NOSPLIT, $0-32 | ||
222 | MOVQ out+0(FP), DX | ||
223 | MOVQ a+8(FP), AX | ||
224 | MOVQ b+16(FP), BX | ||
225 | MOVQ c+24(FP), CX | ||
226 | MOVQ $128, DI | ||
227 | |||
228 | loop: | ||
229 | MOVOU 0(AX), X0 | ||
230 | MOVOU 0(BX), X1 | ||
231 | MOVOU 0(CX), X2 | ||
232 | MOVOU 0(DX), X3 | ||
233 | PXOR X1, X0 | ||
234 | PXOR X2, X0 | ||
235 | PXOR X3, X0 | ||
236 | MOVOU X0, 0(DX) | ||
237 | ADDQ $16, AX | ||
238 | ADDQ $16, BX | ||
239 | ADDQ $16, CX | ||
240 | ADDQ $16, DX | ||
// Two 64-bit words were consumed; JA repeats while the count is still > 0.
241 | SUBQ $2, DI | ||
242 | JA loop | ||
243 | RET | ||