diff options
author | Rutger Broekhoff | 2023-12-29 21:31:53 +0100 |
---|---|---|
committer | Rutger Broekhoff | 2023-12-29 21:31:53 +0100 |
commit | 404aeae4545d2426c089a5f8d5e82dae56f5212b (patch) | |
tree | 2d84e00af272b39fc04f3795ae06bc48970e57b5 /vendor/github.com/minio/sha256-simd/sha256block_amd64.s | |
parent | 209d8b0187ed025dec9ac149ebcced3462877bff (diff) | |
download | gitolfs3-404aeae4545d2426c089a5f8d5e82dae56f5212b.tar.gz gitolfs3-404aeae4545d2426c089a5f8d5e82dae56f5212b.zip |
Make Nix builds work
Diffstat (limited to 'vendor/github.com/minio/sha256-simd/sha256block_amd64.s')
-rw-r--r-- | vendor/github.com/minio/sha256-simd/sha256block_amd64.s | 266 |
1 files changed, 266 insertions, 0 deletions
diff --git a/vendor/github.com/minio/sha256-simd/sha256block_amd64.s b/vendor/github.com/minio/sha256-simd/sha256block_amd64.s new file mode 100644 index 0000000..c98a1d8 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256block_amd64.s | |||
@@ -0,0 +1,266 @@ | |||
1 | //+build !noasm,!appengine,gc | ||
2 | |||
3 | // SHA intrinsic version of SHA256 | ||
4 | |||
5 | // Kristofer Peterson, (C) 2018. | ||
6 | // | ||
7 | // Licensed under the Apache License, Version 2.0 (the "License"); | ||
8 | // you may not use this file except in compliance with the License. | ||
9 | // You may obtain a copy of the License at | ||
10 | // | ||
11 | // http://www.apache.org/licenses/LICENSE-2.0 | ||
12 | // | ||
13 | // Unless required by applicable law or agreed to in writing, software | ||
14 | // distributed under the License is distributed on an "AS IS" BASIS, | ||
15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
16 | // See the License for the specific language governing permissions and | ||
17 | // limitations under the License. | ||
18 | // | ||
19 | |||
20 | #include "textflag.h" | ||
21 | |||
// K<> holds the 64 SHA-256 round constants as consecutive 32-bit words
// (64 * 4 = 256 bytes), declared read-only and pointer-free. blockIntelSha
// takes its address into BX and reads it 16 bytes at a time, i.e. the four
// constants needed by one group of four rounds per load.
22 | DATA K<>+0x00(SB)/4, $0x428a2f98 | |
23 | DATA K<>+0x04(SB)/4, $0x71374491 | |
24 | DATA K<>+0x08(SB)/4, $0xb5c0fbcf | |
25 | DATA K<>+0x0c(SB)/4, $0xe9b5dba5 | |
26 | DATA K<>+0x10(SB)/4, $0x3956c25b | |
27 | DATA K<>+0x14(SB)/4, $0x59f111f1 | |
28 | DATA K<>+0x18(SB)/4, $0x923f82a4 | |
29 | DATA K<>+0x1c(SB)/4, $0xab1c5ed5 | |
30 | DATA K<>+0x20(SB)/4, $0xd807aa98 | |
31 | DATA K<>+0x24(SB)/4, $0x12835b01 | |
32 | DATA K<>+0x28(SB)/4, $0x243185be | |
33 | DATA K<>+0x2c(SB)/4, $0x550c7dc3 | |
34 | DATA K<>+0x30(SB)/4, $0x72be5d74 | |
35 | DATA K<>+0x34(SB)/4, $0x80deb1fe | |
36 | DATA K<>+0x38(SB)/4, $0x9bdc06a7 | |
37 | DATA K<>+0x3c(SB)/4, $0xc19bf174 | |
38 | DATA K<>+0x40(SB)/4, $0xe49b69c1 | |
39 | DATA K<>+0x44(SB)/4, $0xefbe4786 | |
40 | DATA K<>+0x48(SB)/4, $0x0fc19dc6 | |
41 | DATA K<>+0x4c(SB)/4, $0x240ca1cc | |
42 | DATA K<>+0x50(SB)/4, $0x2de92c6f | |
43 | DATA K<>+0x54(SB)/4, $0x4a7484aa | |
44 | DATA K<>+0x58(SB)/4, $0x5cb0a9dc | |
45 | DATA K<>+0x5c(SB)/4, $0x76f988da | |
46 | DATA K<>+0x60(SB)/4, $0x983e5152 | |
47 | DATA K<>+0x64(SB)/4, $0xa831c66d | |
48 | DATA K<>+0x68(SB)/4, $0xb00327c8 | |
49 | DATA K<>+0x6c(SB)/4, $0xbf597fc7 | |
50 | DATA K<>+0x70(SB)/4, $0xc6e00bf3 | |
51 | DATA K<>+0x74(SB)/4, $0xd5a79147 | |
52 | DATA K<>+0x78(SB)/4, $0x06ca6351 | |
53 | DATA K<>+0x7c(SB)/4, $0x14292967 | |
54 | DATA K<>+0x80(SB)/4, $0x27b70a85 | |
55 | DATA K<>+0x84(SB)/4, $0x2e1b2138 | |
56 | DATA K<>+0x88(SB)/4, $0x4d2c6dfc | |
57 | DATA K<>+0x8c(SB)/4, $0x53380d13 | |
58 | DATA K<>+0x90(SB)/4, $0x650a7354 | |
59 | DATA K<>+0x94(SB)/4, $0x766a0abb | |
60 | DATA K<>+0x98(SB)/4, $0x81c2c92e | |
61 | DATA K<>+0x9c(SB)/4, $0x92722c85 | |
62 | DATA K<>+0xa0(SB)/4, $0xa2bfe8a1 | |
63 | DATA K<>+0xa4(SB)/4, $0xa81a664b | |
64 | DATA K<>+0xa8(SB)/4, $0xc24b8b70 | |
65 | DATA K<>+0xac(SB)/4, $0xc76c51a3 | |
66 | DATA K<>+0xb0(SB)/4, $0xd192e819 | |
67 | DATA K<>+0xb4(SB)/4, $0xd6990624 | |
68 | DATA K<>+0xb8(SB)/4, $0xf40e3585 | |
69 | DATA K<>+0xbc(SB)/4, $0x106aa070 | |
70 | DATA K<>+0xc0(SB)/4, $0x19a4c116 | |
71 | DATA K<>+0xc4(SB)/4, $0x1e376c08 | |
72 | DATA K<>+0xc8(SB)/4, $0x2748774c | |
73 | DATA K<>+0xcc(SB)/4, $0x34b0bcb5 | |
74 | DATA K<>+0xd0(SB)/4, $0x391c0cb3 | |
75 | DATA K<>+0xd4(SB)/4, $0x4ed8aa4a | |
76 | DATA K<>+0xd8(SB)/4, $0x5b9cca4f | |
77 | DATA K<>+0xdc(SB)/4, $0x682e6ff3 | |
78 | DATA K<>+0xe0(SB)/4, $0x748f82ee | |
79 | DATA K<>+0xe4(SB)/4, $0x78a5636f | |
80 | DATA K<>+0xe8(SB)/4, $0x84c87814 | |
81 | DATA K<>+0xec(SB)/4, $0x8cc70208 | |
82 | DATA K<>+0xf0(SB)/4, $0x90befffa | |
83 | DATA K<>+0xf4(SB)/4, $0xa4506ceb | |
84 | DATA K<>+0xf8(SB)/4, $0xbef9a3f7 | |
85 | DATA K<>+0xfc(SB)/4, $0xc67178f2 | |
86 | GLOBL K<>(SB), RODATA|NOPTR, $256 | |
87 | |||
// SHUF_MASK<> is the 16-byte PSHUFB control vector used on every loaded
// message block. Read in memory order its bytes are
// 03 02 01 00 | 07 06 05 04 | 0b 0a 09 08 | 0f 0e 0d 0c, so it reverses the
// byte order inside each 32-bit lane (a per-word byte swap) while leaving the
// order of the four words unchanged.
88 | DATA SHUF_MASK<>+0x00(SB)/8, $0x0405060700010203 | |
89 | DATA SHUF_MASK<>+0x08(SB)/8, $0x0c0d0e0f08090a0b | |
90 | GLOBL SHUF_MASK<>(SB), RODATA|NOPTR, $16 | |
91 | |||
92 | // Register Usage | ||
93 | // BX base address of constant table (constant) | ||
94 | // DX hash_state (constant) | ||
95 | // SI hash_data.data | ||
96 | // DI hash_data.data + hash_data.length - 64 (constant) | ||
97 | // X0 scratch: round constants + message words (implicit SHA256RNDS2 input) | |
98 | // X1 scratch | ||
99 | // X2 working hash state // ABEF | ||
100 | // X3 working hash state // CDGH | ||
101 | // X4 first 16 bytes of block | ||
102 | // X5 second 16 bytes of block | ||
103 | // X6 third 16 bytes of block | ||
104 | // X7 fourth 16 bytes of block | ||
105 | // X12 saved hash state // ABEF | ||
106 | // X13 saved hash state // CDGH | ||
107 | // X15 data shuffle mask (constant) | ||
108 | |||
// blockIntelSha runs the SHA-256 compression function over every complete
// 64-byte block of the message using the SHA extension instructions, which
// are hand-encoded below as LONG/WORD byte sequences.
//
// Arguments (taken from the FP offsets used below):
//   h+0(FP)             pointer to the 32-byte hash state; read on entry and
//                       overwritten with the updated state before returning
//   message_base+8(FP)  address of the first message byte
//   message_len+16(FP)  message length in bytes
//
// The loop only runs while a whole 64-byte block remains, so a trailing
// partial block is left unprocessed. The routine is NOSPLIT with no local
// stack frame ($0) and 32 bytes of arguments.
109 | TEXT ·blockIntelSha(SB), NOSPLIT, $0-32 | |
110 | MOVQ h+0(FP), DX | |
111 | MOVQ message_base+8(FP), SI | |
112 | MOVQ message_len+16(FP), DI | |
// DI = base + len - 64: the highest address at which a full block can start.
113 | LEAQ -64(SI)(DI*1), DI | |
// Load the eight 32-bit state words and repack them (interleave + shuffle)
// into the ABEF (X2) / CDGH (X3) register layout listed under Register Usage.
114 | MOVOU (DX), X2 | |
115 | MOVOU 16(DX), X1 | |
116 | MOVO X2, X3 | |
117 | PUNPCKLLQ X1, X2 | |
118 | PUNPCKHLQ X1, X3 | |
119 | PSHUFD $0x27, X2, X2 | |
120 | PSHUFD $0x27, X3, X3 | |
121 | MOVO SHUF_MASK<>(SB), X15 | |
122 | LEAQ K<>(SB), BX | |
123 | |||
// Enter at the loop condition so inputs without a full block skip the body.
124 | JMP TEST | |
125 | |||
126 | LOOP: | |
// Keep a copy of the incoming state; it is added back after round 63.
127 | MOVO X2, X12 | |
128 | MOVO X3, X13 | |
129 | |||
130 | // load block and shuffle | |
131 | MOVOU (SI), X4 | |
132 | MOVOU 16(SI), X5 | |
133 | MOVOU 32(SI), X6 | |
134 | MOVOU 48(SI), X7 | |
135 | PSHUFB X15, X4 | |
136 | PSHUFB X15, X5 | |
137 | PSHUFB X15, X6 | |
138 | PSHUFB X15, X7 | |
139 | |||
// ROUNDabc macros: each one performs a group of four rounds and advances the
// message schedule. The caller has already loaded the group's constants into
// X0. The macro adds message register Xb to X0, issues SHA256RNDS2, swaps the
// two 64-bit halves of X0 (PSHUFD $0x4e) and issues SHA256RNDS2 again with the
// state registers exchanged. In between it finishes the next schedule words in
// Xc (PALIGNR of Xb:Xa by 4 bytes via X1, PADDL, SHA256MSG2) and finally
// starts the following ones with SHA256MSG1 on Xa. The four variants differ
// only in which of X4..X7 play the roles a, b and c.
140 | #define ROUND456 \ | |
141 | PADDL X5, X0 \ | |
142 | LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 | |
143 | MOVO X5, X1 \ | |
144 | LONG $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1, XMM4, 4 | |
145 | PADDL X1, X6 \ | |
146 | LONG $0xf5cd380f \ // SHA256MSG2 XMM6, XMM5 | |
147 | PSHUFD $0x4e, X0, X0 \ | |
148 | LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 | |
149 | LONG $0xe5cc380f // SHA256MSG1 XMM4, XMM5 | |
150 | |||
151 | #define ROUND567 \ | |
152 | PADDL X6, X0 \ | |
153 | LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 | |
154 | MOVO X6, X1 \ | |
155 | LONG $0x0f3a0f66; WORD $0x04cd \ // PALIGNR XMM1, XMM5, 4 | |
156 | PADDL X1, X7 \ | |
157 | LONG $0xfecd380f \ // SHA256MSG2 XMM7, XMM6 | |
158 | PSHUFD $0x4e, X0, X0 \ | |
159 | LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 | |
160 | LONG $0xeecc380f // SHA256MSG1 XMM5, XMM6 | |
161 | |||
162 | #define ROUND674 \ | |
163 | PADDL X7, X0 \ | |
164 | LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 | |
165 | MOVO X7, X1 \ | |
166 | LONG $0x0f3a0f66; WORD $0x04ce \ // PALIGNR XMM1, XMM6, 4 | |
167 | PADDL X1, X4 \ | |
168 | LONG $0xe7cd380f \ // SHA256MSG2 XMM4, XMM7 | |
169 | PSHUFD $0x4e, X0, X0 \ | |
170 | LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 | |
171 | LONG $0xf7cc380f // SHA256MSG1 XMM6, XMM7 | |
172 | |||
173 | #define ROUND745 \ | |
174 | PADDL X4, X0 \ | |
175 | LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2 | |
176 | MOVO X4, X1 \ | |
177 | LONG $0x0f3a0f66; WORD $0x04cf \ // PALIGNR XMM1, XMM7, 4 | |
178 | PADDL X1, X5 \ | |
179 | LONG $0xeccd380f \ // SHA256MSG2 XMM5, XMM4 | |
180 | PSHUFD $0x4e, X0, X0 \ | |
181 | LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3 | |
182 | LONG $0xfccc380f // SHA256MSG1 XMM7, XMM4 | |
183 | |||
// The first three groups are written out by hand: they consume the message
// words exactly as loaded, so only the SHA256MSG1 start-up steps are issued
// (none for rounds 0-3) and no SHA256MSG2 completion is needed yet.
184 | // rounds 0-3 | |
185 | MOVO (BX), X0 | |
186 | PADDL X4, X0 | |
187 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | |
188 | PSHUFD $0x4e, X0, X0 | |
189 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | |
190 | |||
191 | // rounds 4-7 | |
192 | MOVO 1*16(BX), X0 | |
193 | PADDL X5, X0 | |
194 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | |
195 | PSHUFD $0x4e, X0, X0 | |
196 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | |
197 | LONG $0xe5cc380f // SHA256MSG1 XMM4, XMM5 | |
198 | |||
199 | // rounds 8-11 | |
200 | MOVO 2*16(BX), X0 | |
201 | PADDL X6, X0 | |
202 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | |
203 | PSHUFD $0x4e, X0, X0 | |
204 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | |
205 | LONG $0xeecc380f // SHA256MSG1 XMM5, XMM6 | |
206 | |||
// Steady state: load the next four constants, then run the macro whose
// register rotation matches this position in the schedule.
207 | MOVO 3*16(BX), X0; ROUND674 // rounds 12-15 | |
208 | MOVO 4*16(BX), X0; ROUND745 // rounds 16-19 | |
209 | MOVO 5*16(BX), X0; ROUND456 // rounds 20-23 | |
210 | MOVO 6*16(BX), X0; ROUND567 // rounds 24-27 | |
211 | MOVO 7*16(BX), X0; ROUND674 // rounds 28-31 | |
212 | MOVO 8*16(BX), X0; ROUND745 // rounds 32-35 | |
213 | MOVO 9*16(BX), X0; ROUND456 // rounds 36-39 | |
214 | MOVO 10*16(BX), X0; ROUND567 // rounds 40-43 | |
215 | MOVO 11*16(BX), X0; ROUND674 // rounds 44-47 | |
216 | MOVO 12*16(BX), X0; ROUND745 // rounds 48-51 | |
217 | |||
// The last three groups are written out again: rounds 52-55 and 56-59 are the
// ROUND456 / ROUND567 sequences without the trailing SHA256MSG1, and rounds
// 60-63 do no schedule work at all.
218 | // rounds 52-55 | |
219 | MOVO 13*16(BX), X0 | |
220 | PADDL X5, X0 | |
221 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | |
222 | MOVO X5, X1 | |
223 | LONG $0x0f3a0f66; WORD $0x04cc // PALIGNR XMM1, XMM4, 4 | |
224 | PADDL X1, X6 | |
225 | LONG $0xf5cd380f // SHA256MSG2 XMM6, XMM5 | |
226 | PSHUFD $0x4e, X0, X0 | |
227 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | |
228 | |||
229 | // rounds 56-59 | |
230 | MOVO 14*16(BX), X0 | |
231 | PADDL X6, X0 | |
232 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | |
233 | MOVO X6, X1 | |
234 | LONG $0x0f3a0f66; WORD $0x04cd // PALIGNR XMM1, XMM5, 4 | |
235 | PADDL X1, X7 | |
236 | LONG $0xfecd380f // SHA256MSG2 XMM7, XMM6 | |
237 | PSHUFD $0x4e, X0, X0 | |
238 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | |
239 | |||
240 | // rounds 60-63 | |
241 | MOVO 15*16(BX), X0 | |
242 | PADDL X7, X0 | |
243 | LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2 | |
244 | PSHUFD $0x4e, X0, X0 | |
245 | LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3 | |
246 | |||
// Feed-forward: add the state saved at the top of the loop to the result.
247 | PADDL X12, X2 | |
248 | PADDL X13, X3 | |
249 | |||
// Advance to the next 64-byte block.
250 | ADDQ $64, SI | |
251 | |||
252 | TEST: | |
// Unsigned compare: keep looping while a full block starting at SI still fits
// (SI <= DI).
253 | CMPQ SI, DI | |
254 | JBE LOOP | |
255 | |||
// Undo the ABEF / CDGH packing: swap halves, blend the matching halves of X2
// and X3 together, then reverse the word order so X0 and X1 hold the first and
// second four state words in the order they were loaded from (DX).
256 | PSHUFD $0x4e, X3, X0 | |
257 | LONG $0x0e3a0f66; WORD $0xf0c2 // PBLENDW XMM0, XMM2, 0xf0 | |
258 | PSHUFD $0x4e, X2, X1 | |
259 | LONG $0x0e3a0f66; WORD $0x0fcb // PBLENDW XMM1, XMM3, 0x0f | |
260 | PSHUFD $0x1b, X0, X0 | |
261 | PSHUFD $0x1b, X1, X1 | |
262 | |||
// Store the updated 32-byte state back through h.
263 | MOVOU X0, (DX) | |
264 | MOVOU X1, 16(DX) | |
265 | |||
266 | RET | |