aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/minio/sha256-simd/sha256block_amd64.s
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/minio/sha256-simd/sha256block_amd64.s')
-rw-r--r--vendor/github.com/minio/sha256-simd/sha256block_amd64.s266
1 files changed, 266 insertions, 0 deletions
diff --git a/vendor/github.com/minio/sha256-simd/sha256block_amd64.s b/vendor/github.com/minio/sha256-simd/sha256block_amd64.s
new file mode 100644
index 0000000..c98a1d8
--- /dev/null
+++ b/vendor/github.com/minio/sha256-simd/sha256block_amd64.s
@@ -0,0 +1,266 @@
1//+build !noasm,!appengine,gc
2
3// SHA intrinsic version of SHA256
4
5// Kristofer Peterson, (C) 2018.
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10//
11// http://www.apache.org/licenses/LICENSE-2.0
12//
13// Unless required by applicable law or agreed to in writing, software
14// distributed under the License is distributed on an "AS IS" BASIS,
15// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16// See the License for the specific language governing permissions and
17// limitations under the License.
18//
19
20#include "textflag.h"
21
// K<> is the table of the 64 SHA-256 round constants, stored as consecutive
// 32-bit words (256 bytes in total). Each group of four words below is one
// 16-byte chunk, i.e. the constants for four consecutive rounds.

// rounds 0-3
DATA K<>+0x00(SB)/4, $0x428a2f98
DATA K<>+0x04(SB)/4, $0x71374491
DATA K<>+0x08(SB)/4, $0xb5c0fbcf
DATA K<>+0x0c(SB)/4, $0xe9b5dba5

// rounds 4-7
DATA K<>+0x10(SB)/4, $0x3956c25b
DATA K<>+0x14(SB)/4, $0x59f111f1
DATA K<>+0x18(SB)/4, $0x923f82a4
DATA K<>+0x1c(SB)/4, $0xab1c5ed5

// rounds 8-11
DATA K<>+0x20(SB)/4, $0xd807aa98
DATA K<>+0x24(SB)/4, $0x12835b01
DATA K<>+0x28(SB)/4, $0x243185be
DATA K<>+0x2c(SB)/4, $0x550c7dc3

// rounds 12-15
DATA K<>+0x30(SB)/4, $0x72be5d74
DATA K<>+0x34(SB)/4, $0x80deb1fe
DATA K<>+0x38(SB)/4, $0x9bdc06a7
DATA K<>+0x3c(SB)/4, $0xc19bf174

// rounds 16-19
DATA K<>+0x40(SB)/4, $0xe49b69c1
DATA K<>+0x44(SB)/4, $0xefbe4786
DATA K<>+0x48(SB)/4, $0x0fc19dc6
DATA K<>+0x4c(SB)/4, $0x240ca1cc

// rounds 20-23
DATA K<>+0x50(SB)/4, $0x2de92c6f
DATA K<>+0x54(SB)/4, $0x4a7484aa
DATA K<>+0x58(SB)/4, $0x5cb0a9dc
DATA K<>+0x5c(SB)/4, $0x76f988da

// rounds 24-27
DATA K<>+0x60(SB)/4, $0x983e5152
DATA K<>+0x64(SB)/4, $0xa831c66d
DATA K<>+0x68(SB)/4, $0xb00327c8
DATA K<>+0x6c(SB)/4, $0xbf597fc7

// rounds 28-31
DATA K<>+0x70(SB)/4, $0xc6e00bf3
DATA K<>+0x74(SB)/4, $0xd5a79147
DATA K<>+0x78(SB)/4, $0x06ca6351
DATA K<>+0x7c(SB)/4, $0x14292967

// rounds 32-35
DATA K<>+0x80(SB)/4, $0x27b70a85
DATA K<>+0x84(SB)/4, $0x2e1b2138
DATA K<>+0x88(SB)/4, $0x4d2c6dfc
DATA K<>+0x8c(SB)/4, $0x53380d13

// rounds 36-39
DATA K<>+0x90(SB)/4, $0x650a7354
DATA K<>+0x94(SB)/4, $0x766a0abb
DATA K<>+0x98(SB)/4, $0x81c2c92e
DATA K<>+0x9c(SB)/4, $0x92722c85

// rounds 40-43
DATA K<>+0xa0(SB)/4, $0xa2bfe8a1
DATA K<>+0xa4(SB)/4, $0xa81a664b
DATA K<>+0xa8(SB)/4, $0xc24b8b70
DATA K<>+0xac(SB)/4, $0xc76c51a3

// rounds 44-47
DATA K<>+0xb0(SB)/4, $0xd192e819
DATA K<>+0xb4(SB)/4, $0xd6990624
DATA K<>+0xb8(SB)/4, $0xf40e3585
DATA K<>+0xbc(SB)/4, $0x106aa070

// rounds 48-51
DATA K<>+0xc0(SB)/4, $0x19a4c116
DATA K<>+0xc4(SB)/4, $0x1e376c08
DATA K<>+0xc8(SB)/4, $0x2748774c
DATA K<>+0xcc(SB)/4, $0x34b0bcb5

// rounds 52-55
DATA K<>+0xd0(SB)/4, $0x391c0cb3
DATA K<>+0xd4(SB)/4, $0x4ed8aa4a
DATA K<>+0xd8(SB)/4, $0x5b9cca4f
DATA K<>+0xdc(SB)/4, $0x682e6ff3

// rounds 56-59
DATA K<>+0xe0(SB)/4, $0x748f82ee
DATA K<>+0xe4(SB)/4, $0x78a5636f
DATA K<>+0xe8(SB)/4, $0x84c87814
DATA K<>+0xec(SB)/4, $0x8cc70208

// rounds 60-63
DATA K<>+0xf0(SB)/4, $0x90befffa
DATA K<>+0xf4(SB)/4, $0xa4506ceb
DATA K<>+0xf8(SB)/4, $0xbef9a3f7
DATA K<>+0xfc(SB)/4, $0xc67178f2

GLOBL K<>(SB), RODATA|NOPTR, $256
87
// SHUF_MASK<> is a 16-byte PSHUFB control mask. Viewed one 32-bit lane at a
// time, the selector bytes in memory are 3,2,1,0 / 7,6,5,4 / 11,10,9,8 /
// 15,14,13,12, i.e. it reverses the byte order inside every dword while
// leaving the dwords themselves in place.
DATA SHUF_MASK<>+0x00(SB)/4, $0x00010203
DATA SHUF_MASK<>+0x04(SB)/4, $0x04050607
DATA SHUF_MASK<>+0x08(SB)/4, $0x08090a0b
DATA SHUF_MASK<>+0x0c(SB)/4, $0x0c0d0e0f
GLOBL SHUF_MASK<>(SB), RODATA|NOPTR, $16
91
// Register Usage
// BX  base address of constant table (constant)
// DX  hash_state (constant)
// SI  hash_data.data
// DI  hash_data.data + hash_data.length - 64 (constant)
// X0  scratch
// X1  scratch
// X2  working hash state // ABEF
// X3  working hash state // CDGH
// X4  first 16 bytes of block
// X5  second 16 bytes of block
// X6  third 16 bytes of block
// X7  fourth 16 bytes of block
// X12 saved hash state // ABEF
// X13 saved hash state // CDGH
// X15 data shuffle mask (constant)

// ROUNDabc macros: four rounds plus one step of message-schedule work.
//
// Each macro expects X0 to already hold the 16 bytes of round constants for
// the four rounds (the caller does "MOVO n*16(BX), X0" first). It then
//   1. adds the current message words to X0,
//   2. runs SHA256RNDS2 twice (state halves X2/X3 alternate as destination;
//      PSHUFD $0x4e swaps the 64-bit halves of X0 between the two),
//   3. feeds the next message register: PALIGNR/PADDL followed by SHA256MSG2,
//   4. starts the following register's schedule with SHA256MSG1.
// The SHA and SSSE3/SSE4.1 instructions are emitted as raw bytes (LONG/WORD);
// the trailing comment on each such line gives the instruction in Intel
// operand order. X0 and X1 are clobbered.

#define ROUND456 \
	PADDL  X5, X0                    \
	LONG   $0xdacb380f               \ // SHA256RNDS2 XMM3, XMM2
	MOVO   X5, X1                    \
	LONG   $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1, XMM4, 4
	PADDL  X1, X6                    \
	LONG   $0xf5cd380f               \ // SHA256MSG2 XMM6, XMM5
	PSHUFD $0x4e, X0, X0             \
	LONG   $0xd3cb380f               \ // SHA256RNDS2 XMM2, XMM3
	LONG   $0xe5cc380f               // SHA256MSG1 XMM4, XMM5

#define ROUND567 \
	PADDL  X6, X0                    \
	LONG   $0xdacb380f               \ // SHA256RNDS2 XMM3, XMM2
	MOVO   X6, X1                    \
	LONG   $0x0f3a0f66; WORD $0x04cd \ // PALIGNR XMM1, XMM5, 4
	PADDL  X1, X7                    \
	LONG   $0xfecd380f               \ // SHA256MSG2 XMM7, XMM6
	PSHUFD $0x4e, X0, X0             \
	LONG   $0xd3cb380f               \ // SHA256RNDS2 XMM2, XMM3
	LONG   $0xeecc380f               // SHA256MSG1 XMM5, XMM6

#define ROUND674 \
	PADDL  X7, X0                    \
	LONG   $0xdacb380f               \ // SHA256RNDS2 XMM3, XMM2
	MOVO   X7, X1                    \
	LONG   $0x0f3a0f66; WORD $0x04ce \ // PALIGNR XMM1, XMM6, 4
	PADDL  X1, X4                    \
	LONG   $0xe7cd380f               \ // SHA256MSG2 XMM4, XMM7
	PSHUFD $0x4e, X0, X0             \
	LONG   $0xd3cb380f               \ // SHA256RNDS2 XMM2, XMM3
	LONG   $0xf7cc380f               // SHA256MSG1 XMM6, XMM7

#define ROUND745 \
	PADDL  X4, X0                    \
	LONG   $0xdacb380f               \ // SHA256RNDS2 XMM3, XMM2
	MOVO   X4, X1                    \
	LONG   $0x0f3a0f66; WORD $0x04cf \ // PALIGNR XMM1, XMM7, 4
	PADDL  X1, X5                    \
	LONG   $0xeccd380f               \ // SHA256MSG2 XMM5, XMM4
	PSHUFD $0x4e, X0, X0             \
	LONG   $0xd3cb380f               \ // SHA256RNDS2 XMM2, XMM3
	LONG   $0xfccc380f               // SHA256MSG1 XMM7, XMM4

// blockIntelSha updates the 32-byte hash state at h with every complete
// 64-byte block of message, using the x86 SHA extensions.
//
// Arguments (Go stack ABI, 32 bytes of arguments, no locals):
//   h+0(FP)             pointer to the 32-byte hash state, read and written
//   message_base+8(FP)  start of the input data
//   message_len+16(FP)  input length in bytes
// The matching Go prototype is presumably
//   func blockIntelSha(h *[8]uint32, message []uint8)
// TODO confirm against the Go declaration.
//
// Only whole 64-byte blocks are consumed; any trailing partial block is
// ignored. If the message is shorter than 64 bytes the function returns
// immediately and the state is left untouched.
//
// The caller must ensure the CPU supports the SHA extensions (plus SSSE3 and
// SSE4.1 for PSHUFB/PALIGNR/PBLENDW); no feature check is done here.
// Clobbers BX, DX, SI, DI, X0-X7, X12, X13, X15 and flags.
TEXT ·blockIntelSha(SB), NOSPLIT, $0-32
	MOVQ h+0(FP), DX
	MOVQ message_base+8(FP), SI
	MOVQ message_len+16(FP), DI

	// Bail out unless at least one whole block is present. Without this
	// check, a message shorter than 64 bytes whose base address is below
	// 64-len (in particular a nil/empty slice, base 0) makes the end pointer
	// computed below wrap around to a huge unsigned value, so the unsigned
	// JBE at TEST would enter the loop and read from an invalid address.
	CMPQ DI, $64
	JB   DONE

	// DI = address of the last position at which a full block can start.
	LEAQ -64(SI)(DI*1), DI

	// Load the eight state words and rearrange them into the two-register
	// layout used by SHA256RNDS2 (X2 = ABEF, X3 = CDGH).
	MOVOU     (DX), X2
	MOVOU     16(DX), X1
	MOVO      X2, X3
	PUNPCKLLQ X1, X2
	PUNPCKHLQ X1, X3
	PSHUFD    $0x27, X2, X2
	PSHUFD    $0x27, X3, X3

	MOVO SHUF_MASK<>(SB), X15
	LEAQ K<>(SB), BX

	JMP TEST

LOOP:
	// Keep the incoming state so it can be added back after the 64 rounds.
	MOVO X2, X12
	MOVO X3, X13

	// load block and shuffle
	// (PSHUFB with X15 byte-swaps every 32-bit word of the block.)
	MOVOU  (SI), X4
	MOVOU  16(SI), X5
	MOVOU  32(SI), X6
	MOVOU  48(SI), X7
	PSHUFB X15, X4
	PSHUFB X15, X5
	PSHUFB X15, X6
	PSHUFB X15, X7

	// rounds 0-3
	MOVO   (BX), X0
	PADDL  X4, X0
	LONG   $0xdacb380f // SHA256RNDS2 XMM3, XMM2
	PSHUFD $0x4e, X0, X0
	LONG   $0xd3cb380f // SHA256RNDS2 XMM2, XMM3

	// rounds 4-7
	MOVO   1*16(BX), X0
	PADDL  X5, X0
	LONG   $0xdacb380f // SHA256RNDS2 XMM3, XMM2
	PSHUFD $0x4e, X0, X0
	LONG   $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
	LONG   $0xe5cc380f // SHA256MSG1 XMM4, XMM5

	// rounds 8-11
	MOVO   2*16(BX), X0
	PADDL  X6, X0
	LONG   $0xdacb380f // SHA256RNDS2 XMM3, XMM2
	PSHUFD $0x4e, X0, X0
	LONG   $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
	LONG   $0xeecc380f // SHA256MSG1 XMM5, XMM6

	MOVO 3*16(BX), X0; ROUND674  // rounds 12-15
	MOVO 4*16(BX), X0; ROUND745  // rounds 16-19
	MOVO 5*16(BX), X0; ROUND456  // rounds 20-23
	MOVO 6*16(BX), X0; ROUND567  // rounds 24-27
	MOVO 7*16(BX), X0; ROUND674  // rounds 28-31
	MOVO 8*16(BX), X0; ROUND745  // rounds 32-35
	MOVO 9*16(BX), X0; ROUND456  // rounds 36-39
	MOVO 10*16(BX), X0; ROUND567 // rounds 40-43
	MOVO 11*16(BX), X0; ROUND674 // rounds 44-47
	MOVO 12*16(BX), X0; ROUND745 // rounds 48-51

	// rounds 52-55
	// (same as ROUND456 without the trailing SHA256MSG1)
	MOVO   13*16(BX), X0
	PADDL  X5, X0
	LONG   $0xdacb380f               // SHA256RNDS2 XMM3, XMM2
	MOVO   X5, X1
	LONG   $0x0f3a0f66; WORD $0x04cc // PALIGNR XMM1, XMM4, 4
	PADDL  X1, X6
	LONG   $0xf5cd380f               // SHA256MSG2 XMM6, XMM5
	PSHUFD $0x4e, X0, X0
	LONG   $0xd3cb380f               // SHA256RNDS2 XMM2, XMM3

	// rounds 56-59
	// (same as ROUND567 without the trailing SHA256MSG1)
	MOVO   14*16(BX), X0
	PADDL  X6, X0
	LONG   $0xdacb380f               // SHA256RNDS2 XMM3, XMM2
	MOVO   X6, X1
	LONG   $0x0f3a0f66; WORD $0x04cd // PALIGNR XMM1, XMM5, 4
	PADDL  X1, X7
	LONG   $0xfecd380f               // SHA256MSG2 XMM7, XMM6
	PSHUFD $0x4e, X0, X0
	LONG   $0xd3cb380f               // SHA256RNDS2 XMM2, XMM3

	// rounds 60-63
	MOVO   15*16(BX), X0
	PADDL  X7, X0
	LONG   $0xdacb380f // SHA256RNDS2 XMM3, XMM2
	PSHUFD $0x4e, X0, X0
	LONG   $0xd3cb380f // SHA256RNDS2 XMM2, XMM3

	// Add the state saved at the top of the loop back into the result.
	PADDL X12, X2
	PADDL X13, X3

	ADDQ $64, SI

TEST:
	// Unsigned compare: keep going while a full block still starts at SI.
	CMPQ SI, DI
	JBE  LOOP

	// Undo the ABEF/CDGH arrangement so the state is stored back in the same
	// word order it was loaded in.
	PSHUFD $0x4e, X3, X0
	LONG   $0x0e3a0f66; WORD $0xf0c2 // PBLENDW XMM0, XMM2, 0xf0
	PSHUFD $0x4e, X2, X1
	LONG   $0x0e3a0f66; WORD $0x0fcb // PBLENDW XMM1, XMM3, 0x0f
	PSHUFD $0x1b, X0, X0
	PSHUFD $0x1b, X1, X1

	MOVOU X0, (DX)
	MOVOU X1, 16(DX)

DONE:
	RET