diff options
Diffstat (limited to 'vendor/golang.org/x/crypto/argon2')
-rw-r--r-- | vendor/golang.org/x/crypto/argon2/argon2.go | 283 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/argon2/blake2b.go | 53 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/argon2/blamka_amd64.go | 60 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/argon2/blamka_amd64.s | 243 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/argon2/blamka_generic.go | 163 | ||||
-rw-r--r-- | vendor/golang.org/x/crypto/argon2/blamka_ref.go | 15 |
6 files changed, 817 insertions, 0 deletions
diff --git a/vendor/golang.org/x/crypto/argon2/argon2.go b/vendor/golang.org/x/crypto/argon2/argon2.go new file mode 100644 index 0000000..29f0a2d --- /dev/null +++ b/vendor/golang.org/x/crypto/argon2/argon2.go | |||
@@ -0,0 +1,283 @@ | |||
1 | // Copyright 2017 The Go Authors. All rights reserved. | ||
2 | // Use of this source code is governed by a BSD-style | ||
3 | // license that can be found in the LICENSE file. | ||
4 | |||
5 | // Package argon2 implements the key derivation function Argon2. | ||
6 | // Argon2 was selected as the winner of the Password Hashing Competition and can | ||
7 | // be used to derive cryptographic keys from passwords. | ||
8 | // | ||
9 | // For a detailed specification of Argon2 see [1]. | ||
10 | // | ||
11 | // If you aren't sure which function you need, use Argon2id (IDKey) and | ||
12 | // the parameter recommendations for your scenario. | ||
13 | // | ||
14 | // # Argon2i | ||
15 | // | ||
16 | // Argon2i (implemented by Key) is the side-channel resistant version of Argon2. | ||
17 | // It uses data-independent memory access, which is preferred for password | ||
18 | // hashing and password-based key derivation. Argon2i requires more passes over | ||
19 | // memory than Argon2id to protect from trade-off attacks. The recommended | ||
20 | // parameters (taken from [2]) for non-interactive operations are time=3 and to | ||
21 | // use the maximum available memory. | ||
22 | // | ||
23 | // # Argon2id | ||
24 | // | ||
25 | // Argon2id (implemented by IDKey) is a hybrid version of Argon2 combining | ||
26 | // Argon2i and Argon2d. It uses data-independent memory access for the first | ||
27 | // half of the first iteration over the memory and data-dependent memory access | ||
28 | // for the rest. Argon2id is side-channel resistant and provides better brute- | ||
29 | // force cost savings due to time-memory tradeoffs than Argon2i. The recommended | ||
30 | // parameters for non-interactive operations (taken from [2]) are time=1 and to | ||
31 | // use the maximum available memory. | ||
32 | // | ||
33 | // [1] https://github.com/P-H-C/phc-winner-argon2/blob/master/argon2-specs.pdf | ||
34 | // [2] https://tools.ietf.org/html/draft-irtf-cfrg-argon2-03#section-9.3 | ||
35 | package argon2 | ||
36 | |||
37 | import ( | ||
38 | "encoding/binary" | ||
39 | "sync" | ||
40 | |||
41 | "golang.org/x/crypto/blake2b" | ||
42 | ) | ||
43 | |||
44 | // The Argon2 version implemented by this package. | ||
45 | const Version = 0x13 | ||
46 | |||
47 | const ( | ||
48 | argon2d = iota | ||
49 | argon2i | ||
50 | argon2id | ||
51 | ) | ||
52 | |||
// Key derives a key from the password, salt, and cost parameters using Argon2i
// returning a byte slice of length keyLen that can be used as cryptographic
// key. The CPU cost and parallelism degree must be greater than zero; Key
// panics if time or threads is zero.
//
// For example, you can get a derived key for e.g. AES-256 (which needs a
// 32-byte key) by doing:
//
//	key := argon2.Key([]byte("some password"), salt, 3, 32*1024, 4, 32)
//
// The draft RFC recommends[2] time=3, and memory=32*1024 is a sensible number.
// If using that amount of memory (32 MB) is not possible in some contexts then
// the time parameter can be increased to compensate.
//
// The time parameter specifies the number of passes over the memory and the
// memory parameter specifies the size of the memory in KiB. For example
// memory=32*1024 sets the memory cost to ~32 MB. The number of threads can be
// adjusted to the number of available CPUs. The cost parameters should be
// increased as memory latency and CPU parallelism increase. Remember to get a
// good random salt.
func Key(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte {
	// No secret and no associated data are passed on this public entry point.
	return deriveKey(argon2i, password, salt, nil, nil, time, memory, threads, keyLen)
}
75 | |||
// IDKey derives a key from the password, salt, and cost parameters using
// Argon2id returning a byte slice of length keyLen that can be used as
// cryptographic key. The CPU cost and parallelism degree must be greater than
// zero; IDKey panics if time or threads is zero.
//
// For example, you can get a derived key for e.g. AES-256 (which needs a
// 32-byte key) by doing:
//
//	key := argon2.IDKey([]byte("some password"), salt, 1, 64*1024, 4, 32)
//
// The draft RFC recommends[2] time=1, and memory=64*1024 is a sensible number.
// If using that amount of memory (64 MB) is not possible in some contexts then
// the time parameter can be increased to compensate.
//
// The time parameter specifies the number of passes over the memory and the
// memory parameter specifies the size of the memory in KiB. For example
// memory=64*1024 sets the memory cost to ~64 MB. The number of threads can be
// adjusted to the number of available CPUs. The cost parameters should be
// increased as memory latency and CPU parallelism increase. Remember to get a
// good random salt.
func IDKey(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte {
	// No secret and no associated data are passed on this public entry point.
	return deriveKey(argon2id, password, salt, nil, nil, time, memory, threads, keyLen)
}
99 | |||
100 | func deriveKey(mode int, password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []byte { | ||
101 | if time < 1 { | ||
102 | panic("argon2: number of rounds too small") | ||
103 | } | ||
104 | if threads < 1 { | ||
105 | panic("argon2: parallelism degree too low") | ||
106 | } | ||
107 | h0 := initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode) | ||
108 | |||
109 | memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads)) | ||
110 | if memory < 2*syncPoints*uint32(threads) { | ||
111 | memory = 2 * syncPoints * uint32(threads) | ||
112 | } | ||
113 | B := initBlocks(&h0, memory, uint32(threads)) | ||
114 | processBlocks(B, time, memory, uint32(threads), mode) | ||
115 | return extractKey(B, memory, uint32(threads), keyLen) | ||
116 | } | ||
117 | |||
const (
	// blockLength is the number of 64-bit words in one memory block
	// (128 words = 1024 bytes, matching the 1024-byte scratch buffers used
	// when blocks are serialized).
	blockLength = 128
	// syncPoints is the number of slices each pass over the memory is split
	// into; all lanes are waited for at the end of every slice.
	syncPoints = 4
)

// block is a single Argon2 memory block.
type block [blockLength]uint64
124 | |||
125 | func initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte { | ||
126 | var ( | ||
127 | h0 [blake2b.Size + 8]byte | ||
128 | params [24]byte | ||
129 | tmp [4]byte | ||
130 | ) | ||
131 | |||
132 | b2, _ := blake2b.New512(nil) | ||
133 | binary.LittleEndian.PutUint32(params[0:4], threads) | ||
134 | binary.LittleEndian.PutUint32(params[4:8], keyLen) | ||
135 | binary.LittleEndian.PutUint32(params[8:12], memory) | ||
136 | binary.LittleEndian.PutUint32(params[12:16], time) | ||
137 | binary.LittleEndian.PutUint32(params[16:20], uint32(Version)) | ||
138 | binary.LittleEndian.PutUint32(params[20:24], uint32(mode)) | ||
139 | b2.Write(params[:]) | ||
140 | binary.LittleEndian.PutUint32(tmp[:], uint32(len(password))) | ||
141 | b2.Write(tmp[:]) | ||
142 | b2.Write(password) | ||
143 | binary.LittleEndian.PutUint32(tmp[:], uint32(len(salt))) | ||
144 | b2.Write(tmp[:]) | ||
145 | b2.Write(salt) | ||
146 | binary.LittleEndian.PutUint32(tmp[:], uint32(len(key))) | ||
147 | b2.Write(tmp[:]) | ||
148 | b2.Write(key) | ||
149 | binary.LittleEndian.PutUint32(tmp[:], uint32(len(data))) | ||
150 | b2.Write(tmp[:]) | ||
151 | b2.Write(data) | ||
152 | b2.Sum(h0[:0]) | ||
153 | return h0 | ||
154 | } | ||
155 | |||
156 | func initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []block { | ||
157 | var block0 [1024]byte | ||
158 | B := make([]block, memory) | ||
159 | for lane := uint32(0); lane < threads; lane++ { | ||
160 | j := lane * (memory / threads) | ||
161 | binary.LittleEndian.PutUint32(h0[blake2b.Size+4:], lane) | ||
162 | |||
163 | binary.LittleEndian.PutUint32(h0[blake2b.Size:], 0) | ||
164 | blake2bHash(block0[:], h0[:]) | ||
165 | for i := range B[j+0] { | ||
166 | B[j+0][i] = binary.LittleEndian.Uint64(block0[i*8:]) | ||
167 | } | ||
168 | |||
169 | binary.LittleEndian.PutUint32(h0[blake2b.Size:], 1) | ||
170 | blake2bHash(block0[:], h0[:]) | ||
171 | for i := range B[j+1] { | ||
172 | B[j+1][i] = binary.LittleEndian.Uint64(block0[i*8:]) | ||
173 | } | ||
174 | } | ||
175 | return B | ||
176 | } | ||
177 | |||
178 | func processBlocks(B []block, time, memory, threads uint32, mode int) { | ||
179 | lanes := memory / threads | ||
180 | segments := lanes / syncPoints | ||
181 | |||
182 | processSegment := func(n, slice, lane uint32, wg *sync.WaitGroup) { | ||
183 | var addresses, in, zero block | ||
184 | if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) { | ||
185 | in[0] = uint64(n) | ||
186 | in[1] = uint64(lane) | ||
187 | in[2] = uint64(slice) | ||
188 | in[3] = uint64(memory) | ||
189 | in[4] = uint64(time) | ||
190 | in[5] = uint64(mode) | ||
191 | } | ||
192 | |||
193 | index := uint32(0) | ||
194 | if n == 0 && slice == 0 { | ||
195 | index = 2 // we have already generated the first two blocks | ||
196 | if mode == argon2i || mode == argon2id { | ||
197 | in[6]++ | ||
198 | processBlock(&addresses, &in, &zero) | ||
199 | processBlock(&addresses, &addresses, &zero) | ||
200 | } | ||
201 | } | ||
202 | |||
203 | offset := lane*lanes + slice*segments + index | ||
204 | var random uint64 | ||
205 | for index < segments { | ||
206 | prev := offset - 1 | ||
207 | if index == 0 && slice == 0 { | ||
208 | prev += lanes // last block in lane | ||
209 | } | ||
210 | if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) { | ||
211 | if index%blockLength == 0 { | ||
212 | in[6]++ | ||
213 | processBlock(&addresses, &in, &zero) | ||
214 | processBlock(&addresses, &addresses, &zero) | ||
215 | } | ||
216 | random = addresses[index%blockLength] | ||
217 | } else { | ||
218 | random = B[prev][0] | ||
219 | } | ||
220 | newOffset := indexAlpha(random, lanes, segments, threads, n, slice, lane, index) | ||
221 | processBlockXOR(&B[offset], &B[prev], &B[newOffset]) | ||
222 | index, offset = index+1, offset+1 | ||
223 | } | ||
224 | wg.Done() | ||
225 | } | ||
226 | |||
227 | for n := uint32(0); n < time; n++ { | ||
228 | for slice := uint32(0); slice < syncPoints; slice++ { | ||
229 | var wg sync.WaitGroup | ||
230 | for lane := uint32(0); lane < threads; lane++ { | ||
231 | wg.Add(1) | ||
232 | go processSegment(n, slice, lane, &wg) | ||
233 | } | ||
234 | wg.Wait() | ||
235 | } | ||
236 | } | ||
237 | |||
238 | } | ||
239 | |||
240 | func extractKey(B []block, memory, threads, keyLen uint32) []byte { | ||
241 | lanes := memory / threads | ||
242 | for lane := uint32(0); lane < threads-1; lane++ { | ||
243 | for i, v := range B[(lane*lanes)+lanes-1] { | ||
244 | B[memory-1][i] ^= v | ||
245 | } | ||
246 | } | ||
247 | |||
248 | var block [1024]byte | ||
249 | for i, v := range B[memory-1] { | ||
250 | binary.LittleEndian.PutUint64(block[i*8:], v) | ||
251 | } | ||
252 | key := make([]byte, keyLen) | ||
253 | blake2bHash(key, block[:]) | ||
254 | return key | ||
255 | } | ||
256 | |||
257 | func indexAlpha(rand uint64, lanes, segments, threads, n, slice, lane, index uint32) uint32 { | ||
258 | refLane := uint32(rand>>32) % threads | ||
259 | if n == 0 && slice == 0 { | ||
260 | refLane = lane | ||
261 | } | ||
262 | m, s := 3*segments, ((slice+1)%syncPoints)*segments | ||
263 | if lane == refLane { | ||
264 | m += index | ||
265 | } | ||
266 | if n == 0 { | ||
267 | m, s = slice*segments, 0 | ||
268 | if slice == 0 || lane == refLane { | ||
269 | m += index | ||
270 | } | ||
271 | } | ||
272 | if index == 0 || lane == refLane { | ||
273 | m-- | ||
274 | } | ||
275 | return phi(rand, uint64(m), uint64(s), refLane, lanes) | ||
276 | } | ||
277 | |||
// phi maps the low 32 bits of rand onto a position inside a window of m
// blocks that starts at offset s, and returns the absolute block index inside
// the given lane, where each lane holds lanes blocks.
//
// The low word is squared and scaled by m, keeping the upper 32 bits each
// time, before being subtracted from the end of the window.
func phi(rand, m, s uint64, lane, lanes uint32) uint32 {
	low := rand & 0xFFFFFFFF
	squared := (low * low) >> 32
	back := (squared * m) >> 32
	pos := (s + m - (back + 1)) % uint64(lanes)
	return lane*lanes + uint32(pos)
}
diff --git a/vendor/golang.org/x/crypto/argon2/blake2b.go b/vendor/golang.org/x/crypto/argon2/blake2b.go new file mode 100644 index 0000000..10f4694 --- /dev/null +++ b/vendor/golang.org/x/crypto/argon2/blake2b.go | |||
@@ -0,0 +1,53 @@ | |||
1 | // Copyright 2017 The Go Authors. All rights reserved. | ||
2 | // Use of this source code is governed by a BSD-style | ||
3 | // license that can be found in the LICENSE file. | ||
4 | |||
5 | package argon2 | ||
6 | |||
7 | import ( | ||
8 | "encoding/binary" | ||
9 | "hash" | ||
10 | |||
11 | "golang.org/x/crypto/blake2b" | ||
12 | ) | ||
13 | |||
14 | // blake2bHash computes an arbitrary long hash value of in | ||
15 | // and writes the hash to out. | ||
16 | func blake2bHash(out []byte, in []byte) { | ||
17 | var b2 hash.Hash | ||
18 | if n := len(out); n < blake2b.Size { | ||
19 | b2, _ = blake2b.New(n, nil) | ||
20 | } else { | ||
21 | b2, _ = blake2b.New512(nil) | ||
22 | } | ||
23 | |||
24 | var buffer [blake2b.Size]byte | ||
25 | binary.LittleEndian.PutUint32(buffer[:4], uint32(len(out))) | ||
26 | b2.Write(buffer[:4]) | ||
27 | b2.Write(in) | ||
28 | |||
29 | if len(out) <= blake2b.Size { | ||
30 | b2.Sum(out[:0]) | ||
31 | return | ||
32 | } | ||
33 | |||
34 | outLen := len(out) | ||
35 | b2.Sum(buffer[:0]) | ||
36 | b2.Reset() | ||
37 | copy(out, buffer[:32]) | ||
38 | out = out[32:] | ||
39 | for len(out) > blake2b.Size { | ||
40 | b2.Write(buffer[:]) | ||
41 | b2.Sum(buffer[:0]) | ||
42 | copy(out, buffer[:32]) | ||
43 | out = out[32:] | ||
44 | b2.Reset() | ||
45 | } | ||
46 | |||
47 | if outLen%blake2b.Size > 0 { // outLen > 64 | ||
48 | r := ((outLen + 31) / 32) - 2 // ⌈τ /32⌉-2 | ||
49 | b2, _ = blake2b.New(outLen-32*r, nil) | ||
50 | } | ||
51 | b2.Write(buffer[:]) | ||
52 | b2.Sum(out[:0]) | ||
53 | } | ||
diff --git a/vendor/golang.org/x/crypto/argon2/blamka_amd64.go b/vendor/golang.org/x/crypto/argon2/blamka_amd64.go new file mode 100644 index 0000000..063e778 --- /dev/null +++ b/vendor/golang.org/x/crypto/argon2/blamka_amd64.go | |||
@@ -0,0 +1,60 @@ | |||
1 | // Copyright 2017 The Go Authors. All rights reserved. | ||
2 | // Use of this source code is governed by a BSD-style | ||
3 | // license that can be found in the LICENSE file. | ||
4 | |||
5 | //go:build amd64 && gc && !purego | ||
6 | |||
7 | package argon2 | ||
8 | |||
9 | import "golang.org/x/sys/cpu" | ||
10 | |||
// init records whether the CPU reports SSE4.1 support, which selects the
// blamkaSSE4 path in processBlockSSE.
func init() {
	useSSE4 = cpu.X86.HasSSE41
}
14 | |||
// mixBlocksSSE2 sets out to a XOR b XOR c. It is implemented in
// blamka_amd64.s.
//
//go:noescape
func mixBlocksSSE2(out, a, b, c *block)

// xorBlocksSSE2 XORs a, b and c into the existing contents of out. It is
// implemented in blamka_amd64.s.
//
//go:noescape
func xorBlocksSSE2(out, a, b, c *block)

// blamkaSSE4 applies the row-wise and then the column-wise BlaMka rounds to b
// in place. It is implemented in blamka_amd64.s.
//
//go:noescape
func blamkaSSE4(b *block)
23 | |||
24 | func processBlockSSE(out, in1, in2 *block, xor bool) { | ||
25 | var t block | ||
26 | mixBlocksSSE2(&t, in1, in2, &t) | ||
27 | if useSSE4 { | ||
28 | blamkaSSE4(&t) | ||
29 | } else { | ||
30 | for i := 0; i < blockLength; i += 16 { | ||
31 | blamkaGeneric( | ||
32 | &t[i+0], &t[i+1], &t[i+2], &t[i+3], | ||
33 | &t[i+4], &t[i+5], &t[i+6], &t[i+7], | ||
34 | &t[i+8], &t[i+9], &t[i+10], &t[i+11], | ||
35 | &t[i+12], &t[i+13], &t[i+14], &t[i+15], | ||
36 | ) | ||
37 | } | ||
38 | for i := 0; i < blockLength/8; i += 2 { | ||
39 | blamkaGeneric( | ||
40 | &t[i], &t[i+1], &t[16+i], &t[16+i+1], | ||
41 | &t[32+i], &t[32+i+1], &t[48+i], &t[48+i+1], | ||
42 | &t[64+i], &t[64+i+1], &t[80+i], &t[80+i+1], | ||
43 | &t[96+i], &t[96+i+1], &t[112+i], &t[112+i+1], | ||
44 | ) | ||
45 | } | ||
46 | } | ||
47 | if xor { | ||
48 | xorBlocksSSE2(out, in1, in2, &t) | ||
49 | } else { | ||
50 | mixBlocksSSE2(out, in1, in2, &t) | ||
51 | } | ||
52 | } | ||
53 | |||
// processBlock overwrites out with the compression of in1 and in2, using the
// SSE implementation.
func processBlock(out, in1, in2 *block) {
	processBlockSSE(out, in1, in2, false)
}

// processBlockXOR XORs the compression of in1 and in2 into the existing
// contents of out, using the SSE implementation.
func processBlockXOR(out, in1, in2 *block) {
	processBlockSSE(out, in1, in2, true)
}
diff --git a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s new file mode 100644 index 0000000..f3b653a --- /dev/null +++ b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s | |||
@@ -0,0 +1,243 @@ | |||
1 | // Copyright 2017 The Go Authors. All rights reserved. | ||
2 | // Use of this source code is governed by a BSD-style | ||
3 | // license that can be found in the LICENSE file. | ||
4 | |||
5 | //go:build amd64 && gc && !purego | ||
6 | |||
7 | #include "textflag.h" | ||
8 | |||
// c40 is a PSHUFB byte-shuffle mask that rotates each 64-bit lane right by
// 24 bits (equivalently left by 40 bits).
DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
GLOBL ·c40<>(SB), (NOPTR+RODATA), $16

// c48 is a PSHUFB byte-shuffle mask that rotates each 64-bit lane right by
// 16 bits (equivalently left by 48 bits).
DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
GLOBL ·c48<>(SB), (NOPTR+RODATA), $16
16 | |||
// SHUFFLE rearranges the 64-bit words held in v2..v7 between the two
// HALF_ROUND invocations of a round: v4 and v5 are swapped outright, and the
// words of (v2, v3) and of (v6, v7) are recombined with unpack instructions.
// t1 and t2 are scratch registers.
// NOTE(review): this appears to be the diagonalization step that makes the
// second HALF_ROUND operate on the word groupings used by the second half of
// blamkaGeneric - confirm against the generic implementation.
#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
	MOVO v4, t1; \
	MOVO v5, v4; \
	MOVO t1, v5; \
	MOVO v6, t1; \
	PUNPCKLQDQ v6, t2; \
	PUNPCKHQDQ v7, v6; \
	PUNPCKHQDQ t2, v6; \
	PUNPCKLQDQ v7, t2; \
	MOVO t1, v7; \
	MOVO v2, t1; \
	PUNPCKHQDQ t2, v7; \
	PUNPCKLQDQ v3, t2; \
	PUNPCKHQDQ t2, v2; \
	PUNPCKLQDQ t1, t2; \
	PUNPCKHQDQ t2, v3

// SHUFFLE_INV is the counterpart of SHUFFLE and is applied after the second
// HALF_ROUND of a round. It uses the same instruction sequence with the roles
// of (v2, v3) and (v6, v7) exchanged. t1 and t2 are scratch registers.
// NOTE(review): presumably this restores the word order that SHUFFLE changed -
// confirm.
#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
	MOVO v4, t1; \
	MOVO v5, v4; \
	MOVO t1, v5; \
	MOVO v2, t1; \
	PUNPCKLQDQ v2, t2; \
	PUNPCKHQDQ v3, v2; \
	PUNPCKHQDQ t2, v2; \
	PUNPCKLQDQ v3, t2; \
	MOVO t1, v3; \
	MOVO v6, t1; \
	PUNPCKHQDQ t2, v3; \
	PUNPCKLQDQ v7, t2; \
	PUNPCKHQDQ t2, v6; \
	PUNPCKLQDQ t1, t2; \
	PUNPCKHQDQ t2, v7
50 | |||
// HALF_ROUND applies the BlaMka mixing function to two register groups, first
// (v0, v2, v4, v6) and then (v1, v3, v5, v7); each register holds two 64-bit
// lanes. t0 is scratch; c40 and c48 are registers holding the PSHUFB rotation
// masks.
//
// Each update of the form "x += y + 2*lo32(x)*lo32(y)" is built from a
// PMULULQ (product of the low 32 bits of each lane) added twice, plus one
// PADDQ of y. The four rotations after the XORs are, in order: by 32 bits
// (PSHUFD $0xB1), right by 24 (PSHUFB c40), right by 16 (PSHUFB c48) and
// right by 63 (doubling via PADDQ combined with PSRLQ $63).
#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, t0, c40, c48) \
	MOVO v0, t0; \
	PMULULQ v2, t0; \
	PADDQ v2, v0; \
	PADDQ t0, v0; \
	PADDQ t0, v0; \
	PXOR v0, v6; \
	PSHUFD $0xB1, v6, v6; \
	MOVO v4, t0; \
	PMULULQ v6, t0; \
	PADDQ v6, v4; \
	PADDQ t0, v4; \
	PADDQ t0, v4; \
	PXOR v4, v2; \
	PSHUFB c40, v2; \
	MOVO v0, t0; \
	PMULULQ v2, t0; \
	PADDQ v2, v0; \
	PADDQ t0, v0; \
	PADDQ t0, v0; \
	PXOR v0, v6; \
	PSHUFB c48, v6; \
	MOVO v4, t0; \
	PMULULQ v6, t0; \
	PADDQ v6, v4; \
	PADDQ t0, v4; \
	PADDQ t0, v4; \
	PXOR v4, v2; \
	MOVO v2, t0; \
	PADDQ v2, t0; \
	PSRLQ $63, v2; \
	PXOR t0, v2; \
	MOVO v1, t0; \
	PMULULQ v3, t0; \
	PADDQ v3, v1; \
	PADDQ t0, v1; \
	PADDQ t0, v1; \
	PXOR v1, v7; \
	PSHUFD $0xB1, v7, v7; \
	MOVO v5, t0; \
	PMULULQ v7, t0; \
	PADDQ v7, v5; \
	PADDQ t0, v5; \
	PADDQ t0, v5; \
	PXOR v5, v3; \
	PSHUFB c40, v3; \
	MOVO v1, t0; \
	PMULULQ v3, t0; \
	PADDQ v3, v1; \
	PADDQ t0, v1; \
	PADDQ t0, v1; \
	PXOR v1, v7; \
	PSHUFB c48, v7; \
	MOVO v5, t0; \
	PMULULQ v7, t0; \
	PADDQ v7, v5; \
	PADDQ t0, v5; \
	PADDQ t0, v5; \
	PXOR v5, v3; \
	MOVO v3, t0; \
	PADDQ v3, t0; \
	PSRLQ $63, v3; \
	PXOR t0, v3
114 | |||
// LOAD_MSG_0 loads the 16 consecutive 64-bit words starting at word offset
// off of block into X0..X7, two words per register.
#define LOAD_MSG_0(block, off) \
	MOVOU 8*(off+0)(block), X0; \
	MOVOU 8*(off+2)(block), X1; \
	MOVOU 8*(off+4)(block), X2; \
	MOVOU 8*(off+6)(block), X3; \
	MOVOU 8*(off+8)(block), X4; \
	MOVOU 8*(off+10)(block), X5; \
	MOVOU 8*(off+12)(block), X6; \
	MOVOU 8*(off+14)(block), X7

// STORE_MSG_0 writes X0..X7 back to the 16 consecutive 64-bit words starting
// at word offset off of block; it mirrors LOAD_MSG_0.
#define STORE_MSG_0(block, off) \
	MOVOU X0, 8*(off+0)(block); \
	MOVOU X1, 8*(off+2)(block); \
	MOVOU X2, 8*(off+4)(block); \
	MOVOU X3, 8*(off+6)(block); \
	MOVOU X4, 8*(off+8)(block); \
	MOVOU X5, 8*(off+10)(block); \
	MOVOU X6, 8*(off+12)(block); \
	MOVOU X7, 8*(off+14)(block)
134 | |||
// LOAD_MSG_1 loads eight pairs of adjacent 64-bit words into X0..X7. The
// pairs start at word offsets off, off+16, off+32, ... off+112 of block,
// i.e. they are taken at a stride of 16 words.
#define LOAD_MSG_1(block, off) \
	MOVOU 8*off+0*8(block), X0; \
	MOVOU 8*off+16*8(block), X1; \
	MOVOU 8*off+32*8(block), X2; \
	MOVOU 8*off+48*8(block), X3; \
	MOVOU 8*off+64*8(block), X4; \
	MOVOU 8*off+80*8(block), X5; \
	MOVOU 8*off+96*8(block), X6; \
	MOVOU 8*off+112*8(block), X7

// STORE_MSG_1 writes X0..X7 back to the eight word pairs at word offsets
// off, off+16, ... off+112 of block; it mirrors LOAD_MSG_1.
#define STORE_MSG_1(block, off) \
	MOVOU X0, 8*off+0*8(block); \
	MOVOU X1, 8*off+16*8(block); \
	MOVOU X2, 8*off+32*8(block); \
	MOVOU X3, 8*off+48*8(block); \
	MOVOU X4, 8*off+64*8(block); \
	MOVOU X5, 8*off+80*8(block); \
	MOVOU X6, 8*off+96*8(block); \
	MOVOU X7, 8*off+112*8(block)
154 | |||
// BLAMKA_ROUND_0 runs one full round (HALF_ROUND, SHUFFLE, HALF_ROUND,
// SHUFFLE_INV) in place on the 16 consecutive words of block starting at word
// offset off. X0..X7 are clobbered; t0 and t1 are scratch registers and
// c40/c48 hold the rotation masks.
#define BLAMKA_ROUND_0(block, off, t0, t1, c40, c48) \
	LOAD_MSG_0(block, off); \
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
	SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \
	STORE_MSG_0(block, off)

// BLAMKA_ROUND_1 runs the same round as BLAMKA_ROUND_0, but on the eight word
// pairs of block found at a stride of 16 words starting at word offset off.
// X0..X7 are clobbered; t0 and t1 are scratch registers and c40/c48 hold the
// rotation masks.
#define BLAMKA_ROUND_1(block, off, t0, t1, c40, c48) \
	LOAD_MSG_1(block, off); \
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
	SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \
	STORE_MSG_1(block, off)
170 | |||
// func blamkaSSE4(b *block)
//
// blamkaSSE4 permutes the 128-word block b in place: first eight rounds over
// groups of 16 consecutive words (word offsets 0, 16, ... 112), then eight
// rounds over word pairs taken at a stride of 16 words (starting offsets
// 0, 2, ... 14). X8 and X9 serve as scratch registers; X10 and X11 hold the
// c40 and c48 rotation masks.
TEXT ·blamkaSSE4(SB), 4, $0-8
	MOVQ b+0(FP), AX

	MOVOU ·c40<>(SB), X10
	MOVOU ·c48<>(SB), X11

	BLAMKA_ROUND_0(AX, 0, X8, X9, X10, X11)
	BLAMKA_ROUND_0(AX, 16, X8, X9, X10, X11)
	BLAMKA_ROUND_0(AX, 32, X8, X9, X10, X11)
	BLAMKA_ROUND_0(AX, 48, X8, X9, X10, X11)
	BLAMKA_ROUND_0(AX, 64, X8, X9, X10, X11)
	BLAMKA_ROUND_0(AX, 80, X8, X9, X10, X11)
	BLAMKA_ROUND_0(AX, 96, X8, X9, X10, X11)
	BLAMKA_ROUND_0(AX, 112, X8, X9, X10, X11)

	BLAMKA_ROUND_1(AX, 0, X8, X9, X10, X11)
	BLAMKA_ROUND_1(AX, 2, X8, X9, X10, X11)
	BLAMKA_ROUND_1(AX, 4, X8, X9, X10, X11)
	BLAMKA_ROUND_1(AX, 6, X8, X9, X10, X11)
	BLAMKA_ROUND_1(AX, 8, X8, X9, X10, X11)
	BLAMKA_ROUND_1(AX, 10, X8, X9, X10, X11)
	BLAMKA_ROUND_1(AX, 12, X8, X9, X10, X11)
	BLAMKA_ROUND_1(AX, 14, X8, X9, X10, X11)
	RET
196 | |||
// func mixBlocksSSE2(out, a, b, c *block)
//
// mixBlocksSSE2 sets out = a XOR b XOR c, processing the 128-word blocks 16
// bytes (two words) per iteration. out may alias any of the inputs, since
// each 16-byte chunk is fully read before it is written.
//
// The loop counter lives in DI rather than BP: this function has a zero-size
// frame, so BP is not saved and restored for it, and overwriting BP would
// corrupt the caller's frame pointer. The fourth argument is referenced under
// its declared name c so that the name matches its offset.
TEXT ·mixBlocksSSE2(SB), NOSPLIT, $0-32
	MOVQ out+0(FP), DX
	MOVQ a+8(FP), AX
	MOVQ b+16(FP), BX
	MOVQ c+24(FP), CX
	MOVQ $128, DI // words remaining

loop:
	MOVOU 0(AX), X0
	MOVOU 0(BX), X1
	MOVOU 0(CX), X2
	PXOR  X1, X0
	PXOR  X2, X0
	MOVOU X0, 0(DX)
	ADDQ  $16, AX
	ADDQ  $16, BX
	ADDQ  $16, CX
	ADDQ  $16, DX
	SUBQ  $2, DI
	JA    loop
	RET
219 | |||
// func xorBlocksSSE2(out, a, b, c *block)
//
// xorBlocksSSE2 sets out = out XOR a XOR b XOR c, processing the 128-word
// blocks 16 bytes (two words) per iteration.
//
// The loop counter lives in DI rather than BP: this function has a zero-size
// frame, so BP is not saved and restored for it, and overwriting BP would
// corrupt the caller's frame pointer. The fourth argument is referenced under
// its declared name c so that the name matches its offset.
TEXT ·xorBlocksSSE2(SB), NOSPLIT, $0-32
	MOVQ out+0(FP), DX
	MOVQ a+8(FP), AX
	MOVQ b+16(FP), BX
	MOVQ c+24(FP), CX
	MOVQ $128, DI // words remaining

loop:
	MOVOU 0(AX), X0
	MOVOU 0(BX), X1
	MOVOU 0(CX), X2
	MOVOU 0(DX), X3
	PXOR  X1, X0
	PXOR  X2, X0
	PXOR  X3, X0
	MOVOU X0, 0(DX)
	ADDQ  $16, AX
	ADDQ  $16, BX
	ADDQ  $16, CX
	ADDQ  $16, DX
	SUBQ  $2, DI
	JA    loop
	RET
diff --git a/vendor/golang.org/x/crypto/argon2/blamka_generic.go b/vendor/golang.org/x/crypto/argon2/blamka_generic.go new file mode 100644 index 0000000..a481b22 --- /dev/null +++ b/vendor/golang.org/x/crypto/argon2/blamka_generic.go | |||
@@ -0,0 +1,163 @@ | |||
1 | // Copyright 2017 The Go Authors. All rights reserved. | ||
2 | // Use of this source code is governed by a BSD-style | ||
3 | // license that can be found in the LICENSE file. | ||
4 | |||
5 | package argon2 | ||
6 | |||
// useSSE4 selects the SSE4.1 assembly permutation on amd64. It stays false
// unless the amd64-specific init function sets it.
var useSSE4 bool
8 | |||
9 | func processBlockGeneric(out, in1, in2 *block, xor bool) { | ||
10 | var t block | ||
11 | for i := range t { | ||
12 | t[i] = in1[i] ^ in2[i] | ||
13 | } | ||
14 | for i := 0; i < blockLength; i += 16 { | ||
15 | blamkaGeneric( | ||
16 | &t[i+0], &t[i+1], &t[i+2], &t[i+3], | ||
17 | &t[i+4], &t[i+5], &t[i+6], &t[i+7], | ||
18 | &t[i+8], &t[i+9], &t[i+10], &t[i+11], | ||
19 | &t[i+12], &t[i+13], &t[i+14], &t[i+15], | ||
20 | ) | ||
21 | } | ||
22 | for i := 0; i < blockLength/8; i += 2 { | ||
23 | blamkaGeneric( | ||
24 | &t[i], &t[i+1], &t[16+i], &t[16+i+1], | ||
25 | &t[32+i], &t[32+i+1], &t[48+i], &t[48+i+1], | ||
26 | &t[64+i], &t[64+i+1], &t[80+i], &t[80+i+1], | ||
27 | &t[96+i], &t[96+i+1], &t[112+i], &t[112+i+1], | ||
28 | ) | ||
29 | } | ||
30 | if xor { | ||
31 | for i := range t { | ||
32 | out[i] ^= in1[i] ^ in2[i] ^ t[i] | ||
33 | } | ||
34 | } else { | ||
35 | for i := range t { | ||
36 | out[i] = in1[i] ^ in2[i] ^ t[i] | ||
37 | } | ||
38 | } | ||
39 | } | ||
40 | |||
// blamkaGeneric applies one BlaMka round to the sixteen words referenced by
// its arguments. All sixteen words are loaded first, mixed as a 4x4 matrix
// (four column mixes followed by four diagonal mixes), and only then written
// back through the pointers.
func blamkaGeneric(t00, t01, t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t12, t13, t14, t15 *uint64) {
	a0, a1, a2, a3 := *t00, *t01, *t02, *t03
	b0, b1, b2, b3 := *t04, *t05, *t06, *t07
	c0, c1, c2, c3 := *t08, *t09, *t10, *t11
	d0, d1, d2, d3 := *t12, *t13, *t14, *t15

	// Columns.
	a0, b0, c0, d0 = blamkaQuarter(a0, b0, c0, d0)
	a1, b1, c1, d1 = blamkaQuarter(a1, b1, c1, d1)
	a2, b2, c2, d2 = blamkaQuarter(a2, b2, c2, d2)
	a3, b3, c3, d3 = blamkaQuarter(a3, b3, c3, d3)

	// Diagonals.
	a0, b1, c2, d3 = blamkaQuarter(a0, b1, c2, d3)
	a1, b2, c3, d0 = blamkaQuarter(a1, b2, c3, d0)
	a2, b3, c0, d1 = blamkaQuarter(a2, b3, c0, d1)
	a3, b0, c1, d2 = blamkaQuarter(a3, b0, c1, d2)

	*t00, *t01, *t02, *t03 = a0, a1, a2, a3
	*t04, *t05, *t06, *t07 = b0, b1, b2, b3
	*t08, *t09, *t10, *t11 = c0, c1, c2, c3
	*t12, *t13, *t14, *t15 = d0, d1, d2, d3
}

// blamkaQuarter mixes four words and returns their new values. Each addition
// also adds twice the product of the low 32 bits of both operands; the words
// that are XORed are then rotated right by 32, 24, 16 and 63 bits in turn.
func blamkaQuarter(a, b, c, d uint64) (uint64, uint64, uint64, uint64) {
	a += b + 2*uint64(uint32(a))*uint64(uint32(b))
	d ^= a
	d = d<<32 | d>>32
	c += d + 2*uint64(uint32(c))*uint64(uint32(d))
	b ^= c
	b = b<<40 | b>>24

	a += b + 2*uint64(uint32(a))*uint64(uint32(b))
	d ^= a
	d = d<<48 | d>>16
	c += d + 2*uint64(uint32(c))*uint64(uint32(d))
	b ^= c
	b = b<<1 | b>>63

	return a, b, c, d
}
diff --git a/vendor/golang.org/x/crypto/argon2/blamka_ref.go b/vendor/golang.org/x/crypto/argon2/blamka_ref.go new file mode 100644 index 0000000..16d58c6 --- /dev/null +++ b/vendor/golang.org/x/crypto/argon2/blamka_ref.go | |||
@@ -0,0 +1,15 @@ | |||
1 | // Copyright 2017 The Go Authors. All rights reserved. | ||
2 | // Use of this source code is governed by a BSD-style | ||
3 | // license that can be found in the LICENSE file. | ||
4 | |||
5 | //go:build !amd64 || purego || !gc | ||
6 | |||
7 | package argon2 | ||
8 | |||
// processBlock overwrites out with the compression of in1 and in2, using the
// portable implementation.
func processBlock(out, in1, in2 *block) {
	processBlockGeneric(out, in1, in2, false)
}

// processBlockXOR XORs the compression of in1 and in2 into the existing
// contents of out, using the portable implementation.
func processBlockXOR(out, in1, in2 *block) {
	processBlockGeneric(out, in1, in2, true)
}