aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/golang.org/x/crypto/blake2b
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/crypto/blake2b')
-rw-r--r--vendor/golang.org/x/crypto/blake2b/blake2b.go291
-rw-r--r--vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go37
-rw-r--r--vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s744
-rw-r--r--vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go24
-rw-r--r--vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s278
-rw-r--r--vendor/golang.org/x/crypto/blake2b/blake2b_generic.go182
-rw-r--r--vendor/golang.org/x/crypto/blake2b/blake2b_ref.go11
-rw-r--r--vendor/golang.org/x/crypto/blake2b/blake2x.go177
-rw-r--r--vendor/golang.org/x/crypto/blake2b/register.go32
9 files changed, 0 insertions, 1776 deletions
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b.go b/vendor/golang.org/x/crypto/blake2b/blake2b.go
deleted file mode 100644
index d2e98d4..0000000
--- a/vendor/golang.org/x/crypto/blake2b/blake2b.go
+++ /dev/null
@@ -1,291 +0,0 @@
1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package blake2b implements the BLAKE2b hash algorithm defined by RFC 7693
6// and the extendable output function (XOF) BLAKE2Xb.
7//
8// BLAKE2b is optimized for 64-bit platforms—including NEON-enabled ARMs—and
9// produces digests of any size between 1 and 64 bytes.
10// For a detailed specification of BLAKE2b see https://blake2.net/blake2.pdf
11// and for BLAKE2Xb see https://blake2.net/blake2x.pdf
12//
13// If you aren't sure which function you need, use BLAKE2b (Sum512 or New512).
14// If you need a secret-key MAC (message authentication code), use the New512
15// function with a non-nil key.
16//
17// BLAKE2X is a construction to compute hash values larger than 64 bytes. It
18// can produce hash values between 0 and 4 GiB.
19package blake2b
20
21import (
22 "encoding/binary"
23 "errors"
24 "hash"
25)
26
27const (
28 // The blocksize of BLAKE2b in bytes.
29 BlockSize = 128
30 // The hash size of BLAKE2b-512 in bytes.
31 Size = 64
32 // The hash size of BLAKE2b-384 in bytes.
33 Size384 = 48
34 // The hash size of BLAKE2b-256 in bytes.
35 Size256 = 32
36)
37
// CPU feature flags. They default to false here and are assigned in the
// init functions of the amd64-specific files, where hashBlocks consults
// them to pick an implementation.
38var (
39 useAVX2 bool
40 useAVX bool
41 useSSE4 bool
42)
43
// Errors returned by newDigest when a requested parameter is out of range.
44var (
45 errKeySize = errors.New("blake2b: invalid key size")
46 errHashSize = errors.New("blake2b: invalid hash size")
47)
48
// iv is the initial chaining value. checkSum and Reset copy it and XOR a
// parameter word (digest size, key length, and the bits 1<<16 and 1<<24)
// into element 0 before any data is compressed.
49var iv = [8]uint64{
50 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
51 0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
52}
53
54// Sum512 returns the BLAKE2b-512 checksum of the data.
55func Sum512(data []byte) [Size]byte {
56 var sum [Size]byte
57 checkSum(&sum, Size, data)
58 return sum
59}
60
61// Sum384 returns the BLAKE2b-384 checksum of the data.
62func Sum384(data []byte) [Size384]byte {
63 var sum [Size]byte
64 var sum384 [Size384]byte
65 checkSum(&sum, Size384, data)
66 copy(sum384[:], sum[:Size384])
67 return sum384
68}
69
70// Sum256 returns the BLAKE2b-256 checksum of the data.
71func Sum256(data []byte) [Size256]byte {
72 var sum [Size]byte
73 var sum256 [Size256]byte
74 checkSum(&sum, Size256, data)
75 copy(sum256[:], sum[:Size256])
76 return sum256
77}
78
79// New512 returns a new hash.Hash computing the BLAKE2b-512 checksum. A non-nil
80// key turns the hash into a MAC. The key must be between zero and 64 bytes long.
81func New512(key []byte) (hash.Hash, error) { return newDigest(Size, key) }
82
83// New384 returns a new hash.Hash computing the BLAKE2b-384 checksum. A non-nil
84// key turns the hash into a MAC. The key must be between zero and 64 bytes long.
85func New384(key []byte) (hash.Hash, error) { return newDigest(Size384, key) }
86
87// New256 returns a new hash.Hash computing the BLAKE2b-256 checksum. A non-nil
88// key turns the hash into a MAC. The key must be between zero and 64 bytes long.
89func New256(key []byte) (hash.Hash, error) { return newDigest(Size256, key) }
90
91// New returns a new hash.Hash computing the BLAKE2b checksum with a custom length.
92// A non-nil key turns the hash into a MAC. The key must be between zero and 64 bytes long.
93// The hash size can be a value between 1 and 64 but it is highly recommended to use
94// values equal or greater than:
95// - 32 if BLAKE2b is used as a hash function (The key is zero bytes long).
96// - 16 if BLAKE2b is used as a MAC function (The key is at least 16 bytes long).
97// When the key is nil, the returned hash.Hash implements BinaryMarshaler
98// and BinaryUnmarshaler for state (de)serialization as documented by hash.Hash.
99func New(size int, key []byte) (hash.Hash, error) { return newDigest(size, key) }
100
101func newDigest(hashSize int, key []byte) (*digest, error) {
102 if hashSize < 1 || hashSize > Size {
103 return nil, errHashSize
104 }
105 if len(key) > Size {
106 return nil, errKeySize
107 }
108 d := &digest{
109 size: hashSize,
110 keyLen: len(key),
111 }
112 copy(d.key[:], key)
113 d.Reset()
114 return d, nil
115}
116
117func checkSum(sum *[Size]byte, hashSize int, data []byte) {
118 h := iv
119 h[0] ^= uint64(hashSize) | (1 << 16) | (1 << 24)
120 var c [2]uint64
121
122 if length := len(data); length > BlockSize {
123 n := length &^ (BlockSize - 1)
124 if length == n {
125 n -= BlockSize
126 }
127 hashBlocks(&h, &c, 0, data[:n])
128 data = data[n:]
129 }
130
131 var block [BlockSize]byte
132 offset := copy(block[:], data)
133 remaining := uint64(BlockSize - offset)
134 if c[0] < remaining {
135 c[1]--
136 }
137 c[0] -= remaining
138
139 hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:])
140
141 for i, v := range h[:(hashSize+7)/8] {
142 binary.LittleEndian.PutUint64(sum[8*i:], v)
143 }
144}
145
// digest is the streaming BLAKE2b state behind the hash.Hash values returned
// by New, New256, New384 and New512.
146type digest struct {
147 h [8]uint64 // chaining value; Reset seeds it from iv and the parameter word
148 c [2]uint64 // two-word counter handed to hashBlocks alongside h
149 size int // number of digest bytes Sum appends
150 block [BlockSize]byte // buffered input that has not been compressed yet
151 offset int // number of valid bytes at the start of block
152
153 key [BlockSize]byte // key copied in by newDigest; unused tail bytes stay zero
154 keyLen int // key length in bytes; 0 means unkeyed
155}
156
// Serialization format used by MarshalBinary/UnmarshalBinary.
157const (
// magic is the identifier written at the start of every marshaled state.
158 magic = "b2b"
// marshaledSize is the exact encoded length: magic, the eight h words,
// the two counter words, one size byte, the buffered block and one
// offset byte.
159 marshaledSize = len(magic) + 8*8 + 2*8 + 1 + BlockSize + 1
160)
161
162func (d *digest) MarshalBinary() ([]byte, error) {
163 if d.keyLen != 0 {
164 return nil, errors.New("crypto/blake2b: cannot marshal MACs")
165 }
166 b := make([]byte, 0, marshaledSize)
167 b = append(b, magic...)
168 for i := 0; i < 8; i++ {
169 b = appendUint64(b, d.h[i])
170 }
171 b = appendUint64(b, d.c[0])
172 b = appendUint64(b, d.c[1])
173 // Maximum value for size is 64
174 b = append(b, byte(d.size))
175 b = append(b, d.block[:]...)
176 b = append(b, byte(d.offset))
177 return b, nil
178}
179
180func (d *digest) UnmarshalBinary(b []byte) error {
181 if len(b) < len(magic) || string(b[:len(magic)]) != magic {
182 return errors.New("crypto/blake2b: invalid hash state identifier")
183 }
184 if len(b) != marshaledSize {
185 return errors.New("crypto/blake2b: invalid hash state size")
186 }
187 b = b[len(magic):]
188 for i := 0; i < 8; i++ {
189 b, d.h[i] = consumeUint64(b)
190 }
191 b, d.c[0] = consumeUint64(b)
192 b, d.c[1] = consumeUint64(b)
193 d.size = int(b[0])
194 b = b[1:]
195 copy(d.block[:], b[:BlockSize])
196 b = b[BlockSize:]
197 d.offset = int(b[0])
198 return nil
199}
200
201func (d *digest) BlockSize() int { return BlockSize }
202
203func (d *digest) Size() int { return d.size }
204
205func (d *digest) Reset() {
206 d.h = iv
207 d.h[0] ^= uint64(d.size) | (uint64(d.keyLen) << 8) | (1 << 16) | (1 << 24)
208 d.offset, d.c[0], d.c[1] = 0, 0, 0
209 if d.keyLen > 0 {
210 d.block = d.key
211 d.offset = BlockSize
212 }
213}
214
215func (d *digest) Write(p []byte) (n int, err error) {
216 n = len(p)
217
218 if d.offset > 0 {
219 remaining := BlockSize - d.offset
220 if n <= remaining {
221 d.offset += copy(d.block[d.offset:], p)
222 return
223 }
224 copy(d.block[d.offset:], p[:remaining])
225 hashBlocks(&d.h, &d.c, 0, d.block[:])
226 d.offset = 0
227 p = p[remaining:]
228 }
229
230 if length := len(p); length > BlockSize {
231 nn := length &^ (BlockSize - 1)
232 if length == nn {
233 nn -= BlockSize
234 }
235 hashBlocks(&d.h, &d.c, 0, p[:nn])
236 p = p[nn:]
237 }
238
239 if len(p) > 0 {
240 d.offset += copy(d.block[:], p)
241 }
242
243 return
244}
245
246func (d *digest) Sum(sum []byte) []byte {
247 var hash [Size]byte
248 d.finalize(&hash)
249 return append(sum, hash[:d.size]...)
250}
251
252func (d *digest) finalize(hash *[Size]byte) {
253 var block [BlockSize]byte
254 copy(block[:], d.block[:d.offset])
255 remaining := uint64(BlockSize - d.offset)
256
257 c := d.c
258 if c[0] < remaining {
259 c[1]--
260 }
261 c[0] -= remaining
262
263 h := d.h
264 hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:])
265
266 for i, v := range h {
267 binary.LittleEndian.PutUint64(hash[8*i:], v)
268 }
269}
270
// appendUint64 appends x to b as eight big-endian bytes and returns the
// extended slice.
func appendUint64(b []byte, x uint64) []byte {
	start := len(b)
	b = append(b, 0, 0, 0, 0, 0, 0, 0, 0)
	binary.BigEndian.PutUint64(b[start:], x)
	return b
}
276
// appendUint32 appends x to b as four big-endian bytes and returns the
// extended slice.
func appendUint32(b []byte, x uint32) []byte {
	start := len(b)
	b = append(b, 0, 0, 0, 0)
	binary.BigEndian.PutUint32(b[start:], x)
	return b
}
282
// consumeUint64 decodes a big-endian uint64 from the front of b and returns
// the remaining bytes together with the value. It panics if b holds fewer
// than eight bytes.
func consumeUint64(b []byte) ([]byte, uint64) {
	const width = 8
	value := binary.BigEndian.Uint64(b)
	return b[width:], value
}
287
// consumeUint32 decodes a big-endian uint32 from the front of b and returns
// the remaining bytes together with the value. It panics if b holds fewer
// than four bytes.
func consumeUint32(b []byte) ([]byte, uint32) {
	const width = 4
	value := binary.BigEndian.Uint32(b)
	return b[width:], value
}
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go
deleted file mode 100644
index 4f506f8..0000000
--- a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.go
+++ /dev/null
@@ -1,37 +0,0 @@
1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build go1.7 && amd64 && gc && !purego
6
7package blake2b
8
9import "golang.org/x/sys/cpu"
10
11func init() {
12 useAVX2 = cpu.X86.HasAVX2
13 useAVX = cpu.X86.HasAVX
14 useSSE4 = cpu.X86.HasSSE41
15}
16
// hashBlocksAVX2 is the AVX2 block-compression routine; its body lives in
// blake2bAVX2_amd64.s. h and c are updated in place. The assembly loops
// until the remaining length reaches exactly zero in steps of 128, so blocks
// presumably must be a non-empty multiple of BlockSize bytes.
17//go:noescape
18func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
19
// hashBlocksAVX is the AVX block-compression routine; its body lives in
// blake2bAVX2_amd64.s. Same contract as hashBlocksAVX2.
20//go:noescape
21func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
22
// hashBlocksSSE4 is the SSE4.1 block-compression routine implemented in
// assembly (presumably blake2b_amd64.s — confirm against that file).
23//go:noescape
24func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
25
26func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
27 switch {
28 case useAVX2:
29 hashBlocksAVX2(h, c, flag, blocks)
30 case useAVX:
31 hashBlocksAVX(h, c, flag, blocks)
32 case useSSE4:
33 hashBlocksSSE4(h, c, flag, blocks)
34 default:
35 hashBlocksGeneric(h, c, flag, blocks)
36 }
37}
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s
deleted file mode 100644
index 353bb7c..0000000
--- a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s
+++ /dev/null
@@ -1,744 +0,0 @@
1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build go1.7 && amd64 && gc && !purego
6
7#include "textflag.h"
8
// Read-only constant tables for the AVX2 (32-byte) and AVX (16-byte) code
// paths below.

// AVX2_iv0: initialization-vector words 0..3 (the same values as iv[0..3]
// in blake2b.go) packed into one 32-byte operand.
9DATA ·AVX2_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
10DATA ·AVX2_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
11DATA ·AVX2_iv0<>+0x10(SB)/8, $0x3c6ef372fe94f82b
12DATA ·AVX2_iv0<>+0x18(SB)/8, $0xa54ff53a5f1d36f1
13GLOBL ·AVX2_iv0<>(SB), (NOPTR+RODATA), $32
14
// AVX2_iv1: initialization-vector words 4..7.
15DATA ·AVX2_iv1<>+0x00(SB)/8, $0x510e527fade682d1
16DATA ·AVX2_iv1<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
17DATA ·AVX2_iv1<>+0x10(SB)/8, $0x1f83d9abfb41bd6b
18DATA ·AVX2_iv1<>+0x18(SB)/8, $0x5be0cd19137e2179
19GLOBL ·AVX2_iv1<>(SB), (NOPTR+RODATA), $32
20
// AVX2_c40: VPSHUFB byte-selection mask. Within each quadword the selected
// source bytes are 3,4,5,6,7,0,1,2, i.e. every 64-bit lane is rotated right
// by 24 bits.
21DATA ·AVX2_c40<>+0x00(SB)/8, $0x0201000706050403
22DATA ·AVX2_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
23DATA ·AVX2_c40<>+0x10(SB)/8, $0x0201000706050403
24DATA ·AVX2_c40<>+0x18(SB)/8, $0x0a09080f0e0d0c0b
25GLOBL ·AVX2_c40<>(SB), (NOPTR+RODATA), $32
26
// AVX2_c48: VPSHUFB byte-selection mask. Within each quadword the selected
// source bytes are 2,3,4,5,6,7,0,1, i.e. every 64-bit lane is rotated right
// by 16 bits.
27DATA ·AVX2_c48<>+0x00(SB)/8, $0x0100070605040302
28DATA ·AVX2_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
29DATA ·AVX2_c48<>+0x10(SB)/8, $0x0100070605040302
30DATA ·AVX2_c48<>+0x18(SB)/8, $0x09080f0e0d0c0b0a
31GLOBL ·AVX2_c48<>(SB), (NOPTR+RODATA), $32
32
// AVX_iv0..AVX_iv3: the same eight initialization-vector words, two per
// 16-byte operand, for the 128-bit AVX path.
33DATA ·AVX_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
34DATA ·AVX_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
35GLOBL ·AVX_iv0<>(SB), (NOPTR+RODATA), $16
36
37DATA ·AVX_iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
38DATA ·AVX_iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
39GLOBL ·AVX_iv1<>(SB), (NOPTR+RODATA), $16
40
41DATA ·AVX_iv2<>+0x00(SB)/8, $0x510e527fade682d1
42DATA ·AVX_iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
43GLOBL ·AVX_iv2<>(SB), (NOPTR+RODATA), $16
44
45DATA ·AVX_iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
46DATA ·AVX_iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
47GLOBL ·AVX_iv3<>(SB), (NOPTR+RODATA), $16
48
// AVX_c40 / AVX_c48: 16-byte versions of the two VPSHUFB rotation masks
// described above.
49DATA ·AVX_c40<>+0x00(SB)/8, $0x0201000706050403
50DATA ·AVX_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
51GLOBL ·AVX_c40<>(SB), (NOPTR+RODATA), $16
52
53DATA ·AVX_c48<>+0x00(SB)/8, $0x0100070605040302
54DATA ·AVX_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
55GLOBL ·AVX_c48<>(SB), (NOPTR+RODATA), $16
56
// Hand-encoded instructions emitted as raw BYTE sequences. Going by the macro
// names, each one is VPERMQ $imm, Yn, Yn (a permutation of the four quadwords
// of one YMM register in place); the last byte of every sequence is the
// immediate named in the macro. NOTE(review): the encodings are taken on
// trust from the names — verify against an assembler before editing.
57#define VPERMQ_0x39_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x39
58#define VPERMQ_0x93_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x93
59#define VPERMQ_0x4E_Y2_Y2 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e
60#define VPERMQ_0x93_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x93
61#define VPERMQ_0x39_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x39
62
// ROUND_AVX2 performs one round on the working state held in Y0..Y3 (four
// 64-bit words per register, as set up by hashBlocksAVX2).
//
//   m0..m3   message operands added into Y0 (registers or memory)
//   t        scratch YMM register, clobbered
//   c40/c48  registers holding the two VPSHUFB rotation masks
//
// The macro consists of two structurally identical halves. In each half the
// state is mixed with add/xor steps and four lane rotations:
//   - VPSHUFD $-79 (0xB1) swaps the 32-bit halves of each quadword (rotate 32),
//   - VPSHUFB c40 and VPSHUFB c48 are the byte rotations encoded in the masks,
//   - the VPADDQ Y1,Y1,t / VPSRLQ $63 / VPXOR triple is a rotate-left by 1.
// After the first half Y1, Y2 and Y3 are permuted with VPERMQ 0x39/0x4E/0x93
// (presumably moving the state from column to diagonal arrangement); after
// the second half the inverse permutations restore the original arrangement.
// No comments are placed inside the body because every line is a
// backslash-continued part of the #define.
63#define ROUND_AVX2(m0, m1, m2, m3, t, c40, c48) \
64 VPADDQ m0, Y0, Y0; \
65 VPADDQ Y1, Y0, Y0; \
66 VPXOR Y0, Y3, Y3; \
67 VPSHUFD $-79, Y3, Y3; \
68 VPADDQ Y3, Y2, Y2; \
69 VPXOR Y2, Y1, Y1; \
70 VPSHUFB c40, Y1, Y1; \
71 VPADDQ m1, Y0, Y0; \
72 VPADDQ Y1, Y0, Y0; \
73 VPXOR Y0, Y3, Y3; \
74 VPSHUFB c48, Y3, Y3; \
75 VPADDQ Y3, Y2, Y2; \
76 VPXOR Y2, Y1, Y1; \
77 VPADDQ Y1, Y1, t; \
78 VPSRLQ $63, Y1, Y1; \
79 VPXOR t, Y1, Y1; \
80 VPERMQ_0x39_Y1_Y1; \
81 VPERMQ_0x4E_Y2_Y2; \
82 VPERMQ_0x93_Y3_Y3; \
83 VPADDQ m2, Y0, Y0; \
84 VPADDQ Y1, Y0, Y0; \
85 VPXOR Y0, Y3, Y3; \
86 VPSHUFD $-79, Y3, Y3; \
87 VPADDQ Y3, Y2, Y2; \
88 VPXOR Y2, Y1, Y1; \
89 VPSHUFB c40, Y1, Y1; \
90 VPADDQ m3, Y0, Y0; \
91 VPADDQ Y1, Y0, Y0; \
92 VPXOR Y0, Y3, Y3; \
93 VPSHUFB c48, Y3, Y3; \
94 VPADDQ Y3, Y2, Y2; \
95 VPXOR Y2, Y1, Y1; \
96 VPADDQ Y1, Y1, t; \
97 VPSRLQ $63, Y1, Y1; \
98 VPXOR t, Y1, Y1; \
99 VPERMQ_0x39_Y3_Y3; \
100 VPERMQ_0x4E_Y2_Y2; \
101 VPERMQ_0x93_Y1_Y1
102
// Hand-encoded message-load instructions, emitted as raw BYTE sequences.
// NOTE(review): the descriptions below follow the macro names and how the
// macros are used in this file; the byte encodings themselves are not
// re-derived here.
//
// VMOVQ_SI_Xn_0: load the quadword at 0(SI) into the low half of Xn.
103#define VMOVQ_SI_X11_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x1E
104#define VMOVQ_SI_X12_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x26
105#define VMOVQ_SI_X13_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x2E
106#define VMOVQ_SI_X14_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x36
107#define VMOVQ_SI_X15_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x3E
108
// VMOVQ_SI_Xn(n): same load from n(SI). n is emitted as a single BYTE, so it
// has to fit in one byte; every use in this file passes at most 15*8.
109#define VMOVQ_SI_X11(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x5E; BYTE $n
110#define VMOVQ_SI_X12(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x66; BYTE $n
111#define VMOVQ_SI_X13(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x6E; BYTE $n
112#define VMOVQ_SI_X14(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x76; BYTE $n
113#define VMOVQ_SI_X15(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x7E; BYTE $n
114
// VPINSRQ_1_SI_Xn_0: insert the quadword at 0(SI) into the upper half
// (element 1, the trailing $0x01 immediate) of Xn.
115#define VPINSRQ_1_SI_X11_0 BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x1E; BYTE $0x01
116#define VPINSRQ_1_SI_X12_0 BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x26; BYTE $0x01
117#define VPINSRQ_1_SI_X13_0 BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x2E; BYTE $0x01
118#define VPINSRQ_1_SI_X14_0 BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x36; BYTE $0x01
119#define VPINSRQ_1_SI_X15_0 BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x3E; BYTE $0x01
120
// VPINSRQ_1_SI_Xn(n): same insert from n(SI), with n again a one-byte value.
121#define VPINSRQ_1_SI_X11(n) BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x5E; BYTE $n; BYTE $0x01
122#define VPINSRQ_1_SI_X12(n) BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x66; BYTE $n; BYTE $0x01
123#define VPINSRQ_1_SI_X13(n) BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x6E; BYTE $n; BYTE $0x01
124#define VPINSRQ_1_SI_X14(n) BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x76; BYTE $n; BYTE $0x01
125#define VPINSRQ_1_SI_X15(n) BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x7E; BYTE $n; BYTE $0x01
126
// Used by hashBlocksAVX to build X15 = (R8, R9), the two counter words.
127#define VMOVQ_R8_X15 BYTE $0xC4; BYTE $0x41; BYTE $0xF9; BYTE $0x6E; BYTE $0xF8
128#define VPINSRQ_1_R9_X15 BYTE $0xC4; BYTE $0x43; BYTE $0x81; BYTE $0x22; BYTE $0xF9; BYTE $0x01
129
// Generic message gathers for the AVX2 path. Each macro assembles four
// 64-bit message words, selected by index from the block at SI, into one YMM
// register: words i0/i1 go into the low 128 bits via the matching X register,
// words i2/i3 are built in X11 and then inserted as the high 128 bits.
// X11/Y11 is clobbered by every one of these macros.
//
// Index 0 is excluded by the original comments; the per-round macros use the
// dedicated *_0 load encodings for word 0 instead.

130// load msg: Y12 = (i0, i1, i2, i3)
131// i0, i1, i2, i3 must not be 0
132#define LOAD_MSG_AVX2_Y12(i0, i1, i2, i3) \
133 VMOVQ_SI_X12(i0*8); \
134 VMOVQ_SI_X11(i2*8); \
135 VPINSRQ_1_SI_X12(i1*8); \
136 VPINSRQ_1_SI_X11(i3*8); \
137 VINSERTI128 $1, X11, Y12, Y12
138
139// load msg: Y13 = (i0, i1, i2, i3)
140// i0, i1, i2, i3 must not be 0
141#define LOAD_MSG_AVX2_Y13(i0, i1, i2, i3) \
142 VMOVQ_SI_X13(i0*8); \
143 VMOVQ_SI_X11(i2*8); \
144 VPINSRQ_1_SI_X13(i1*8); \
145 VPINSRQ_1_SI_X11(i3*8); \
146 VINSERTI128 $1, X11, Y13, Y13
147
148// load msg: Y14 = (i0, i1, i2, i3)
149// i0, i1, i2, i3 must not be 0
150#define LOAD_MSG_AVX2_Y14(i0, i1, i2, i3) \
151 VMOVQ_SI_X14(i0*8); \
152 VMOVQ_SI_X11(i2*8); \
153 VPINSRQ_1_SI_X14(i1*8); \
154 VPINSRQ_1_SI_X11(i3*8); \
155 VINSERTI128 $1, X11, Y14, Y14
156
157// load msg: Y15 = (i0, i1, i2, i3)
158// i0, i1, i2, i3 must not be 0
159#define LOAD_MSG_AVX2_Y15(i0, i1, i2, i3) \
160 VMOVQ_SI_X15(i0*8); \
161 VMOVQ_SI_X11(i2*8); \
162 VPINSRQ_1_SI_X15(i1*8); \
163 VPINSRQ_1_SI_X11(i3*8); \
164 VINSERTI128 $1, X11, Y15, Y15
165
// Per-round message schedules for the AVX2 path. The sixteen numbers in each
// macro name are the message-word indices, in order, that end up in
// Y12, Y13, Y14 and Y15 (four words per register). hashBlocksAVX2 invokes one
// of these before each of its first ten rounds.
//
// Where a group of four contains word 0, or two words that are adjacent in
// memory, the macro is written out by hand instead of using
// LOAD_MSG_AVX2_Yxx: word 0 uses the *_0 encodings, adjacent pairs are
// fetched with a single VMOVDQU, and adjacent-but-swapped pairs with
// VPSHUFD $0x4E (which exchanges the two quadwords of the 16-byte operand).
// X11/Y11 is used as scratch throughout.
166#define LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() \
167 VMOVQ_SI_X12_0; \
168 VMOVQ_SI_X11(4*8); \
169 VPINSRQ_1_SI_X12(2*8); \
170 VPINSRQ_1_SI_X11(6*8); \
171 VINSERTI128 $1, X11, Y12, Y12; \
172 LOAD_MSG_AVX2_Y13(1, 3, 5, 7); \
173 LOAD_MSG_AVX2_Y14(8, 10, 12, 14); \
174 LOAD_MSG_AVX2_Y15(9, 11, 13, 15)
175
176#define LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() \
177 LOAD_MSG_AVX2_Y12(14, 4, 9, 13); \
178 LOAD_MSG_AVX2_Y13(10, 8, 15, 6); \
179 VMOVQ_SI_X11(11*8); \
180 VPSHUFD $0x4E, 0*8(SI), X14; \
181 VPINSRQ_1_SI_X11(5*8); \
182 VINSERTI128 $1, X11, Y14, Y14; \
183 LOAD_MSG_AVX2_Y15(12, 2, 7, 3)
184
185#define LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() \
186 VMOVQ_SI_X11(5*8); \
187 VMOVDQU 11*8(SI), X12; \
188 VPINSRQ_1_SI_X11(15*8); \
189 VINSERTI128 $1, X11, Y12, Y12; \
190 VMOVQ_SI_X13(8*8); \
191 VMOVQ_SI_X11(2*8); \
192 VPINSRQ_1_SI_X13_0; \
193 VPINSRQ_1_SI_X11(13*8); \
194 VINSERTI128 $1, X11, Y13, Y13; \
195 LOAD_MSG_AVX2_Y14(10, 3, 7, 9); \
196 LOAD_MSG_AVX2_Y15(14, 6, 1, 4)
197
198#define LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() \
199 LOAD_MSG_AVX2_Y12(7, 3, 13, 11); \
200 LOAD_MSG_AVX2_Y13(9, 1, 12, 14); \
201 LOAD_MSG_AVX2_Y14(2, 5, 4, 15); \
202 VMOVQ_SI_X15(6*8); \
203 VMOVQ_SI_X11_0; \
204 VPINSRQ_1_SI_X15(10*8); \
205 VPINSRQ_1_SI_X11(8*8); \
206 VINSERTI128 $1, X11, Y15, Y15
207
208#define LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() \
209 LOAD_MSG_AVX2_Y12(9, 5, 2, 10); \
210 VMOVQ_SI_X13_0; \
211 VMOVQ_SI_X11(4*8); \
212 VPINSRQ_1_SI_X13(7*8); \
213 VPINSRQ_1_SI_X11(15*8); \
214 VINSERTI128 $1, X11, Y13, Y13; \
215 LOAD_MSG_AVX2_Y14(14, 11, 6, 3); \
216 LOAD_MSG_AVX2_Y15(1, 12, 8, 13)
217
218#define LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() \
219 VMOVQ_SI_X12(2*8); \
220 VMOVQ_SI_X11_0; \
221 VPINSRQ_1_SI_X12(6*8); \
222 VPINSRQ_1_SI_X11(8*8); \
223 VINSERTI128 $1, X11, Y12, Y12; \
224 LOAD_MSG_AVX2_Y13(12, 10, 11, 3); \
225 LOAD_MSG_AVX2_Y14(4, 7, 15, 1); \
226 LOAD_MSG_AVX2_Y15(13, 5, 14, 9)
227
228#define LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() \
229 LOAD_MSG_AVX2_Y12(12, 1, 14, 4); \
230 LOAD_MSG_AVX2_Y13(5, 15, 13, 10); \
231 VMOVQ_SI_X14_0; \
232 VPSHUFD $0x4E, 8*8(SI), X11; \
233 VPINSRQ_1_SI_X14(6*8); \
234 VINSERTI128 $1, X11, Y14, Y14; \
235 LOAD_MSG_AVX2_Y15(7, 3, 2, 11)
236
237#define LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() \
238 LOAD_MSG_AVX2_Y12(13, 7, 12, 3); \
239 LOAD_MSG_AVX2_Y13(11, 14, 1, 9); \
240 LOAD_MSG_AVX2_Y14(5, 15, 8, 2); \
241 VMOVQ_SI_X15_0; \
242 VMOVQ_SI_X11(6*8); \
243 VPINSRQ_1_SI_X15(4*8); \
244 VPINSRQ_1_SI_X11(10*8); \
245 VINSERTI128 $1, X11, Y15, Y15
246
247#define LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() \
248 VMOVQ_SI_X12(6*8); \
249 VMOVQ_SI_X11(11*8); \
250 VPINSRQ_1_SI_X12(14*8); \
251 VPINSRQ_1_SI_X11_0; \
252 VINSERTI128 $1, X11, Y12, Y12; \
253 LOAD_MSG_AVX2_Y13(15, 9, 3, 8); \
254 VMOVQ_SI_X11(1*8); \
255 VMOVDQU 12*8(SI), X14; \
256 VPINSRQ_1_SI_X11(10*8); \
257 VINSERTI128 $1, X11, Y14, Y14; \
258 VMOVQ_SI_X15(2*8); \
259 VMOVDQU 4*8(SI), X11; \
260 VPINSRQ_1_SI_X15(7*8); \
261 VINSERTI128 $1, X11, Y15, Y15
262
263#define LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() \
264 LOAD_MSG_AVX2_Y12(10, 8, 7, 1); \
265 VMOVQ_SI_X13(2*8); \
266 VPSHUFD $0x4E, 5*8(SI), X11; \
267 VPINSRQ_1_SI_X13(4*8); \
268 VINSERTI128 $1, X11, Y13, Y13; \
269 LOAD_MSG_AVX2_Y14(15, 9, 3, 13); \
270 VMOVQ_SI_X15(11*8); \
271 VMOVQ_SI_X11(12*8); \
272 VPINSRQ_1_SI_X15(14*8); \
273 VPINSRQ_1_SI_X11_0; \
274 VINSERTI128 $1, X11, Y15, Y15
275
// hashBlocksAVX2 compresses the 128-byte blocks in `blocks` into the state
// h, advancing the two-word counter c by 128 per block, using 256-bit AVX2
// registers. h and c are written back on return.
//
// Register roles (as assigned below):
//   AX = h, BX = c, SI = current block, DI = bytes remaining
//   R8/R9 = counter low/high words
//   DX    = 32-byte aligned scratch area carved out of the frame
//   Y4/Y5 = VPSHUFB rotation masks, Y6/Y7 = IV rows, Y8/Y9 = h rows
//   Y0..Y3 = working state, Y12..Y15 = message words, Y10 = scratch
//
// The loop exits only when DI reaches exactly zero after subtracting 128, so
// blocks_len presumably must be a non-zero multiple of 128 — confirm at the
// call sites. The TEXT flag value 4 is NOSPLIT in textflag.h.
276// func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
277TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment
278 MOVQ h+0(FP), AX
279 MOVQ c+8(FP), BX
280 MOVQ flag+16(FP), CX
281 MOVQ blocks_base+24(FP), SI
282 MOVQ blocks_len+32(FP), DI
283
	// DX = SP rounded up to a 32-byte boundary, required by the VMOVDQA
	// stores to the scratch area below.
284 MOVQ SP, DX
285 ADDQ $31, DX
286 ANDQ $~31, DX
287
	// Scratch words 2 and 3 hold (flag, 0); words 0 and 1 receive the
	// counter inside the loop. Together they form the 32-byte value that is
	// XORed into the second IV row.
288 MOVQ CX, 16(DX)
289 XORQ CX, CX
290 MOVQ CX, 24(DX)
291
292 VMOVDQU ·AVX2_c40<>(SB), Y4
293 VMOVDQU ·AVX2_c48<>(SB), Y5
294
295 VMOVDQU 0(AX), Y8
296 VMOVDQU 32(AX), Y9
297 VMOVDQU ·AVX2_iv0<>(SB), Y6
298 VMOVDQU ·AVX2_iv1<>(SB), Y7
299
300 MOVQ 0(BX), R8
301 MOVQ 8(BX), R9
302 MOVQ R9, 8(DX)
303
304loop:
	// Advance the counter by one block. If the low word is now below 128
	// (the JGE comparison is signed) the high word is incremented as carry.
305 ADDQ $128, R8
306 MOVQ R8, 0(DX)
307 CMPQ R8, $128
308 JGE noinc
309 INCQ R9
310 MOVQ R9, 8(DX)
311
312noinc:
	// Working state: rows 0/1 = h, row 2 = IV[0..3],
	// row 3 = IV[4..7] ^ (counter lo, counter hi, flag, 0).
313 VMOVDQA Y8, Y0
314 VMOVDQA Y9, Y1
315 VMOVDQA Y6, Y2
316 VPXOR 0(DX), Y7, Y3
317
	// Rounds 1 and 2. Their gathered message registers are also saved at
	// 32..159(DX) and 160..287(DX) so that the last two rounds, which use
	// the same word order, can read them back instead of reloading.
318 LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15()
319 VMOVDQA Y12, 32(DX)
320 VMOVDQA Y13, 64(DX)
321 VMOVDQA Y14, 96(DX)
322 VMOVDQA Y15, 128(DX)
323 ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
324 LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3()
325 VMOVDQA Y12, 160(DX)
326 VMOVDQA Y13, 192(DX)
327 VMOVDQA Y14, 224(DX)
328 VMOVDQA Y15, 256(DX)
329
330 ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
	// Rounds 3 through 10, each with its own message-word order.
331 LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4()
332 ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
333 LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8()
334 ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
335 LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13()
336 ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
337 LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9()
338 ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
339 LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11()
340 ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
341 LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10()
342 ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
343 LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5()
344 ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
345 LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0()
346 ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
347
	// Rounds 11 and 12 reuse the schedules saved during rounds 1 and 2.
348 ROUND_AVX2(32(DX), 64(DX), 96(DX), 128(DX), Y10, Y4, Y5)
349 ROUND_AVX2(160(DX), 192(DX), 224(DX), 256(DX), Y10, Y4, Y5)
350
	// Feed-forward: h rows ^= working rows 0/1 ^ working rows 2/3.
351 VPXOR Y0, Y8, Y8
352 VPXOR Y1, Y9, Y9
353 VPXOR Y2, Y8, Y8
354 VPXOR Y3, Y9, Y9
355
	// Next block; stop once the remaining length hits zero.
356 LEAQ 128(SI), SI
357 SUBQ $128, DI
358 JNE loop
359
	// Write the counter and the chaining value back to the caller.
360 MOVQ R8, 0(BX)
361 MOVQ R9, 8(BX)
362
363 VMOVDQU Y8, 0(AX)
364 VMOVDQU Y9, 32(AX)
365 VZEROUPPER
366
367 RET
368
// Hand-encoded VPUNPCKLQDQ / VPUNPCKHQDQ instructions used by SHUFFLE_AVX and
// SHUFFLE_AVX_INV, emitted as raw BYTE sequences. The macro names list the
// operands in Go assembler order, destination last. NOTE(review): the
// encodings are taken on trust from the names — verify with an assembler
// before editing.
369#define VPUNPCKLQDQ_X2_X2_X15 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xFA
370#define VPUNPCKLQDQ_X3_X3_X15 BYTE $0xC5; BYTE $0x61; BYTE $0x6C; BYTE $0xFB
371#define VPUNPCKLQDQ_X7_X7_X15 BYTE $0xC5; BYTE $0x41; BYTE $0x6C; BYTE $0xFF
372#define VPUNPCKLQDQ_X13_X13_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x11; BYTE $0x6C; BYTE $0xFD
373#define VPUNPCKLQDQ_X14_X14_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x09; BYTE $0x6C; BYTE $0xFE
374
375#define VPUNPCKHQDQ_X15_X2_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x69; BYTE $0x6D; BYTE $0xD7
376#define VPUNPCKHQDQ_X15_X3_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xDF
377#define VPUNPCKHQDQ_X15_X6_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x49; BYTE $0x6D; BYTE $0xF7
378#define VPUNPCKHQDQ_X15_X7_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xFF
379#define VPUNPCKHQDQ_X15_X3_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xD7
380#define VPUNPCKHQDQ_X15_X7_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xF7
381#define VPUNPCKHQDQ_X15_X13_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xDF
382#define VPUNPCKHQDQ_X15_X13_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xFF
383
// SHUFFLE_AVX rearranges the working state held in X2..X7 between the two
// half-rounds of a round in hashBlocksAVX: X4 and X5 are swapped, and the
// quadwords of the X2/X3 and X6/X7 pairs are recombined through unpack
// operations (presumably the column-to-diagonal rotation of the state —
// confirm against the algorithm specification). X0 and X1 are untouched.
// X13, X14 and X15 are used as scratch and clobbered.
//
// The body ends with a trailing "; \" followed by an empty line; that empty
// line terminates the macro and must be kept.
384#define SHUFFLE_AVX() \
385 VMOVDQA X6, X13; \
386 VMOVDQA X2, X14; \
387 VMOVDQA X4, X6; \
388 VPUNPCKLQDQ_X13_X13_X15; \
389 VMOVDQA X5, X4; \
390 VMOVDQA X6, X5; \
391 VPUNPCKHQDQ_X15_X7_X6; \
392 VPUNPCKLQDQ_X7_X7_X15; \
393 VPUNPCKHQDQ_X15_X13_X7; \
394 VPUNPCKLQDQ_X3_X3_X15; \
395 VPUNPCKHQDQ_X15_X2_X2; \
396 VPUNPCKLQDQ_X14_X14_X15; \
397 VPUNPCKHQDQ_X15_X3_X3; \
398
// SHUFFLE_AVX_INV is invoked after the second half-round of every round in
// hashBlocksAVX and, going by its name, undoes SHUFFLE_AVX. It uses the same
// scratch registers (X13, X14, X15) and the same trailing-continuation
// layout.
399#define SHUFFLE_AVX_INV() \
400 VMOVDQA X2, X13; \
401 VMOVDQA X4, X14; \
402 VPUNPCKLQDQ_X2_X2_X15; \
403 VMOVDQA X5, X4; \
404 VPUNPCKHQDQ_X15_X3_X2; \
405 VMOVDQA X14, X5; \
406 VPUNPCKLQDQ_X3_X3_X15; \
407 VMOVDQA X6, X14; \
408 VPUNPCKHQDQ_X15_X13_X3; \
409 VPUNPCKLQDQ_X7_X7_X15; \
410 VPUNPCKHQDQ_X15_X6_X6; \
411 VPUNPCKLQDQ_X14_X14_X15; \
412 VPUNPCKHQDQ_X15_X7_X7; \
413
// HALF_ROUND_AVX is the 128-bit counterpart of one half of ROUND_AVX2. The
// state is spread over eight XMM registers, two 64-bit words each:
// (v0,v1), (v2,v3), (v4,v5) and (v6,v7) are the four register pairs that get
// mixed together.
//
//   m0..m3   message operands (registers or memory); m0/m2 feed v0, m1/m3 feed v1
//   t0       scratch register, clobbered
//   c40/c48  registers holding the two VPSHUFB rotation masks
//
// Rotations used: VPSHUFD $-79 (0xB1) swaps the 32-bit halves of each
// quadword, VPSHUFB c40/c48 are the byte rotations encoded in the masks, and
// each VPADDQ x,x,t0 / VPSRLQ $63 / VPXOR triple is a rotate-left by 1.
//
// hashBlocksAVX passes X15 both as m3 and as t0. That is safe only because
// the last read of m3 comes before the first write to t0.
414#define HALF_ROUND_AVX(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
415 VPADDQ m0, v0, v0; \
416 VPADDQ v2, v0, v0; \
417 VPADDQ m1, v1, v1; \
418 VPADDQ v3, v1, v1; \
419 VPXOR v0, v6, v6; \
420 VPXOR v1, v7, v7; \
421 VPSHUFD $-79, v6, v6; \
422 VPSHUFD $-79, v7, v7; \
423 VPADDQ v6, v4, v4; \
424 VPADDQ v7, v5, v5; \
425 VPXOR v4, v2, v2; \
426 VPXOR v5, v3, v3; \
427 VPSHUFB c40, v2, v2; \
428 VPSHUFB c40, v3, v3; \
429 VPADDQ m2, v0, v0; \
430 VPADDQ v2, v0, v0; \
431 VPADDQ m3, v1, v1; \
432 VPADDQ v3, v1, v1; \
433 VPXOR v0, v6, v6; \
434 VPXOR v1, v7, v7; \
435 VPSHUFB c48, v6, v6; \
436 VPSHUFB c48, v7, v7; \
437 VPADDQ v6, v4, v4; \
438 VPADDQ v7, v5, v5; \
439 VPXOR v4, v2, v2; \
440 VPXOR v5, v3, v3; \
441 VPADDQ v2, v2, t0; \
442 VPSRLQ $63, v2, v2; \
443 VPXOR t0, v2, v2; \
444 VPADDQ v3, v3, t0; \
445 VPSRLQ $63, v3, v3; \
446 VPXOR t0, v3, v3
447
// Message gathers for the 128-bit AVX path. Every macro here fills X12..X15
// with eight message words (two per register) read from the block at SI; the
// numbers in a macro's name, or its arguments, are the word indices in order.
//
// LOAD_MSG_AVX is the generic form. The named variants cover groups that
// contain word 0 (loaded with the *_0 encodings or a plain MOVQ) or pairs
// that are adjacent in memory (one VMOVDQU, or VPSHUFD $0x4E when the pair
// is needed swapped).
//
// NOTE(review): several variants below use the legacy MOVQ mnemonic for the
// low-quadword load where the others use the VEX-encoded VMOVQ_SI_* macros.

448// load msg: X12 = (i0, i1), X13 = (i2, i3), X14 = (i4, i5), X15 = (i6, i7)
449// i0, i1, i2, i3, i4, i5, i6, i7 must not be 0
450#define LOAD_MSG_AVX(i0, i1, i2, i3, i4, i5, i6, i7) \
451 VMOVQ_SI_X12(i0*8); \
452 VMOVQ_SI_X13(i2*8); \
453 VMOVQ_SI_X14(i4*8); \
454 VMOVQ_SI_X15(i6*8); \
455 VPINSRQ_1_SI_X12(i1*8); \
456 VPINSRQ_1_SI_X13(i3*8); \
457 VPINSRQ_1_SI_X14(i5*8); \
458 VPINSRQ_1_SI_X15(i7*8)
459
460// load msg: X12 = (0, 2), X13 = (4, 6), X14 = (1, 3), X15 = (5, 7)
461#define LOAD_MSG_AVX_0_2_4_6_1_3_5_7() \
462 VMOVQ_SI_X12_0; \
463 VMOVQ_SI_X13(4*8); \
464 VMOVQ_SI_X14(1*8); \
465 VMOVQ_SI_X15(5*8); \
466 VPINSRQ_1_SI_X12(2*8); \
467 VPINSRQ_1_SI_X13(6*8); \
468 VPINSRQ_1_SI_X14(3*8); \
469 VPINSRQ_1_SI_X15(7*8)
470
471// load msg: X12 = (1, 0), X13 = (11, 5), X14 = (12, 2), X15 = (7, 3)
472#define LOAD_MSG_AVX_1_0_11_5_12_2_7_3() \
473 VPSHUFD $0x4E, 0*8(SI), X12; \
474 VMOVQ_SI_X13(11*8); \
475 VMOVQ_SI_X14(12*8); \
476 VMOVQ_SI_X15(7*8); \
477 VPINSRQ_1_SI_X13(5*8); \
478 VPINSRQ_1_SI_X14(2*8); \
479 VPINSRQ_1_SI_X15(3*8)
480
481// load msg: X12 = (11, 12), X13 = (5, 15), X14 = (8, 0), X15 = (2, 13)
482#define LOAD_MSG_AVX_11_12_5_15_8_0_2_13() \
483 VMOVDQU 11*8(SI), X12; \
484 VMOVQ_SI_X13(5*8); \
485 VMOVQ_SI_X14(8*8); \
486 VMOVQ_SI_X15(2*8); \
487 VPINSRQ_1_SI_X13(15*8); \
488 VPINSRQ_1_SI_X14_0; \
489 VPINSRQ_1_SI_X15(13*8)
490
491// load msg: X12 = (2, 5), X13 = (4, 15), X14 = (6, 10), X15 = (0, 8)
492#define LOAD_MSG_AVX_2_5_4_15_6_10_0_8() \
493 VMOVQ_SI_X12(2*8); \
494 VMOVQ_SI_X13(4*8); \
495 VMOVQ_SI_X14(6*8); \
496 VMOVQ_SI_X15_0; \
497 VPINSRQ_1_SI_X12(5*8); \
498 VPINSRQ_1_SI_X13(15*8); \
499 VPINSRQ_1_SI_X14(10*8); \
500 VPINSRQ_1_SI_X15(8*8)
501
502// load msg: X12 = (9, 5), X13 = (2, 10), X14 = (0, 7), X15 = (4, 15)
503#define LOAD_MSG_AVX_9_5_2_10_0_7_4_15() \
504 VMOVQ_SI_X12(9*8); \
505 VMOVQ_SI_X13(2*8); \
506 VMOVQ_SI_X14_0; \
507 VMOVQ_SI_X15(4*8); \
508 VPINSRQ_1_SI_X12(5*8); \
509 VPINSRQ_1_SI_X13(10*8); \
510 VPINSRQ_1_SI_X14(7*8); \
511 VPINSRQ_1_SI_X15(15*8)
512
513// load msg: X12 = (2, 6), X13 = (0, 8), X14 = (12, 10), X15 = (11, 3)
514#define LOAD_MSG_AVX_2_6_0_8_12_10_11_3() \
515 VMOVQ_SI_X12(2*8); \
516 VMOVQ_SI_X13_0; \
517 VMOVQ_SI_X14(12*8); \
518 VMOVQ_SI_X15(11*8); \
519 VPINSRQ_1_SI_X12(6*8); \
520 VPINSRQ_1_SI_X13(8*8); \
521 VPINSRQ_1_SI_X14(10*8); \
522 VPINSRQ_1_SI_X15(3*8)
523
524// load msg: X12 = (0, 6), X13 = (9, 8), X14 = (7, 3), X15 = (2, 11)
525#define LOAD_MSG_AVX_0_6_9_8_7_3_2_11() \
526 MOVQ 0*8(SI), X12; \
527 VPSHUFD $0x4E, 8*8(SI), X13; \
528 MOVQ 7*8(SI), X14; \
529 MOVQ 2*8(SI), X15; \
530 VPINSRQ_1_SI_X12(6*8); \
531 VPINSRQ_1_SI_X14(3*8); \
532 VPINSRQ_1_SI_X15(11*8)
533
534// load msg: X12 = (6, 14), X13 = (11, 0), X14 = (15, 9), X15 = (3, 8)
535#define LOAD_MSG_AVX_6_14_11_0_15_9_3_8() \
536 MOVQ 6*8(SI), X12; \
537 MOVQ 11*8(SI), X13; \
538 MOVQ 15*8(SI), X14; \
539 MOVQ 3*8(SI), X15; \
540 VPINSRQ_1_SI_X12(14*8); \
541 VPINSRQ_1_SI_X13_0; \
542 VPINSRQ_1_SI_X14(9*8); \
543 VPINSRQ_1_SI_X15(8*8)
544
545// load msg: X12 = (5, 15), X13 = (8, 2), X14 = (0, 4), X15 = (6, 10)
546#define LOAD_MSG_AVX_5_15_8_2_0_4_6_10() \
547 MOVQ 5*8(SI), X12; \
548 MOVQ 8*8(SI), X13; \
549 MOVQ 0*8(SI), X14; \
550 MOVQ 6*8(SI), X15; \
551 VPINSRQ_1_SI_X12(15*8); \
552 VPINSRQ_1_SI_X13(2*8); \
553 VPINSRQ_1_SI_X14(4*8); \
554 VPINSRQ_1_SI_X15(10*8)
555
556// load msg: X12 = (12, 13), X13 = (1, 10), X14 = (2, 7), X15 = (4, 5)
557#define LOAD_MSG_AVX_12_13_1_10_2_7_4_5() \
558 VMOVDQU 12*8(SI), X12; \
559 MOVQ 1*8(SI), X13; \
560 MOVQ 2*8(SI), X14; \
561 VPINSRQ_1_SI_X13(10*8); \
562 VPINSRQ_1_SI_X14(7*8); \
563 VMOVDQU 4*8(SI), X15
564
565// load msg: X12 = (15, 9), X13 = (3, 13), X14 = (11, 14), X15 = (12, 0)
566#define LOAD_MSG_AVX_15_9_3_13_11_14_12_0() \
567 MOVQ 15*8(SI), X12; \
568 MOVQ 3*8(SI), X13; \
569 MOVQ 11*8(SI), X14; \
570 MOVQ 12*8(SI), X15; \
571 VPINSRQ_1_SI_X12(9*8); \
572 VPINSRQ_1_SI_X13(13*8); \
573 VPINSRQ_1_SI_X14(14*8); \
574 VPINSRQ_1_SI_X15_0
575
// hashBlocksAVX compresses the 128-byte blocks in `blocks` into the state h,
// advancing the two-word counter c by 128 per block, using 128-bit AVX
// registers. h and c are written back to memory.
//
// Register roles (as assigned below):
//   AX = h, BX = c, SI = current block, DI = bytes remaining
//   R8/R9   = counter low/high words
//   R10     = 16-byte aligned scratch area carved out of the frame
//   X8/X9   = VPSHUFB rotation masks
//   X10/X11 = h[0..3], carried across iterations in registers
//   X0..X7  = working state, X12..X15 = message words / scratch
//
// h[4..7] is kept in memory between iterations: X2/X3 start each iteration
// holding it, are consumed as working registers, and the updated value is
// stored back to 32(AX)/48(AX) at the bottom of the loop.
//
// The loop exits only when DI reaches exactly zero after subtracting 128, so
// blocks_len presumably must be a non-zero multiple of 128 — confirm at the
// call sites. The TEXT flag value 4 is NOSPLIT in textflag.h.
576// func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
577TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
578 MOVQ h+0(FP), AX
579 MOVQ c+8(FP), BX
580 MOVQ flag+16(FP), CX
581 MOVQ blocks_base+24(FP), SI
582 MOVQ blocks_len+32(FP), DI
583
	// R10 = SP rounded up to a 16-byte boundary, required by the VMOVDQA
	// accesses to the scratch area.
584 MOVQ SP, R10
585 ADDQ $15, R10
586 ANDQ $~15, R10
587
588 VMOVDQU ·AVX_c40<>(SB), X0
589 VMOVDQU ·AVX_c48<>(SB), X1
590 VMOVDQA X0, X8
591 VMOVDQA X1, X9
592
	// Precompute the last state row once; it does not depend on the counter.
593 VMOVDQU ·AVX_iv3<>(SB), X0
594 VMOVDQA X0, 0(R10)
595 XORQ CX, 0(R10) // 0(R10) = ·AVX_iv3 ^ (CX || 0)
596
597 VMOVDQU 0(AX), X10
598 VMOVDQU 16(AX), X11
599 VMOVDQU 32(AX), X2
600 VMOVDQU 48(AX), X3
601
602 MOVQ 0(BX), R8
603 MOVQ 8(BX), R9
604
605loop:
	// Advance the counter by one block. If the low word is now below 128
	// (the JGE comparison is signed) the high word is incremented as carry.
606 ADDQ $128, R8
607 CMPQ R8, $128
608 JGE noinc
609 INCQ R9
610
611noinc:
	// X15 = (counter lo, counter hi)
612 VMOVQ_R8_X15
613 VPINSRQ_1_R9_X15
614
	// Working state: X0/X1 = h[0..3], X2/X3 = h[4..7] (already in place),
	// X4/X5 = IV[0..3], X6 = IV[4..5] ^ counter, X7 = IV[6..7] ^ (flag, 0).
615 VMOVDQA X10, X0
616 VMOVDQA X11, X1
617 VMOVDQU ·AVX_iv0<>(SB), X4
618 VMOVDQU ·AVX_iv1<>(SB), X5
619 VMOVDQU ·AVX_iv2<>(SB), X6
620
621 VPXOR X15, X6, X6
622 VMOVDQA 0(R10), X7
623
	// Rounds 1 and 2. Each round is two half-rounds with a shuffle in
	// between and the inverse shuffle afterwards. The gathered message
	// registers of these four half-rounds are also saved at 16..271(R10) so
	// that rounds 11 and 12, which use the same word order, can read them
	// back instead of reloading.
624 LOAD_MSG_AVX_0_2_4_6_1_3_5_7()
625 VMOVDQA X12, 16(R10)
626 VMOVDQA X13, 32(R10)
627 VMOVDQA X14, 48(R10)
628 VMOVDQA X15, 64(R10)
629 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
630 SHUFFLE_AVX()
631 LOAD_MSG_AVX(8, 10, 12, 14, 9, 11, 13, 15)
632 VMOVDQA X12, 80(R10)
633 VMOVDQA X13, 96(R10)
634 VMOVDQA X14, 112(R10)
635 VMOVDQA X15, 128(R10)
636 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
637 SHUFFLE_AVX_INV()
638
639 LOAD_MSG_AVX(14, 4, 9, 13, 10, 8, 15, 6)
640 VMOVDQA X12, 144(R10)
641 VMOVDQA X13, 160(R10)
642 VMOVDQA X14, 176(R10)
643 VMOVDQA X15, 192(R10)
644 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
645 SHUFFLE_AVX()
646 LOAD_MSG_AVX_1_0_11_5_12_2_7_3()
647 VMOVDQA X12, 208(R10)
648 VMOVDQA X13, 224(R10)
649 VMOVDQA X14, 240(R10)
650 VMOVDQA X15, 256(R10)
651 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
652 SHUFFLE_AVX_INV()
653
	// Rounds 3 through 10, each with its own message-word order.
654 LOAD_MSG_AVX_11_12_5_15_8_0_2_13()
655 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
656 SHUFFLE_AVX()
657 LOAD_MSG_AVX(10, 3, 7, 9, 14, 6, 1, 4)
658 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
659 SHUFFLE_AVX_INV()
660
661 LOAD_MSG_AVX(7, 3, 13, 11, 9, 1, 12, 14)
662 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
663 SHUFFLE_AVX()
664 LOAD_MSG_AVX_2_5_4_15_6_10_0_8()
665 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
666 SHUFFLE_AVX_INV()
667
668 LOAD_MSG_AVX_9_5_2_10_0_7_4_15()
669 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
670 SHUFFLE_AVX()
671 LOAD_MSG_AVX(14, 11, 6, 3, 1, 12, 8, 13)
672 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
673 SHUFFLE_AVX_INV()
674
675 LOAD_MSG_AVX_2_6_0_8_12_10_11_3()
676 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
677 SHUFFLE_AVX()
678 LOAD_MSG_AVX(4, 7, 15, 1, 13, 5, 14, 9)
679 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
680 SHUFFLE_AVX_INV()
681
682 LOAD_MSG_AVX(12, 1, 14, 4, 5, 15, 13, 10)
683 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
684 SHUFFLE_AVX()
685 LOAD_MSG_AVX_0_6_9_8_7_3_2_11()
686 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
687 SHUFFLE_AVX_INV()
688
689 LOAD_MSG_AVX(13, 7, 12, 3, 11, 14, 1, 9)
690 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
691 SHUFFLE_AVX()
692 LOAD_MSG_AVX_5_15_8_2_0_4_6_10()
693 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
694 SHUFFLE_AVX_INV()
695
696 LOAD_MSG_AVX_6_14_11_0_15_9_3_8()
697 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
698 SHUFFLE_AVX()
699 LOAD_MSG_AVX_12_13_1_10_2_7_4_5()
700 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
701 SHUFFLE_AVX_INV()
702
703 LOAD_MSG_AVX(10, 8, 7, 1, 2, 4, 6, 5)
704 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
705 SHUFFLE_AVX()
706 LOAD_MSG_AVX_15_9_3_13_11_14_12_0()
707 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
708 SHUFFLE_AVX_INV()
709
	// Rounds 11 and 12 read the message words saved during rounds 1 and 2
	// straight from the scratch area.
710 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X15, X8, X9)
711 SHUFFLE_AVX()
712 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X15, X8, X9)
713 SHUFFLE_AVX_INV()
714
715 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X15, X8, X9)
716 SHUFFLE_AVX()
717 HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X15, X8, X9)
718 SHUFFLE_AVX_INV()
719
	// Feed-forward. The previous h[4..7] is reloaded from memory into
	// X14/X15 because X2/X3 were consumed as working registers. Result:
	// X10/X11 = new h[0..3] (kept in registers), X2/X3 = new h[4..7]
	// (stored immediately and reused as the next iteration's input).
720 VMOVDQU 32(AX), X14
721 VMOVDQU 48(AX), X15
722 VPXOR X0, X10, X10
723 VPXOR X1, X11, X11
724 VPXOR X2, X14, X14
725 VPXOR X3, X15, X15
726 VPXOR X4, X10, X10
727 VPXOR X5, X11, X11
728 VPXOR X6, X14, X2
729 VPXOR X7, X15, X3
730 VMOVDQU X2, 32(AX)
731 VMOVDQU X3, 48(AX)
732
	// Next block; stop once the remaining length hits zero.
733 LEAQ 128(SI), SI
734 SUBQ $128, DI
735 JNE loop
736
	// Write h[0..3] and the counter back to the caller.
737 VMOVDQU X10, 0(AX)
738 VMOVDQU X11, 16(AX)
739
740 MOVQ R8, 0(BX)
741 MOVQ R9, 8(BX)
742 VZEROUPPER
743
744 RET
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go
deleted file mode 100644
index 1d0770a..0000000
--- a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.go
+++ /dev/null
@@ -1,24 +0,0 @@
1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build !go1.7 && amd64 && gc && !purego
6
7package blake2b
8
9import "golang.org/x/sys/cpu"
10
// init records whether the running CPU reports SSE4.1 support (as detected by
// golang.org/x/sys/cpu) in useSSE4. hashBlocks consults that flag to decide
// between the assembly and the generic implementation.
11func init() {
12 useSSE4 = cpu.X86.HasSSE41
13}
14
// hashBlocksSSE4 is the assembly implementation of the block compression
// function. It takes the same arguments as hashBlocksGeneric: the chaining
// value h, the byte counter c, the finalization flag and the input blocks.
// It is only called by hashBlocks when useSSE4 is set.
15//go:noescape
16func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
17
18func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
19 if useSSE4 {
20 hashBlocksSSE4(h, c, flag, blocks)
21 } else {
22 hashBlocksGeneric(h, c, flag, blocks)
23 }
24}
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s
deleted file mode 100644
index adfac00..0000000
--- a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s
+++ /dev/null
@@ -1,278 +0,0 @@
1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build amd64 && gc && !purego
6
7#include "textflag.h"
8
// iv0..iv3 each hold two 64-bit initialization constants (16 bytes per
// symbol). hashBlocksSSE4 loads iv0..iv2 into X4..X6 and iv3, XORed with the
// flag argument, into X7 at the start of every block.
// NOTE(review): presumably the same eight values as the Go-side iv table;
// confirm against blake2b.go.
9DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
10DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
11GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16
12
13DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
14DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
15GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16
16
17DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1
18DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
19GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16
20
21DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
22DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
23GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16
24
// c40 and c48 are PSHUFB byte-shuffle masks handed to HALF_ROUND. Within each
// 64-bit lane c40 moves source byte 3 to position 0 (a rotation right by
// 24 bits, i.e. left by 40) and c48 moves source byte 2 to position 0
// (a rotation right by 16 bits, i.e. left by 48).
25DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
26DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
27GLOBL ·c40<>(SB), (NOPTR+RODATA), $16
28
29DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
30DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
31GLOBL ·c48<>(SB), (NOPTR+RODATA), $16
32
// SHUFFLE rearranges the state rows between the two HALF_ROUND calls of a
// round. Treating (v2,v3), (v4,v5) and (v6,v7) as rows of four 64-bit lanes:
//   - (v2,v3) is rotated by one lane: (b0,b1,b2,b3) -> (b1,b2,b3,b0)
//   - v4 and v5 are swapped:          (c0,c1,c2,c3) -> (c2,c3,c0,c1)
//   - (v6,v7) is rotated by three:    (d0,d1,d2,d3) -> (d3,d0,d1,d2)
// t1 and t2 are scratch registers and are clobbered.
33#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
34 MOVO v4, t1; \
35 MOVO v5, v4; \
36 MOVO t1, v5; \
37 MOVO v6, t1; \
38 PUNPCKLQDQ v6, t2; \
39 PUNPCKHQDQ v7, v6; \
40 PUNPCKHQDQ t2, v6; \
41 PUNPCKLQDQ v7, t2; \
42 MOVO t1, v7; \
43 MOVO v2, t1; \
44 PUNPCKHQDQ t2, v7; \
45 PUNPCKLQDQ v3, t2; \
46 PUNPCKHQDQ t2, v2; \
47 PUNPCKLQDQ t1, t2; \
48 PUNPCKHQDQ t2, v3
49
// SHUFFLE_INV applies the same instruction sequence as SHUFFLE with the roles
// of the (v2,v3) and (v6,v7) register pairs exchanged, and again swaps v4 and
// v5. It is invoked after the second HALF_ROUND of every round to return the
// rows to the layout they had before SHUFFLE. t1 and t2 are scratch registers
// and are clobbered.
50#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
51 MOVO v4, t1; \
52 MOVO v5, v4; \
53 MOVO t1, v5; \
54 MOVO v2, t1; \
55 PUNPCKLQDQ v2, t2; \
56 PUNPCKHQDQ v3, v2; \
57 PUNPCKHQDQ t2, v2; \
58 PUNPCKLQDQ v3, t2; \
59 MOVO t1, v3; \
60 MOVO v6, t1; \
61 PUNPCKHQDQ t2, v3; \
62 PUNPCKLQDQ v7, t2; \
63 PUNPCKHQDQ t2, v6; \
64 PUNPCKLQDQ t1, t2; \
65 PUNPCKHQDQ t2, v7
66
// HALF_ROUND runs the mixing step on four 64-bit lanes at once: rows
// (v0,v1), (v2,v3), (v4,v5) and (v6,v7) each hold four lanes, two per XMM
// register. m0/m1 supply the message words added in the first half of the
// step and m2/m3 those added in the second half.
//
// The four rotations of the step are implemented as:
//   - PSHUFD $0xB1: swaps the 32-bit halves of each lane (rotate by 32)
//   - PSHUFB c40:   rotate right by 24
//   - PSHUFB c48:   rotate right by 16
//   - (x + x) ^ (x >> 63) via t0: rotate right by 63
// t0 is a scratch register and is clobbered. The call sites in this file pass
// a register that also held a message word as t0, which works because the
// message operands are consumed before t0 is first written.
67#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
68 PADDQ m0, v0; \
69 PADDQ m1, v1; \
70 PADDQ v2, v0; \
71 PADDQ v3, v1; \
72 PXOR v0, v6; \
73 PXOR v1, v7; \
74 PSHUFD $0xB1, v6, v6; \
75 PSHUFD $0xB1, v7, v7; \
76 PADDQ v6, v4; \
77 PADDQ v7, v5; \
78 PXOR v4, v2; \
79 PXOR v5, v3; \
80 PSHUFB c40, v2; \
81 PSHUFB c40, v3; \
82 PADDQ m2, v0; \
83 PADDQ m3, v1; \
84 PADDQ v2, v0; \
85 PADDQ v3, v1; \
86 PXOR v0, v6; \
87 PXOR v1, v7; \
88 PSHUFB c48, v6; \
89 PSHUFB c48, v7; \
90 PADDQ v6, v4; \
91 PADDQ v7, v5; \
92 PXOR v4, v2; \
93 PXOR v5, v3; \
94 MOVOU v2, t0; \
95 PADDQ v2, t0; \
96 PSRLQ $63, v2; \
97 PXOR t0, v2; \
98 MOVOU v3, t0; \
99 PADDQ v3, t0; \
100 PSRLQ $63, v3; \
101 PXOR t0, v3
102
// LOAD_MSG gathers eight 64-bit message words from the block at src into the
// four registers m0..m3. i0..i7 are word indices (scaled by 8 to byte
// offsets): for each register, MOVQ loads the first index of the pair into
// the low lane and PINSRQ $1 inserts the second into the high lane, giving
// m0 = (i0, i1), m1 = (i2, i3), m2 = (i4, i5), m3 = (i6, i7).
103#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \
104 MOVQ i0*8(src), m0; \
105 PINSRQ $1, i1*8(src), m0; \
106 MOVQ i2*8(src), m1; \
107 PINSRQ $1, i3*8(src), m1; \
108 MOVQ i4*8(src), m2; \
109 PINSRQ $1, i5*8(src), m2; \
110 MOVQ i6*8(src), m3; \
111 PINSRQ $1, i7*8(src), m3
112
113// func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
//
// Compresses len(blocks)/128 consecutive 128-byte blocks into h and advances
// the 128-bit byte counter c by 128 per block.
//
// Register use:
//   AX = h, BX = c, CX = flag, SI = current block, DI = bytes left
//   R8/R9   = low/high counter words
//   R10     = 16-byte aligned scratch area on the stack
//   X12,X15 = h[0..3], kept in registers across blocks
//   X13,X14 = c40/c48 shuffle masks
//
// The loop exits only when DI reaches exactly zero (SUBQ/JNE), so it relies
// on blocks_len being a non-zero multiple of 128.
// NOTE(review): the callers are not visible here; confirm they guarantee this.
114TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
115 MOVQ h+0(FP), AX
116 MOVQ c+8(FP), BX
117 MOVQ flag+16(FP), CX
118 MOVQ blocks_base+24(FP), SI
119 MOVQ blocks_len+32(FP), DI
120
// Round SP up to a 16-byte boundary so the aligned MOVO accesses below are valid.
121 MOVQ SP, R10
122 ADDQ $15, R10
123 ANDQ $~15, R10
124
125 MOVOU ·iv3<>(SB), X0
126 MOVO X0, 0(R10)
127 XORQ CX, 0(R10) // 0(R10) = ·iv3 ^ (CX || 0)
128
129 MOVOU ·c40<>(SB), X13
130 MOVOU ·c48<>(SB), X14
131
132 MOVOU 0(AX), X12
133 MOVOU 16(AX), X15
134
135 MOVQ 0(BX), R8
136 MOVQ 8(BX), R9
137
138loop:
// Advance the counter by one block; carry into R9 when the low word wrapped
// around (it is then smaller than 128).
// NOTE(review): JGE is a signed comparison, so a low word >= 2^63 would also
// take the carry path, unlike the unsigned check in hashBlocksGeneric. Only
// reachable after hashing on the order of 2^63 bytes; confirm whether this matters.
139 ADDQ $128, R8
140 CMPQ R8, $128
141 JGE noinc
142 INCQ R9
143
144noinc:
145 MOVQ R8, X8
146 PINSRQ $1, R9, X8
147
// Build the working state: X0..X3 = h, X4..X6 = iv0..iv2 (X6 XORed with the
// counter), X7 = iv3 ^ flag as prepared at 0(R10).
148 MOVO X12, X0
149 MOVO X15, X1
150 MOVOU 32(AX), X2
151 MOVOU 48(AX), X3
152 MOVOU ·iv0<>(SB), X4
153 MOVOU ·iv1<>(SB), X5
154 MOVOU ·iv2<>(SB), X6
155
156 PXOR X8, X6
157 MOVO 0(R10), X7
158
// Rounds 1 and 2. Their message vectors are also saved at 16..271(R10)
// because the last two rounds reuse the same word order.
159 LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7)
160 MOVO X8, 16(R10)
161 MOVO X9, 32(R10)
162 MOVO X10, 48(R10)
163 MOVO X11, 64(R10)
164 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
165 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
166 LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15)
167 MOVO X8, 80(R10)
168 MOVO X9, 96(R10)
169 MOVO X10, 112(R10)
170 MOVO X11, 128(R10)
171 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
172 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
173
174 LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6)
175 MOVO X8, 144(R10)
176 MOVO X9, 160(R10)
177 MOVO X10, 176(R10)
178 MOVO X11, 192(R10)
179 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
180 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
181 LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3)
182 MOVO X8, 208(R10)
183 MOVO X9, 224(R10)
184 MOVO X10, 240(R10)
185 MOVO X11, 256(R10)
186 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
187 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
188
// Rounds 3 to 10: message words are loaded straight from the block.
189 LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13)
190 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
191 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
192 LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4)
193 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
194 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
195
196 LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14)
197 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
198 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
199 LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8)
200 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
201 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
202
203 LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15)
204 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
205 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
206 LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13)
207 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
208 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
209
210 LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3)
211 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
212 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
213 LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9)
214 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
215 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
216
217 LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10)
218 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
219 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
220 LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11)
221 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
222 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
223
224 LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9)
225 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
226 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
227 LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10)
228 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
229 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
230
231 LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8)
232 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
233 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
234 LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5)
235 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
236 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
237
238 LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5)
239 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
240 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
241 LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0)
242 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
243 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
244
// Rounds 11 and 12 read the message vectors saved on the stack during rounds 1 and 2.
245 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X11, X13, X14)
246 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
247 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X11, X13, X14)
248 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
249
250 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X11, X13, X14)
251 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
252 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X11, X13, X14)
253 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
254
// Feed-forward: h ^= upper half of state ^ lower half of state. h[4..7] is
// written back to memory immediately; h[0..3] stays in X12/X15 until the loop ends.
255 MOVOU 32(AX), X10
256 MOVOU 48(AX), X11
257 PXOR X0, X12
258 PXOR X1, X15
259 PXOR X2, X10
260 PXOR X3, X11
261 PXOR X4, X12
262 PXOR X5, X15
263 PXOR X6, X10
264 PXOR X7, X11
265 MOVOU X10, 32(AX)
266 MOVOU X11, 48(AX)
267
268 LEAQ 128(SI), SI
269 SUBQ $128, DI
270 JNE loop
271
// Store h[0..3] and the updated counter back to the caller's memory.
272 MOVOU X12, 0(AX)
273 MOVOU X15, 16(AX)
274
275 MOVQ R8, 0(BX)
276 MOVQ R9, 8(BX)
277
278 RET
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go b/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go
deleted file mode 100644
index 3168a8a..0000000
--- a/vendor/golang.org/x/crypto/blake2b/blake2b_generic.go
+++ /dev/null
@@ -1,182 +0,0 @@
1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package blake2b
6
7import (
8 "encoding/binary"
9 "math/bits"
10)
11
12// precomputed is the message-word schedule for BLAKE2b: 12 arrays of 16
13// indices, one array per round, calculated from the sigma constants.
14// hashBlocksGeneric uses entries 0-7 of a row for the column step and entries 8-15 for the diagonal step.
15var precomputed = [12][16]byte{
16 {0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
17 {14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
18 {11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4},
19 {7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8},
20 {9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13},
21 {2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9},
22 {12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11},
23 {13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10},
24 {6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5},
25 {10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0},
26 {0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}, // equal to the first
27 {14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}, // equal to the second
28}
29
30func hashBlocksGeneric(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
31 var m [16]uint64
32 c0, c1 := c[0], c[1]
33
34 for i := 0; i < len(blocks); {
35 c0 += BlockSize
36 if c0 < BlockSize {
37 c1++
38 }
39
40 v0, v1, v2, v3, v4, v5, v6, v7 := h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7]
41 v8, v9, v10, v11, v12, v13, v14, v15 := iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7]
42 v12 ^= c0
43 v13 ^= c1
44 v14 ^= flag
45
46 for j := range m {
47 m[j] = binary.LittleEndian.Uint64(blocks[i:])
48 i += 8
49 }
50
51 for j := range precomputed {
52 s := &(precomputed[j])
53
54 v0 += m[s[0]]
55 v0 += v4
56 v12 ^= v0
57 v12 = bits.RotateLeft64(v12, -32)
58 v8 += v12
59 v4 ^= v8
60 v4 = bits.RotateLeft64(v4, -24)
61 v1 += m[s[1]]
62 v1 += v5
63 v13 ^= v1
64 v13 = bits.RotateLeft64(v13, -32)
65 v9 += v13
66 v5 ^= v9
67 v5 = bits.RotateLeft64(v5, -24)
68 v2 += m[s[2]]
69 v2 += v6
70 v14 ^= v2
71 v14 = bits.RotateLeft64(v14, -32)
72 v10 += v14
73 v6 ^= v10
74 v6 = bits.RotateLeft64(v6, -24)
75 v3 += m[s[3]]
76 v3 += v7
77 v15 ^= v3
78 v15 = bits.RotateLeft64(v15, -32)
79 v11 += v15
80 v7 ^= v11
81 v7 = bits.RotateLeft64(v7, -24)
82
83 v0 += m[s[4]]
84 v0 += v4
85 v12 ^= v0
86 v12 = bits.RotateLeft64(v12, -16)
87 v8 += v12
88 v4 ^= v8
89 v4 = bits.RotateLeft64(v4, -63)
90 v1 += m[s[5]]
91 v1 += v5
92 v13 ^= v1
93 v13 = bits.RotateLeft64(v13, -16)
94 v9 += v13
95 v5 ^= v9
96 v5 = bits.RotateLeft64(v5, -63)
97 v2 += m[s[6]]
98 v2 += v6
99 v14 ^= v2
100 v14 = bits.RotateLeft64(v14, -16)
101 v10 += v14
102 v6 ^= v10
103 v6 = bits.RotateLeft64(v6, -63)
104 v3 += m[s[7]]
105 v3 += v7
106 v15 ^= v3
107 v15 = bits.RotateLeft64(v15, -16)
108 v11 += v15
109 v7 ^= v11
110 v7 = bits.RotateLeft64(v7, -63)
111
112 v0 += m[s[8]]
113 v0 += v5
114 v15 ^= v0
115 v15 = bits.RotateLeft64(v15, -32)
116 v10 += v15
117 v5 ^= v10
118 v5 = bits.RotateLeft64(v5, -24)
119 v1 += m[s[9]]
120 v1 += v6
121 v12 ^= v1
122 v12 = bits.RotateLeft64(v12, -32)
123 v11 += v12
124 v6 ^= v11
125 v6 = bits.RotateLeft64(v6, -24)
126 v2 += m[s[10]]
127 v2 += v7
128 v13 ^= v2
129 v13 = bits.RotateLeft64(v13, -32)
130 v8 += v13
131 v7 ^= v8
132 v7 = bits.RotateLeft64(v7, -24)
133 v3 += m[s[11]]
134 v3 += v4
135 v14 ^= v3
136 v14 = bits.RotateLeft64(v14, -32)
137 v9 += v14
138 v4 ^= v9
139 v4 = bits.RotateLeft64(v4, -24)
140
141 v0 += m[s[12]]
142 v0 += v5
143 v15 ^= v0
144 v15 = bits.RotateLeft64(v15, -16)
145 v10 += v15
146 v5 ^= v10
147 v5 = bits.RotateLeft64(v5, -63)
148 v1 += m[s[13]]
149 v1 += v6
150 v12 ^= v1
151 v12 = bits.RotateLeft64(v12, -16)
152 v11 += v12
153 v6 ^= v11
154 v6 = bits.RotateLeft64(v6, -63)
155 v2 += m[s[14]]
156 v2 += v7
157 v13 ^= v2
158 v13 = bits.RotateLeft64(v13, -16)
159 v8 += v13
160 v7 ^= v8
161 v7 = bits.RotateLeft64(v7, -63)
162 v3 += m[s[15]]
163 v3 += v4
164 v14 ^= v3
165 v14 = bits.RotateLeft64(v14, -16)
166 v9 += v14
167 v4 ^= v9
168 v4 = bits.RotateLeft64(v4, -63)
169
170 }
171
172 h[0] ^= v0 ^ v8
173 h[1] ^= v1 ^ v9
174 h[2] ^= v2 ^ v10
175 h[3] ^= v3 ^ v11
176 h[4] ^= v4 ^ v12
177 h[5] ^= v5 ^ v13
178 h[6] ^= v6 ^ v14
179 h[7] ^= v7 ^ v15
180 }
181 c[0], c[1] = c0, c1
182}
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go b/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go
deleted file mode 100644
index 6e28668..0000000
--- a/vendor/golang.org/x/crypto/blake2b/blake2b_ref.go
+++ /dev/null
@@ -1,11 +0,0 @@
1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build !amd64 || purego || !gc
6
7package blake2b
8
// hashBlocks compresses blocks into h by calling hashBlocksGeneric directly.
// This file's build constraint selects it for non-amd64, purego and non-gc
// builds, where no assembly implementation is used.
9func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
10 hashBlocksGeneric(h, c, flag, blocks)
11}
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2x.go b/vendor/golang.org/x/crypto/blake2b/blake2x.go
deleted file mode 100644
index 52c414d..0000000
--- a/vendor/golang.org/x/crypto/blake2b/blake2x.go
+++ /dev/null
@@ -1,177 +0,0 @@
1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package blake2b
6
7import (
8 "encoding/binary"
9 "errors"
10 "io"
11)
12
13// XOF defines the interface to hash functions that
14// support arbitrary-length output.
15type XOF interface {
16 // Write absorbs more data into the hash's state. It panics if called
17 // after Read.
18 io.Writer
19
20 // Read reads more output from the hash. It returns io.EOF if the limit
21 // has been reached.
22 io.Reader
23
24 // Clone returns a copy of the XOF in its current state.
25 Clone() XOF
26
27 // Reset resets the XOF to its initial state, after which Write may be called again.
28 Reset()
29}
30
31// OutputLengthUnknown can be used as the size argument to NewXOF to indicate
32// the length of the output is not known in advance.
33const OutputLengthUnknown = 0
34
35// magicUnknownOutputLength is a magic value for the output size that indicates
36// an unknown number of output bytes.
// NewXOF substitutes it for OutputLengthUnknown and rejects it as an explicit size.
37const magicUnknownOutputLength = (1 << 32) - 1
38
39// maxOutputLength is the absolute maximum number of bytes to produce when the
40// number of output bytes is unknown.
// (1 << 32) * 64 is 2^38 bytes, the 256 GiB limit mentioned in NewXOF's documentation.
41const maxOutputLength = (1 << 32) * 64
42
43// NewXOF creates a new variable-output-length hash. The hash either produce a
44// known number of bytes (1 <= size < 2**32-1), or an unknown number of bytes
45// (size == OutputLengthUnknown). In the latter case, an absolute limit of
46// 256GiB applies.
47//
48// A non-nil key turns the hash into a MAC. The key must between
49// zero and 32 bytes long.
50func NewXOF(size uint32, key []byte) (XOF, error) {
51 if len(key) > Size {
52 return nil, errKeySize
53 }
54 if size == magicUnknownOutputLength {
55 // 2^32-1 indicates an unknown number of bytes and thus isn't a
56 // valid length.
57 return nil, errors.New("blake2b: XOF length too large")
58 }
59 if size == OutputLengthUnknown {
60 size = magicUnknownOutputLength
61 }
62 x := &xof{
63 d: digest{
64 size: Size,
65 keyLen: len(key),
66 },
67 length: size,
68 }
69 copy(x.d.key[:], key)
70 x.Reset()
71 return x, nil
72}
73
// xof is the XOF implementation returned by NewXOF.
74type xof struct {
75 d digest // underlying hash; absorbs input in Write and produces each output block in Read
76 length uint32 // requested output length, or magicUnknownOutputLength when unknown
77 remaining uint64 // number of output bytes that may still be read
78 cfg, root, block [Size]byte // cfg: config passed to initConfig; root: result of finalizing the input; block: current output block
79 offset int // index in block of the next unread byte; 0 when block is fully consumed
80 nodeOffset uint32 // counter written into cfg[8:] before each output block is generated
81 readMode bool // set by the first Read; Write panics while it is set
82}
83
84func (x *xof) Write(p []byte) (n int, err error) {
85 if x.readMode {
86 panic("blake2b: write to XOF after read")
87 }
88 return x.d.Write(p)
89}
90
91func (x *xof) Clone() XOF {
92 clone := *x
93 return &clone
94}
95
96func (x *xof) Reset() {
97 x.cfg[0] = byte(Size)
98 binary.LittleEndian.PutUint32(x.cfg[4:], uint32(Size)) // leaf length
99 binary.LittleEndian.PutUint32(x.cfg[12:], x.length) // XOF length
100 x.cfg[17] = byte(Size) // inner hash size
101
102 x.d.Reset()
103 x.d.h[1] ^= uint64(x.length) << 32
104
105 x.remaining = uint64(x.length)
106 if x.remaining == magicUnknownOutputLength {
107 x.remaining = maxOutputLength
108 }
109 x.offset, x.nodeOffset = 0, 0
110 x.readMode = false
111}
112
113func (x *xof) Read(p []byte) (n int, err error) {
114 if !x.readMode {
115 x.d.finalize(&x.root)
116 x.readMode = true
117 }
118
119 if x.remaining == 0 {
120 return 0, io.EOF
121 }
122
123 n = len(p)
124 if uint64(n) > x.remaining {
125 n = int(x.remaining)
126 p = p[:n]
127 }
128
129 if x.offset > 0 {
130 blockRemaining := Size - x.offset
131 if n < blockRemaining {
132 x.offset += copy(p, x.block[x.offset:])
133 x.remaining -= uint64(n)
134 return
135 }
136 copy(p, x.block[x.offset:])
137 p = p[blockRemaining:]
138 x.offset = 0
139 x.remaining -= uint64(blockRemaining)
140 }
141
142 for len(p) >= Size {
143 binary.LittleEndian.PutUint32(x.cfg[8:], x.nodeOffset)
144 x.nodeOffset++
145
146 x.d.initConfig(&x.cfg)
147 x.d.Write(x.root[:])
148 x.d.finalize(&x.block)
149
150 copy(p, x.block[:])
151 p = p[Size:]
152 x.remaining -= uint64(Size)
153 }
154
155 if todo := len(p); todo > 0 {
156 if x.remaining < uint64(Size) {
157 x.cfg[0] = byte(x.remaining)
158 }
159 binary.LittleEndian.PutUint32(x.cfg[8:], x.nodeOffset)
160 x.nodeOffset++
161
162 x.d.initConfig(&x.cfg)
163 x.d.Write(x.root[:])
164 x.d.finalize(&x.block)
165
166 x.offset = copy(p, x.block[:todo])
167 x.remaining -= uint64(todo)
168 }
169 return
170}
171
172func (d *digest) initConfig(cfg *[Size]byte) {
173 d.offset, d.c[0], d.c[1] = 0, 0, 0
174 for i := range d.h {
175 d.h[i] = iv[i] ^ binary.LittleEndian.Uint64(cfg[i*8:])
176 }
177}
diff --git a/vendor/golang.org/x/crypto/blake2b/register.go b/vendor/golang.org/x/crypto/blake2b/register.go
deleted file mode 100644
index d9fcac3..0000000
--- a/vendor/golang.org/x/crypto/blake2b/register.go
+++ /dev/null
@@ -1,32 +0,0 @@
1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build go1.9
6
7package blake2b
8
9import (
10 "crypto"
11 "hash"
12)
13
14func init() {
15 newHash256 := func() hash.Hash {
16 h, _ := New256(nil)
17 return h
18 }
19 newHash384 := func() hash.Hash {
20 h, _ := New384(nil)
21 return h
22 }
23
24 newHash512 := func() hash.Hash {
25 h, _ := New512(nil)
26 return h
27 }
28
29 crypto.RegisterHash(crypto.BLAKE2b_256, newHash256)
30 crypto.RegisterHash(crypto.BLAKE2b_384, newHash384)
31 crypto.RegisterHash(crypto.BLAKE2b_512, newHash512)
32}