about summary refs log tree commit diff stats
path: root/vendor/github.com/minio/md5-simd/block16_amd64.s
diff options
context:
space:
mode:
author	Rutger Broekhoff	2024-01-02 18:56:31 +0100
committer	Rutger Broekhoff	2024-01-02 18:56:31 +0100
commit	8db41da676ac8368ef7c2549d56239a5ff5eedde (patch)
tree	09c427fd66de2ec1ebffc8342f5fdbb84b0701b5 /vendor/github.com/minio/md5-simd/block16_amd64.s
parent	d4f75fb6db22e57577867445a022227e70958931 (diff)
download	gitolfs3-8db41da676ac8368ef7c2549d56239a5ff5eedde.tar.gz
download	gitolfs3-8db41da676ac8368ef7c2549d56239a5ff5eedde.zip
Delete vendor directory
Diffstat (limited to 'vendor/github.com/minio/md5-simd/block16_amd64.s')
-rw-r--r--	vendor/github.com/minio/md5-simd/block16_amd64.s	228
1 file changed, 0 insertions, 228 deletions
diff --git a/vendor/github.com/minio/md5-simd/block16_amd64.s b/vendor/github.com/minio/md5-simd/block16_amd64.s
deleted file mode 100644
index be0a43a..0000000
--- a/vendor/github.com/minio/md5-simd/block16_amd64.s
+++ /dev/null
@@ -1,228 +0,0 @@
1// Copyright (c) 2020 MinIO Inc. All rights reserved.
2// Use of this source code is governed by a license that can be
3// found in the LICENSE file.
4
5//+build !noasm,!appengine,gc
6
7// This is the AVX512 implementation of the MD5 block function (16-way parallel)
8
// prep(index): refresh the lane mask and gather the next message word.
// VPGATHERDD consumes (zeroes) its mask register as elements complete, so
// ktmp is reloaded from kmask before every gather. The gather pulls the
// 32-bit word at byte offset index*4 from each of the 16 input streams
// (effective address: base + ptrs[lane] + index*4) into mem.
9#define prep(index) \
10 KMOVQ kmask, ktmp \
11 VPGATHERDD index*4(base)(ptrs*1), ktmp, mem
12
// ROUND1: one MD5 round-1 step across all 16 lanes.
// Entry invariant: tmp = d (left by the previous step's trailing
// "VMOVAPD c, tmp", or set by the caller); mem = this step's message word.
// The VPXORQ plus VPTERNLOGD($0x6C) pair evaluates the round-1 nonlinear
// function F(b,c,d) = (b&c)|(~b&d) (ternlog on tmp=c^d, d, b).
// prep(index) is interleaved mid-step so the gather of the NEXT message
// word overlaps the dependent adds/rotate of this one.
// Exit invariant: tmp = c, which is the next (rotated) step's d.
// Note: consts is indexed in 64-byte units — each of the 64 MD5 constants
// is stored pre-broadcast to 16 dwords, hence 64*const(consts).
13#define ROUND1(a, b, c, d, index, const, shift) \
14 VPXORQ c, tmp, tmp \
15 VPADDD 64*const(consts), a, a \
16 VPADDD mem, a, a \
17 VPTERNLOGD $0x6C, b, d, tmp \
18 prep(index) \
19 VPADDD tmp, a, a \
20 VPROLD $shift, a, a \
21 VMOVAPD c, tmp \
22 VPADDD b, a, a
23
// ROUND1noload: identical to ROUND1 but without the interleaved prep()
// gather — used for the final round-1 step (word 15), after which all 16
// message words have already been cached in Z16-Z31 and no further loads
// are needed for this block. Same entry/exit invariants as ROUND1.
24#define ROUND1noload(a, b, c, d, const, shift) \
25 VPXORQ c, tmp, tmp \
26 VPADDD 64*const(consts), a, a \
27 VPADDD mem, a, a \
28 VPTERNLOGD $0x6C, b, d, tmp \
29 VPADDD tmp, a, a \
30 VPROLD $shift, a, a \
31 VMOVAPD c, tmp \
32 VPADDD b, a, a
33
// ROUND2: one MD5 round-2 step. Message words come from the cached
// registers Z16-Z31 (zreg), filled during round 1 — no memory access.
// Entry invariant: tmp = tmp2 = d.
// VANDNPD gives tmp = ~d & c; VPTERNLOGD($0xEC) then combines it with
// b and d so tmp2 = (b&d)|(c&~d) = G(b,c,d).
// Exit invariant: tmp = tmp2 = c, the next (rotated) step's d.
34#define ROUND2(a, b, c, d, zreg, const, shift) \
35 VPADDD 64*const(consts), a, a \
36 VPADDD zreg, a, a \
37 VANDNPD c, tmp, tmp \
38 VPTERNLOGD $0xEC, b, tmp, tmp2 \
39 VMOVAPD c, tmp \
40 VPADDD tmp2, a, a \
41 VMOVAPD c, tmp2 \
42 VPROLD $shift, a, a \
43 VPADDD b, a, a
44
// ROUND3: one MD5 round-3 step on cached words.
// VPTERNLOGD($0x96) is a three-input XOR, so tmp ^= b ^ d; with the entry
// invariant tmp = c this yields H(b,c,d) = b^c^d.
// Exit invariant: tmp = b, which is the "c" of the next rotated step.
45#define ROUND3(a, b, c, d, zreg, const, shift) \
46 VPADDD 64*const(consts), a, a \
47 VPADDD zreg, a, a \
48 VPTERNLOGD $0x96, b, d, tmp \
49 VPADDD tmp, a, a \
50 VPROLD $shift, a, a \
51 VMOVAPD b, tmp \
52 VPADDD b, a, a
53
// ROUND4: one MD5 round-4 step on cached words.
// Entry invariant: tmp = ~d (ones holds all-ones, so the trailing VPXORQ
// against ones is a NOT). VPTERNLOGD($0x36) computes
// tmp = c ^ (b | ~d) = I(b,c,d).
// Exit invariant: tmp = ~c, i.e. the next rotated step's ~d.
54#define ROUND4(a, b, c, d, zreg, const, shift) \
55 VPADDD 64*const(consts), a, a \
56 VPADDD zreg, a, a \
57 VPTERNLOGD $0x36, b, c, tmp \
58 VPADDD tmp, a, a \
59 VPROLD $shift, a, a \
60 VPXORQ c, ones, tmp \
61 VPADDD b, a, a
62
// block16: 16-way parallel MD5 block function (AVX512).
// Processes n bytes (whole 64-byte MD5 blocks) from 16 interleaved input
// streams, lane i reading at base+ptrs[i], updating 16 digests in state.
// Frame $0-40 = five 8-byte args: state+0, base+8, ptrs+16, mask+24, n+32.
// Flag 4 = NOSPLIT. mask selects the active lanes for the gathers.
// NOTE(review): the exact Go prototype lives in a sibling .go file of this
// vendored package — confirm pointer types there.
63TEXT ·block16(SB), 4, $0-40
64
65 MOVQ state+0(FP), BX
66 MOVQ base+8(FP), SI
67 MOVQ ptrs+16(FP), AX
68 KMOVQ mask+24(FP), K1
69 MOVQ n+32(FP), DX
// avx512md5consts: table of the 64 MD5 round constants, each pre-broadcast
// to 16 dwords (64 bytes per constant — see the 64*const offsets in the
// ROUND macros).
70 MOVQ ·avx512md5consts+0(SB), DI
71
// Register roles: a-d = running digest words; sa-sd = saved copy for the
// end-of-block feed-forward; tmp/tmp2 = round-function scratch;
// ptrs = 16 per-lane offsets; ones = all-ones (for NOT); mem = last
// gathered message word.
72#define a Z0
73#define b Z1
74#define c Z2
75#define d Z3
76
77#define sa Z4
78#define sb Z5
79#define sc Z6
80#define sd Z7
81
82#define tmp Z8
83#define tmp2 Z9
84#define ptrs Z10
85#define ones Z12
86#define mem Z15
87
88#define kmask K1
89#define ktmp K3
90
91// ----------------------------------------------------------
92// Registers Z16 through to Z31 are used for caching purposes
93// ----------------------------------------------------------
94
95#define dig BX
96#define count DX
97#define base SI
98#define consts DI
99
// Each digest word occupies 64 bytes in state (16 lanes x 4 bytes),
// hence the 0x40 stride between a, b, c, d.
100 // load digest into state registers
101 VMOVUPD (dig), a
102 VMOVUPD 0x40(dig), b
103 VMOVUPD 0x80(dig), c
104 VMOVUPD 0xc0(dig), d
105
106 // load source pointers
107 VMOVUPD 0x00(AX), ptrs
108
// Broadcast -1 to build the all-ones vector used as NOT mask in round 4.
109 MOVQ $-1, AX
110 VPBROADCASTQ AX, ones
111
// Main loop: one iteration consumes one 64-byte MD5 block per lane.
112loop:
// Snapshot the incoming state for the Davies-Meyer feed-forward below.
113 VMOVAPD a, sa
114 VMOVAPD b, sb
115 VMOVAPD c, sc
116 VMOVAPD d, sd
117
// Prime the pipeline: gather message word 0 and establish the ROUND1
// entry invariant tmp = d.
118 prep(0)
119 VMOVAPD d, tmp
120 VMOVAPD mem, Z16
121
// Round 1 (words in order 0..15). Each step gathers the NEXT word via
// prep(), and every gathered word is cached in Z16-Z31 so rounds 2-4
// never touch memory again for this block.
122 ROUND1(a,b,c,d, 1,0x00, 7)
123 VMOVAPD mem, Z17
124 ROUND1(d,a,b,c, 2,0x01,12)
125 VMOVAPD mem, Z18
126 ROUND1(c,d,a,b, 3,0x02,17)
127 VMOVAPD mem, Z19
128 ROUND1(b,c,d,a, 4,0x03,22)
129 VMOVAPD mem, Z20
130 ROUND1(a,b,c,d, 5,0x04, 7)
131 VMOVAPD mem, Z21
132 ROUND1(d,a,b,c, 6,0x05,12)
133 VMOVAPD mem, Z22
134 ROUND1(c,d,a,b, 7,0x06,17)
135 VMOVAPD mem, Z23
136 ROUND1(b,c,d,a, 8,0x07,22)
137 VMOVAPD mem, Z24
138 ROUND1(a,b,c,d, 9,0x08, 7)
139 VMOVAPD mem, Z25
140 ROUND1(d,a,b,c,10,0x09,12)
141 VMOVAPD mem, Z26
142 ROUND1(c,d,a,b,11,0x0a,17)
143 VMOVAPD mem, Z27
144 ROUND1(b,c,d,a,12,0x0b,22)
145 VMOVAPD mem, Z28
146 ROUND1(a,b,c,d,13,0x0c, 7)
147 VMOVAPD mem, Z29
148 ROUND1(d,a,b,c,14,0x0d,12)
149 VMOVAPD mem, Z30
150 ROUND1(c,d,a,b,15,0x0e,17)
151 VMOVAPD mem, Z31
152
// Final round-1 step uses the word already in mem; nothing left to gather.
153 ROUND1noload(b,c,d,a, 0x0f,22)
154
// Round 2 entry invariant: tmp = tmp2 = d. The Zxx arguments below walk
// the cached words in MD5's round-2 index order (1,6,11,0,...).
155 VMOVAPD d, tmp
156 VMOVAPD d, tmp2
157
158 ROUND2(a,b,c,d, Z17,0x10, 5)
159 ROUND2(d,a,b,c, Z22,0x11, 9)
160 ROUND2(c,d,a,b, Z27,0x12,14)
161 ROUND2(b,c,d,a, Z16,0x13,20)
162 ROUND2(a,b,c,d, Z21,0x14, 5)
163 ROUND2(d,a,b,c, Z26,0x15, 9)
164 ROUND2(c,d,a,b, Z31,0x16,14)
165 ROUND2(b,c,d,a, Z20,0x17,20)
166 ROUND2(a,b,c,d, Z25,0x18, 5)
167 ROUND2(d,a,b,c, Z30,0x19, 9)
168 ROUND2(c,d,a,b, Z19,0x1a,14)
169 ROUND2(b,c,d,a, Z24,0x1b,20)
170 ROUND2(a,b,c,d, Z29,0x1c, 5)
171 ROUND2(d,a,b,c, Z18,0x1d, 9)
172 ROUND2(c,d,a,b, Z23,0x1e,14)
173 ROUND2(b,c,d,a, Z28,0x1f,20)
174
// Round 3 entry invariant: tmp = c.
175 VMOVAPD c, tmp
176
177 ROUND3(a,b,c,d, Z21,0x20, 4)
178 ROUND3(d,a,b,c, Z24,0x21,11)
179 ROUND3(c,d,a,b, Z27,0x22,16)
180 ROUND3(b,c,d,a, Z30,0x23,23)
181 ROUND3(a,b,c,d, Z17,0x24, 4)
182 ROUND3(d,a,b,c, Z20,0x25,11)
183 ROUND3(c,d,a,b, Z23,0x26,16)
184 ROUND3(b,c,d,a, Z26,0x27,23)
185 ROUND3(a,b,c,d, Z29,0x28, 4)
186 ROUND3(d,a,b,c, Z16,0x29,11)
187 ROUND3(c,d,a,b, Z19,0x2a,16)
188 ROUND3(b,c,d,a, Z22,0x2b,23)
189 ROUND3(a,b,c,d, Z25,0x2c, 4)
190 ROUND3(d,a,b,c, Z28,0x2d,11)
191 ROUND3(c,d,a,b, Z31,0x2e,16)
192 ROUND3(b,c,d,a, Z18,0x2f,23)
193
// Round 4 entry invariant: tmp = ~d (ones = all-ones).
194 VPXORQ d, ones, tmp
195
196 ROUND4(a,b,c,d, Z16,0x30, 6)
197 ROUND4(d,a,b,c, Z23,0x31,10)
198 ROUND4(c,d,a,b, Z30,0x32,15)
199 ROUND4(b,c,d,a, Z21,0x33,21)
200 ROUND4(a,b,c,d, Z28,0x34, 6)
201 ROUND4(d,a,b,c, Z19,0x35,10)
202 ROUND4(c,d,a,b, Z26,0x36,15)
203 ROUND4(b,c,d,a, Z17,0x37,21)
204 ROUND4(a,b,c,d, Z24,0x38, 6)
205 ROUND4(d,a,b,c, Z31,0x39,10)
206 ROUND4(c,d,a,b, Z22,0x3a,15)
207 ROUND4(b,c,d,a, Z29,0x3b,21)
208 ROUND4(a,b,c,d, Z20,0x3c, 6)
209 ROUND4(d,a,b,c, Z27,0x3d,10)
210 ROUND4(c,d,a,b, Z18,0x3e,15)
211 ROUND4(b,c,d,a, Z25,0x3f,21)
212
// Feed-forward: add the block's input state back into the result.
213 VPADDD sa, a, a
214 VPADDD sb, b, b
215 VPADDD sc, c, c
216 VPADDD sd, d, d
217
// Advance every lane by one 64-byte block. n is presumably a positive
// multiple of 64 (TODO confirm caller guarantees this); otherwise the
// JNE would never see zero.
218 LEAQ 64(base), base
219 SUBQ $64, count
220 JNE loop
221
// Write the 16 updated digests back to state (same 0x40-stride layout
// as the load above).
222 VMOVUPD a, (dig)
223 VMOVUPD b, 0x40(dig)
224 VMOVUPD c, 0x80(dig)
225 VMOVUPD d, 0xc0(dig)
226
// Clear upper ZMM/YMM state before returning to non-AVX512 code, avoiding
// AVX-SSE transition penalties.
227 VZEROUPPER
228 RET