aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/minio/sha256-simd
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/minio/sha256-simd')
-rw-r--r--vendor/github.com/minio/sha256-simd/.gitignore1
-rw-r--r--vendor/github.com/minio/sha256-simd/LICENSE202
-rw-r--r--vendor/github.com/minio/sha256-simd/README.md137
-rw-r--r--vendor/github.com/minio/sha256-simd/cpuid_other.go50
-rw-r--r--vendor/github.com/minio/sha256-simd/sha256.go468
-rw-r--r--vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.asm686
-rw-r--r--vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.go501
-rw-r--r--vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.s267
-rw-r--r--vendor/github.com/minio/sha256-simd/sha256block_amd64.go31
-rw-r--r--vendor/github.com/minio/sha256-simd/sha256block_amd64.s266
-rw-r--r--vendor/github.com/minio/sha256-simd/sha256block_arm64.go37
-rw-r--r--vendor/github.com/minio/sha256-simd/sha256block_arm64.s192
-rw-r--r--vendor/github.com/minio/sha256-simd/sha256block_other.go29
-rw-r--r--vendor/github.com/minio/sha256-simd/test-architectures.sh15
14 files changed, 2882 insertions, 0 deletions
diff --git a/vendor/github.com/minio/sha256-simd/.gitignore b/vendor/github.com/minio/sha256-simd/.gitignore
new file mode 100644
index 0000000..c56069f
--- /dev/null
+++ b/vendor/github.com/minio/sha256-simd/.gitignore
@@ -0,0 +1 @@
*.test \ No newline at end of file
diff --git a/vendor/github.com/minio/sha256-simd/LICENSE b/vendor/github.com/minio/sha256-simd/LICENSE
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/vendor/github.com/minio/sha256-simd/LICENSE
@@ -0,0 +1,202 @@
1
2 Apache License
3 Version 2.0, January 2004
4 http://www.apache.org/licenses/
5
6 TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
8 1. Definitions.
9
10 "License" shall mean the terms and conditions for use, reproduction,
11 and distribution as defined by Sections 1 through 9 of this document.
12
13 "Licensor" shall mean the copyright owner or entity authorized by
14 the copyright owner that is granting the License.
15
16 "Legal Entity" shall mean the union of the acting entity and all
17 other entities that control, are controlled by, or are under common
18 control with that entity. For the purposes of this definition,
19 "control" means (i) the power, direct or indirect, to cause the
20 direction or management of such entity, whether by contract or
21 otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 outstanding shares, or (iii) beneficial ownership of such entity.
23
24 "You" (or "Your") shall mean an individual or Legal Entity
25 exercising permissions granted by this License.
26
27 "Source" form shall mean the preferred form for making modifications,
28 including but not limited to software source code, documentation
29 source, and configuration files.
30
31 "Object" form shall mean any form resulting from mechanical
32 transformation or translation of a Source form, including but
33 not limited to compiled object code, generated documentation,
34 and conversions to other media types.
35
36 "Work" shall mean the work of authorship, whether in Source or
37 Object form, made available under the License, as indicated by a
38 copyright notice that is included in or attached to the work
39 (an example is provided in the Appendix below).
40
41 "Derivative Works" shall mean any work, whether in Source or Object
42 form, that is based on (or derived from) the Work and for which the
43 editorial revisions, annotations, elaborations, or other modifications
44 represent, as a whole, an original work of authorship. For the purposes
45 of this License, Derivative Works shall not include works that remain
46 separable from, or merely link (or bind by name) to the interfaces of,
47 the Work and Derivative Works thereof.
48
49 "Contribution" shall mean any work of authorship, including
50 the original version of the Work and any modifications or additions
51 to that Work or Derivative Works thereof, that is intentionally
52 submitted to Licensor for inclusion in the Work by the copyright owner
53 or by an individual or Legal Entity authorized to submit on behalf of
54 the copyright owner. For the purposes of this definition, "submitted"
55 means any form of electronic, verbal, or written communication sent
56 to the Licensor or its representatives, including but not limited to
57 communication on electronic mailing lists, source code control systems,
58 and issue tracking systems that are managed by, or on behalf of, the
59 Licensor for the purpose of discussing and improving the Work, but
60 excluding communication that is conspicuously marked or otherwise
61 designated in writing by the copyright owner as "Not a Contribution."
62
63 "Contributor" shall mean Licensor and any individual or Legal Entity
64 on behalf of whom a Contribution has been received by Licensor and
65 subsequently incorporated within the Work.
66
67 2. Grant of Copyright License. Subject to the terms and conditions of
68 this License, each Contributor hereby grants to You a perpetual,
69 worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 copyright license to reproduce, prepare Derivative Works of,
71 publicly display, publicly perform, sublicense, and distribute the
72 Work and such Derivative Works in Source or Object form.
73
74 3. Grant of Patent License. Subject to the terms and conditions of
75 this License, each Contributor hereby grants to You a perpetual,
76 worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 (except as stated in this section) patent license to make, have made,
78 use, offer to sell, sell, import, and otherwise transfer the Work,
79 where such license applies only to those patent claims licensable
80 by such Contributor that are necessarily infringed by their
81 Contribution(s) alone or by combination of their Contribution(s)
82 with the Work to which such Contribution(s) was submitted. If You
83 institute patent litigation against any entity (including a
84 cross-claim or counterclaim in a lawsuit) alleging that the Work
85 or a Contribution incorporated within the Work constitutes direct
86 or contributory patent infringement, then any patent licenses
87 granted to You under this License for that Work shall terminate
88 as of the date such litigation is filed.
89
90 4. Redistribution. You may reproduce and distribute copies of the
91 Work or Derivative Works thereof in any medium, with or without
92 modifications, and in Source or Object form, provided that You
93 meet the following conditions:
94
95 (a) You must give any other recipients of the Work or
96 Derivative Works a copy of this License; and
97
98 (b) You must cause any modified files to carry prominent notices
99 stating that You changed the files; and
100
101 (c) You must retain, in the Source form of any Derivative Works
102 that You distribute, all copyright, patent, trademark, and
103 attribution notices from the Source form of the Work,
104 excluding those notices that do not pertain to any part of
105 the Derivative Works; and
106
107 (d) If the Work includes a "NOTICE" text file as part of its
108 distribution, then any Derivative Works that You distribute must
109 include a readable copy of the attribution notices contained
110 within such NOTICE file, excluding those notices that do not
111 pertain to any part of the Derivative Works, in at least one
112 of the following places: within a NOTICE text file distributed
113 as part of the Derivative Works; within the Source form or
114 documentation, if provided along with the Derivative Works; or,
115 within a display generated by the Derivative Works, if and
116 wherever such third-party notices normally appear. The contents
117 of the NOTICE file are for informational purposes only and
118 do not modify the License. You may add Your own attribution
119 notices within Derivative Works that You distribute, alongside
120 or as an addendum to the NOTICE text from the Work, provided
121 that such additional attribution notices cannot be construed
122 as modifying the License.
123
124 You may add Your own copyright statement to Your modifications and
125 may provide additional or different license terms and conditions
126 for use, reproduction, or distribution of Your modifications, or
127 for any such Derivative Works as a whole, provided Your use,
128 reproduction, and distribution of the Work otherwise complies with
129 the conditions stated in this License.
130
131 5. Submission of Contributions. Unless You explicitly state otherwise,
132 any Contribution intentionally submitted for inclusion in the Work
133 by You to the Licensor shall be under the terms and conditions of
134 this License, without any additional terms or conditions.
135 Notwithstanding the above, nothing herein shall supersede or modify
136 the terms of any separate license agreement you may have executed
137 with Licensor regarding such Contributions.
138
139 6. Trademarks. This License does not grant permission to use the trade
140 names, trademarks, service marks, or product names of the Licensor,
141 except as required for reasonable and customary use in describing the
142 origin of the Work and reproducing the content of the NOTICE file.
143
144 7. Disclaimer of Warranty. Unless required by applicable law or
145 agreed to in writing, Licensor provides the Work (and each
146 Contributor provides its Contributions) on an "AS IS" BASIS,
147 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 implied, including, without limitation, any warranties or conditions
149 of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 PARTICULAR PURPOSE. You are solely responsible for determining the
151 appropriateness of using or redistributing the Work and assume any
152 risks associated with Your exercise of permissions under this License.
153
154 8. Limitation of Liability. In no event and under no legal theory,
155 whether in tort (including negligence), contract, or otherwise,
156 unless required by applicable law (such as deliberate and grossly
157 negligent acts) or agreed to in writing, shall any Contributor be
158 liable to You for damages, including any direct, indirect, special,
159 incidental, or consequential damages of any character arising as a
160 result of this License or out of the use or inability to use the
161 Work (including but not limited to damages for loss of goodwill,
162 work stoppage, computer failure or malfunction, or any and all
163 other commercial damages or losses), even if such Contributor
164 has been advised of the possibility of such damages.
165
166 9. Accepting Warranty or Additional Liability. While redistributing
167 the Work or Derivative Works thereof, You may choose to offer,
168 and charge a fee for, acceptance of support, warranty, indemnity,
169 or other liability obligations and/or rights consistent with this
170 License. However, in accepting such obligations, You may act only
171 on Your own behalf and on Your sole responsibility, not on behalf
172 of any other Contributor, and only if You agree to indemnify,
173 defend, and hold each Contributor harmless for any liability
174 incurred by, or claims asserted against, such Contributor by reason
175 of your accepting any such warranty or additional liability.
176
177 END OF TERMS AND CONDITIONS
178
179 APPENDIX: How to apply the Apache License to your work.
180
181 To apply the Apache License to your work, attach the following
182 boilerplate notice, with the fields enclosed by brackets "[]"
183 replaced with your own identifying information. (Don't include
184 the brackets!) The text should be enclosed in the appropriate
185 comment syntax for the file format. We also recommend that a
186 file or class name and description of purpose be included on the
187 same "printed page" as the copyright notice for easier
188 identification within third-party archives.
189
190 Copyright [yyyy] [name of copyright owner]
191
192 Licensed under the Apache License, Version 2.0 (the "License");
193 you may not use this file except in compliance with the License.
194 You may obtain a copy of the License at
195
196 http://www.apache.org/licenses/LICENSE-2.0
197
198 Unless required by applicable law or agreed to in writing, software
199 distributed under the License is distributed on an "AS IS" BASIS,
200 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 See the License for the specific language governing permissions and
202 limitations under the License.
diff --git a/vendor/github.com/minio/sha256-simd/README.md b/vendor/github.com/minio/sha256-simd/README.md
new file mode 100644
index 0000000..6117488
--- /dev/null
+++ b/vendor/github.com/minio/sha256-simd/README.md
@@ -0,0 +1,137 @@
1# sha256-simd
2
3Accelerate SHA256 computations in pure Go using AVX512, SHA Extensions for x86 and ARM64 for ARM.
4On AVX512 it provides an up to 8x improvement (over 3 GB/s per core).
5SHA Extensions give a performance boost of close to 4x over native.
6
7## Introduction
8
9This package is designed as a replacement for `crypto/sha256`.
10For ARM CPUs with the Cryptography Extensions, advantage is taken of the SHA2 instructions resulting in a massive performance improvement.
11
12This package uses Golang assembly.
13The AVX512 version is based on Intel's "multi-buffer crypto library for IPSec" whereas the other Intel implementations are described in "Fast SHA-256 Implementations on Intel Architecture Processors" by J. Guilford et al.
14
15## Support for Intel SHA Extensions
16
17Support for the Intel SHA Extensions has been added by Kristofer Peterson (@svenski123), originally developed for spacemeshos [here](https://github.com/spacemeshos/POET/issues/23). On CPUs that support it (known thus far Intel Celeron J3455 and AMD Ryzen) it gives a significant boost in performance (with thanks to @AudriusButkevicius for reporting the results; full results [here](https://github.com/minio/sha256-simd/pull/37#issuecomment-451607827)).
18
19```
20$ benchcmp avx2.txt sha-ext.txt
21benchmark AVX2 MB/s SHA Ext MB/s speedup
22BenchmarkHash5M 514.40 1975.17 3.84x
23```
24
25Thanks to Kristofer Peterson, we also added further performance changes such as optimized padding and
26endian conversions, which sped up all implementations: the Intel SHA implementation alone doubled performance for small sizes,
27while the other changes increased everything by roughly 50%.
28
29## Support for AVX512
30
31We have added support for AVX512 which results in an up to 8x performance improvement over AVX2 (3.0 GHz Xeon Platinum 8124M CPU):
32
33```
34$ benchcmp avx2.txt avx512.txt
35benchmark AVX2 MB/s AVX512 MB/s speedup
36BenchmarkHash5M 448.62 3498.20 7.80x
37```
38
39The original code was developed by Intel as part of the [multi-buffer crypto library](https://github.com/intel/intel-ipsec-mb) for IPSec or more specifically this [AVX512](https://github.com/intel/intel-ipsec-mb/blob/master/avx512/sha256_x16_avx512.asm) implementation. The key idea behind it is to process a total of 16 checksums in parallel by “transposing” 16 (independent) messages of 64 bytes between a total of 16 ZMM registers (each 64 bytes wide).
40
41Transposing the input messages means that in order to take full advantage of the speedup you need to have a (server) workload where multiple threads are doing SHA256 calculations in parallel. Unfortunately for this algorithm it is not possible for two message blocks processed in parallel to be dependent on one another — because then the (interim) result of the first part of the message has to be an input into the processing of the second part of the message.
42
43Whereas the original Intel C implementation requires some sort of explicit scheduling of messages to be processed in parallel, for Golang it makes sense to take advantage of channels in order to group messages together and use channels as well for sending back the results (thereby effectively decoupling the calculations). We have implemented a fairly simple scheduling mechanism that seems to work well in practice.
44
45Due to this different way of scheduling, we decided to use an explicit method to instantiate the AVX512 version. Essentially one or more AVX512 processing servers ([`Avx512Server`](https://github.com/minio/sha256-simd/blob/master/sha256blockAvx512_amd64.go#L294)) have to be created whereby each server can hash over 3 GB/s on a single core. A `hash.Hash` object ([`Avx512Digest`](https://github.com/minio/sha256-simd/blob/master/sha256blockAvx512_amd64.go#L45)) is then instantiated using one of these servers and used in the regular fashion:
46
47```go
48import "github.com/minio/sha256-simd"
49
50func main() {
51 server := sha256.NewAvx512Server()
52 h512 := sha256.NewAvx512(server)
53 h512.Write(fileBlock)
54 digest := h512.Sum([]byte{})
55}
56```
57
58Note that, because of the scheduling overhead, for small messages (< 1 MB) you will be better off using the regular SHA256 hashing (but those are typically not performance critical anyway). Some other tips to get the best performance:
59* Have many goroutines doing SHA256 calculations in parallel.
60* Try to Write() messages in multiples of 64 bytes.
61* Try to keep the overall length of messages to a roughly similar size, e.g. 5 MB (this way all 16 ‘lanes’ in the AVX512 computations are contributing as much as possible).
62
63More detailed information can be found in this [blog](https://blog.minio.io/accelerate-sha256-up-to-8x-over-3-gb-s-per-core-with-avx512-a0b1d64f78f) post including scaling across cores.
64
65## Drop-In Replacement
66
67The following code snippet shows how you can use `github.com/minio/sha256-simd`.
68This will automatically select the fastest method for the architecture on which it will be executed.
69
70```go
71import "github.com/minio/sha256-simd"
72
73func main() {
74 ...
75 shaWriter := sha256.New()
76 io.Copy(shaWriter, file)
77 ...
78}
79```
80
81## Performance
82
83Below is the speed in MB/s for a single core (ranked fast to slow) for blocks larger than 1 MB.
84
85| Processor | SIMD | Speed (MB/s) |
86| --------------------------------- | ------- | ------------:|
87| 3.0 GHz Intel Xeon Platinum 8124M | AVX512 | 3498 |
88| 3.7 GHz AMD Ryzen 7 2700X | SHA Ext | 1979 |
89| 1.2 GHz ARM Cortex-A53 | ARM64 | 638 |
90
91## asm2plan9s
92
93In order to be able to work more easily with AVX512/AVX2 instructions, a separate tool was developed to convert SIMD instructions into the corresponding BYTE sequence as accepted by Go assembly. See [asm2plan9s](https://github.com/minio/asm2plan9s) for more information.
94
95## Why and benefits
96
97One of the most performance sensitive parts of the [Minio](https://github.com/minio/minio) object storage server is related to SHA256 hash sums calculations. For instance during multi part uploads each part that is uploaded needs to be verified for data integrity by the server.
98
99Other applications that can benefit from enhanced SHA256 performance are deduplication in storage systems, intrusion detection, version control systems, integrity checking, etc.
100
101## ARM SHA Extensions
102
103The 64-bit ARMv8 core has introduced new instructions for SHA1 and SHA2 acceleration as part of the [Cryptography Extensions](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0501f/CHDFJBCJ.html). Below you can see a small excerpt highlighting one of the rounds as is done for the SHA256 calculation process (for full code see [sha256block_arm64.s](https://github.com/minio/sha256-simd/blob/master/sha256block_arm64.s)).
104
105 ```
106 sha256h q2, q3, v9.4s
107 sha256h2 q3, q4, v9.4s
108 sha256su0 v5.4s, v6.4s
109 rev32 v8.16b, v8.16b
110 add v9.4s, v7.4s, v18.4s
111 mov v4.16b, v2.16b
112 sha256h q2, q3, v10.4s
113 sha256h2 q3, q4, v10.4s
114 sha256su0 v6.4s, v7.4s
115 sha256su1 v5.4s, v7.4s, v8.4s
116 ```
117
118### Detailed benchmarks
119
120Benchmarks generated on a 1.2 GHz Quad-Core ARM Cortex A53 equipped [Pine64](https://www.pine64.com/).
121
122```
123minio@minio-arm:$ benchcmp golang.txt arm64.txt
124benchmark golang arm64 speedup
125BenchmarkHash8Bytes-4 0.68 MB/s 5.70 MB/s 8.38x
126BenchmarkHash1K-4 5.65 MB/s 326.30 MB/s 57.75x
127BenchmarkHash8K-4 6.00 MB/s 570.63 MB/s 95.11x
128BenchmarkHash1M-4 6.05 MB/s 638.23 MB/s 105.49x
129```
130
131## License
132
133Released under the Apache License v2.0. You can find the complete text in the file LICENSE.
134
135## Contributing
136
137Contributions are welcome, please send PRs for any enhancements.
diff --git a/vendor/github.com/minio/sha256-simd/cpuid_other.go b/vendor/github.com/minio/sha256-simd/cpuid_other.go
new file mode 100644
index 0000000..97af6a1
--- /dev/null
+++ b/vendor/github.com/minio/sha256-simd/cpuid_other.go
@@ -0,0 +1,50 @@
1// Minio Cloud Storage, (C) 2021 Minio, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15
16package sha256
17
18import (
19 "bytes"
20 "io/ioutil"
21 "runtime"
22
23 "github.com/klauspost/cpuid/v2"
24)
25
26var (
27 hasIntelSha = runtime.GOARCH == "amd64" && cpuid.CPU.Supports(cpuid.SHA, cpuid.SSSE3, cpuid.SSE4)
28 hasAvx512 = cpuid.CPU.Supports(cpuid.AVX512F, cpuid.AVX512DQ, cpuid.AVX512BW, cpuid.AVX512VL)
29)
30
31func hasArmSha2() bool {
32 if cpuid.CPU.Has(cpuid.SHA2) {
33 return true
34 }
35 if runtime.GOARCH != "arm64" || runtime.GOOS != "linux" {
36 return false
37 }
38
39 // Fall back to hacky cpuinfo parsing...
40 const procCPUInfo = "/proc/cpuinfo"
41
42 // Feature to check for.
43 const sha256Feature = "sha2"
44
45 cpuInfo, err := ioutil.ReadFile(procCPUInfo)
46 if err != nil {
47 return false
48 }
49 return bytes.Contains(cpuInfo, []byte(sha256Feature))
50}
diff --git a/vendor/github.com/minio/sha256-simd/sha256.go b/vendor/github.com/minio/sha256-simd/sha256.go
new file mode 100644
index 0000000..f146bbd
--- /dev/null
+++ b/vendor/github.com/minio/sha256-simd/sha256.go
@@ -0,0 +1,468 @@
1/*
2 * Minio Cloud Storage, (C) 2016 Minio, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package sha256
18
19import (
20 "crypto/sha256"
21 "encoding/binary"
22 "errors"
23 "hash"
24)
25
const (
	// Size is the length of a SHA256 checksum in bytes.
	Size = 32

	// BlockSize is the block length of SHA256 in bytes.
	BlockSize = 64
)

const (
	// chunk is the number of bytes buffered per block; it equals BlockSize.
	chunk = BlockSize

	// init0..init7 are the eight words that Reset loads into digest.h.
	init0 = 0x6a09e667
	init1 = 0xbb67ae85
	init2 = 0x3c6ef372
	init3 = 0xa54ff53a
	init4 = 0x510e527f
	init5 = 0x9b05688c
	init6 = 0x1f83d9ab
	init7 = 0x5be0cd19
)
43
44// digest represents the partial evaluation of a checksum.
45type digest struct {
46 h [8]uint32
47 x [chunk]byte
48 nx int
49 len uint64
50}
51
52// Reset digest back to default
53func (d *digest) Reset() {
54 d.h[0] = init0
55 d.h[1] = init1
56 d.h[2] = init2
57 d.h[3] = init3
58 d.h[4] = init4
59 d.h[5] = init5
60 d.h[6] = init6
61 d.h[7] = init7
62 d.nx = 0
63 d.len = 0
64}
65
// blockfuncType identifies which block routine the package uses.
type blockfuncType int

const (
	// blockfuncStdlib is the zero value: New hands out crypto/sha256 hashes.
	blockfuncStdlib blockfuncType = iota
	// blockfuncIntelSha selects blockIntelShaGo.
	blockfuncIntelSha
	// blockfuncArmSha2 selects blockArmSha2Go.
	blockfuncArmSha2
)

// blockfuncForceGeneric is an untyped constant (-1). It matches none of the
// values above, so block() ends up in blockGeneric while New() still returns
// this package's digest.
const blockfuncForceGeneric = -1

// blockfunc is the selected block routine; init() assigns it.
var blockfunc blockfuncType
76
77func init() {
78 switch {
79 case hasIntelSha:
80 blockfunc = blockfuncIntelSha
81 case hasArmSha2():
82 blockfunc = blockfuncArmSha2
83 }
84}
85
86// New returns a new hash.Hash computing the SHA256 checksum.
87func New() hash.Hash {
88 if blockfunc == blockfuncStdlib {
89 // Fallback to the standard golang implementation
90 // if no features were found.
91 return sha256.New()
92 }
93
94 d := new(digest)
95 d.Reset()
96 return d
97}
98
99// Sum256 - single caller sha256 helper
100func Sum256(data []byte) (result [Size]byte) {
101 var d digest
102 d.Reset()
103 d.Write(data)
104 result = d.checkSum()
105 return
106}
107
108// Return size of checksum
109func (d *digest) Size() int { return Size }
110
111// Return blocksize of checksum
112func (d *digest) BlockSize() int { return BlockSize }
113
114// Write to digest
115func (d *digest) Write(p []byte) (nn int, err error) {
116 nn = len(p)
117 d.len += uint64(nn)
118 if d.nx > 0 {
119 n := copy(d.x[d.nx:], p)
120 d.nx += n
121 if d.nx == chunk {
122 block(d, d.x[:])
123 d.nx = 0
124 }
125 p = p[n:]
126 }
127 if len(p) >= chunk {
128 n := len(p) &^ (chunk - 1)
129 block(d, p[:n])
130 p = p[n:]
131 }
132 if len(p) > 0 {
133 d.nx = copy(d.x[:], p)
134 }
135 return
136}
137
138// Return sha256 sum in bytes
139func (d *digest) Sum(in []byte) []byte {
140 // Make a copy of d0 so that caller can keep writing and summing.
141 d0 := *d
142 hash := d0.checkSum()
143 return append(in, hash[:]...)
144}
145
146// Intermediate checksum function
147func (d *digest) checkSum() (digest [Size]byte) {
148 n := d.nx
149
150 var k [64]byte
151 copy(k[:], d.x[:n])
152
153 k[n] = 0x80
154
155 if n >= 56 {
156 block(d, k[:])
157
158 // clear block buffer - go compiles this to optimal 1x xorps + 4x movups
159 // unfortunately expressing this more succinctly results in much worse code
160 k[0] = 0
161 k[1] = 0
162 k[2] = 0
163 k[3] = 0
164 k[4] = 0
165 k[5] = 0
166 k[6] = 0
167 k[7] = 0
168 k[8] = 0
169 k[9] = 0
170 k[10] = 0
171 k[11] = 0
172 k[12] = 0
173 k[13] = 0
174 k[14] = 0
175 k[15] = 0
176 k[16] = 0
177 k[17] = 0
178 k[18] = 0
179 k[19] = 0
180 k[20] = 0
181 k[21] = 0
182 k[22] = 0
183 k[23] = 0
184 k[24] = 0
185 k[25] = 0
186 k[26] = 0
187 k[27] = 0
188 k[28] = 0
189 k[29] = 0
190 k[30] = 0
191 k[31] = 0
192 k[32] = 0
193 k[33] = 0
194 k[34] = 0
195 k[35] = 0
196 k[36] = 0
197 k[37] = 0
198 k[38] = 0
199 k[39] = 0
200 k[40] = 0
201 k[41] = 0
202 k[42] = 0
203 k[43] = 0
204 k[44] = 0
205 k[45] = 0
206 k[46] = 0
207 k[47] = 0
208 k[48] = 0
209 k[49] = 0
210 k[50] = 0
211 k[51] = 0
212 k[52] = 0
213 k[53] = 0
214 k[54] = 0
215 k[55] = 0
216 k[56] = 0
217 k[57] = 0
218 k[58] = 0
219 k[59] = 0
220 k[60] = 0
221 k[61] = 0
222 k[62] = 0
223 k[63] = 0
224 }
225 binary.BigEndian.PutUint64(k[56:64], uint64(d.len)<<3)
226 block(d, k[:])
227
228 {
229 const i = 0
230 binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
231 }
232 {
233 const i = 1
234 binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
235 }
236 {
237 const i = 2
238 binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
239 }
240 {
241 const i = 3
242 binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
243 }
244 {
245 const i = 4
246 binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
247 }
248 {
249 const i = 5
250 binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
251 }
252 {
253 const i = 6
254 binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
255 }
256 {
257 const i = 7
258 binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
259 }
260
261 return
262}
263
264func block(dig *digest, p []byte) {
265 if blockfunc == blockfuncIntelSha {
266 blockIntelShaGo(dig, p)
267 } else if blockfunc == blockfuncArmSha2 {
268 blockArmSha2Go(dig, p)
269 } else {
270 blockGeneric(dig, p)
271 }
272}
273
274func blockGeneric(dig *digest, p []byte) {
275 var w [64]uint32
276 h0, h1, h2, h3, h4, h5, h6, h7 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7]
277 for len(p) >= chunk {
278 // Can interlace the computation of w with the
279 // rounds below if needed for speed.
280 for i := 0; i < 16; i++ {
281 j := i * 4
282 w[i] = uint32(p[j])<<24 | uint32(p[j+1])<<16 | uint32(p[j+2])<<8 | uint32(p[j+3])
283 }
284 for i := 16; i < 64; i++ {
285 v1 := w[i-2]
286 t1 := (v1>>17 | v1<<(32-17)) ^ (v1>>19 | v1<<(32-19)) ^ (v1 >> 10)
287 v2 := w[i-15]
288 t2 := (v2>>7 | v2<<(32-7)) ^ (v2>>18 | v2<<(32-18)) ^ (v2 >> 3)
289 w[i] = t1 + w[i-7] + t2 + w[i-16]
290 }
291
292 a, b, c, d, e, f, g, h := h0, h1, h2, h3, h4, h5, h6, h7
293
294 for i := 0; i < 64; i++ {
295 t1 := h + ((e>>6 | e<<(32-6)) ^ (e>>11 | e<<(32-11)) ^ (e>>25 | e<<(32-25))) + ((e & f) ^ (^e & g)) + _K[i] + w[i]
296
297 t2 := ((a>>2 | a<<(32-2)) ^ (a>>13 | a<<(32-13)) ^ (a>>22 | a<<(32-22))) + ((a & b) ^ (a & c) ^ (b & c))
298
299 h = g
300 g = f
301 f = e
302 e = d + t1
303 d = c
304 c = b
305 b = a
306 a = t1 + t2
307 }
308
309 h0 += a
310 h1 += b
311 h2 += c
312 h3 += d
313 h4 += e
314 h5 += f
315 h6 += g
316 h7 += h
317
318 p = p[chunk:]
319 }
320
321 dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h0, h1, h2, h3, h4, h5, h6, h7
322}
323
// _K holds the 64 per-round constants that blockGeneric adds in round i.
var _K = []uint32{
	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
}
390
391const (
392 magic256 = "sha\x03"
393 marshaledSize = len(magic256) + 8*4 + chunk + 8
394)
395
396func (d *digest) MarshalBinary() ([]byte, error) {
397 b := make([]byte, 0, marshaledSize)
398 b = append(b, magic256...)
399 b = appendUint32(b, d.h[0])
400 b = appendUint32(b, d.h[1])
401 b = appendUint32(b, d.h[2])
402 b = appendUint32(b, d.h[3])
403 b = appendUint32(b, d.h[4])
404 b = appendUint32(b, d.h[5])
405 b = appendUint32(b, d.h[6])
406 b = appendUint32(b, d.h[7])
407 b = append(b, d.x[:d.nx]...)
408 b = b[:len(b)+len(d.x)-d.nx] // already zero
409 b = appendUint64(b, d.len)
410 return b, nil
411}
412
413func (d *digest) UnmarshalBinary(b []byte) error {
414 if len(b) < len(magic256) || string(b[:len(magic256)]) != magic256 {
415 return errors.New("crypto/sha256: invalid hash state identifier")
416 }
417 if len(b) != marshaledSize {
418 return errors.New("crypto/sha256: invalid hash state size")
419 }
420 b = b[len(magic256):]
421 b, d.h[0] = consumeUint32(b)
422 b, d.h[1] = consumeUint32(b)
423 b, d.h[2] = consumeUint32(b)
424 b, d.h[3] = consumeUint32(b)
425 b, d.h[4] = consumeUint32(b)
426 b, d.h[5] = consumeUint32(b)
427 b, d.h[6] = consumeUint32(b)
428 b, d.h[7] = consumeUint32(b)
429 b = b[copy(d.x[:], b):]
430 b, d.len = consumeUint64(b)
431 d.nx = int(d.len % chunk)
432 return nil
433}
434
// appendUint32 appends v to b as four big-endian bytes and returns the
// extended slice.
func appendUint32(b []byte, v uint32) []byte {
	var enc [4]byte
	binary.BigEndian.PutUint32(enc[:], v)
	return append(b, enc[:]...)
}
443
// appendUint64 appends v to b as eight big-endian bytes and returns the
// extended slice.
func appendUint64(b []byte, v uint64) []byte {
	var enc [8]byte
	binary.BigEndian.PutUint64(enc[:], v)
	return append(b, enc[:]...)
}
456
// consumeUint64 decodes the first eight bytes of b as a big-endian uint64 and
// returns the remainder of b along with the value. It panics when b holds
// fewer than eight bytes.
func consumeUint64(b []byte) ([]byte, uint64) {
	// Decode before slicing so a short input fails on the index check.
	value := binary.BigEndian.Uint64(b)
	return b[8:], value
}
463
// consumeUint32 decodes the first four bytes of b as a big-endian uint32 and
// returns the remainder of b along with the value. It panics when b holds
// fewer than four bytes.
func consumeUint32(b []byte) ([]byte, uint32) {
	// Decode before slicing so a short input fails on the index check.
	value := binary.BigEndian.Uint32(b)
	return b[4:], value
}
diff --git a/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.asm b/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.asm
new file mode 100644
index 0000000..c959b1a
--- /dev/null
+++ b/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.asm
@@ -0,0 +1,686 @@
1
2// 16x Parallel implementation of SHA256 for AVX512
3
4//
5// Minio Cloud Storage, (C) 2017 Minio, Inc.
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10//
11// http://www.apache.org/licenses/LICENSE-2.0
12//
13// Unless required by applicable law or agreed to in writing, software
14// distributed under the License is distributed on an "AS IS" BASIS,
15// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16// See the License for the specific language governing permissions and
17// limitations under the License.
18
19//
20// This code is based on the Intel Multi-Buffer Crypto for IPSec library
21// and more specifically the following implementation:
22// https://github.com/intel/intel-ipsec-mb/blob/master/avx512/sha256_x16_avx512.asm
23//
24// For Golang it has been converted into Plan 9 assembly with the help of
25// github.com/minio/asm2plan9s to assemble the AVX512 instructions
26//
27
28// Copyright (c) 2017, Intel Corporation
29//
30// Redistribution and use in source and binary forms, with or without
31// modification, are permitted provided that the following conditions are met:
32//
33// * Redistributions of source code must retain the above copyright notice,
34// this list of conditions and the following disclaimer.
35// * Redistributions in binary form must reproduce the above copyright
36// notice, this list of conditions and the following disclaimer in the
37// documentation and/or other materials provided with the distribution.
38// * Neither the name of Intel Corporation nor the names of its contributors
39// may be used to endorse or promote products derived from this software
40// without specific prior written permission.
41//
42// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
43// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
45// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
46// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
48// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
49// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
50// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
51// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52
// Byte distance between consecutive digest rows; the state is addressed below
// as STATE + n*SHA256_DIGEST_ROW_SIZE for n = 0..7.
53#define SHA256_DIGEST_ROW_SIZE 64
54
// General-purpose registers. Most are named twice: a lower-case Intel-syntax
// name used by the AVX512 instructions and a *_P9 name used by the Plan 9
// syntax instructions (MOVQ, LEAQ, TESTQ, ...). Both names refer to the same
// hardware register.
55// arg1
56#define STATE rdi
57#define STATE_P9 DI
58// arg2
59#define INP_SIZE rsi
60#define INP_SIZE_P9 SI
61
// IDX is the running byte offset added to every lane's input pointer;
// TBL points at the table currently being read.
62#define IDX rcx
63#define TBL rdx
64#define TBL_P9 DX
65
// INPUT holds the address of the first input slice header.
66#define INPUT rax
67#define INPUT_P9 AX
68
// inp0 (r9) receives the data pointer of the lane being loaded; the load
// macros write it through its Plan 9 name R9.
69#define inp0 r9
70#define SCRATCH_P9 R12
71#define SCRATCH r12
72#define maskp r13
73#define MASKP_P9 R13
74#define mask r14
75#define MASK_P9 R14
76
// A-H: the eight SHA256 working variables, one 32-bit lane per stream.
// T1 and TMP0-TMP6: temporaries used by the round and schedule macros.
77#define A zmm0
78#define B zmm1
79#define C zmm2
80#define D zmm3
81#define E zmm4
82#define F zmm5
83#define G zmm6
84#define H zmm7
85#define T1 zmm8
86#define TMP0 zmm9
87#define TMP1 zmm10
88#define TMP2 zmm11
89#define TMP3 zmm12
90#define TMP4 zmm13
91#define TMP5 zmm14
92#define TMP6 zmm15
93
// W0-W15: the sliding 16-entry message schedule window.
94#define W0 zmm16
95#define W1 zmm17
96#define W2 zmm18
97#define W3 zmm19
98#define W4 zmm20
99#define W5 zmm21
100#define W6 zmm22
101#define W7 zmm23
102#define W8 zmm24
103#define W9 zmm25
104#define W10 zmm26
105#define W11 zmm27
106#define W12 zmm28
107#define W13 zmm29
108#define W14 zmm30
109#define W15 zmm31
110
111
// TRANSPOSE16 transposes the 16x16 matrix of 32-bit words held in _r0.._r15
// in place, using _t0 and _t1 as temporaries. It also overwrites BX and R8
// (via their Intel names rbx and r8) with the addresses of the two permute
// masks.
112#define TRANSPOSE16(_r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7, _r8, _r9, _r10, _r11, _r12, _r13, _r14, _r15, _t0, _t1) \
113 \
114 \ // input r0 = {a15 a14 a13 a12 a11 a10 a9 a8 a7 a6 a5 a4 a3 a2 a1 a0}
115 \ // r1 = {b15 b14 b13 b12 b11 b10 b9 b8 b7 b6 b5 b4 b3 b2 b1 b0}
116 \ // r2 = {c15 c14 c13 c12 c11 c10 c9 c8 c7 c6 c5 c4 c3 c2 c1 c0}
117 \ // r3 = {d15 d14 d13 d12 d11 d10 d9 d8 d7 d6 d5 d4 d3 d2 d1 d0}
118 \ // r4 = {e15 e14 e13 e12 e11 e10 e9 e8 e7 e6 e5 e4 e3 e2 e1 e0}
119 \ // r5 = {f15 f14 f13 f12 f11 f10 f9 f8 f7 f6 f5 f4 f3 f2 f1 f0}
120 \ // r6 = {g15 g14 g13 g12 g11 g10 g9 g8 g7 g6 g5 g4 g3 g2 g1 g0}
121 \ // r7 = {h15 h14 h13 h12 h11 h10 h9 h8 h7 h6 h5 h4 h3 h2 h1 h0}
122 \ // r8 = {i15 i14 i13 i12 i11 i10 i9 i8 i7 i6 i5 i4 i3 i2 i1 i0}
123 \ // r9 = {j15 j14 j13 j12 j11 j10 j9 j8 j7 j6 j5 j4 j3 j2 j1 j0}
124 \ // r10 = {k15 k14 k13 k12 k11 k10 k9 k8 k7 k6 k5 k4 k3 k2 k1 k0}
125 \ // r11 = {l15 l14 l13 l12 l11 l10 l9 l8 l7 l6 l5 l4 l3 l2 l1 l0}
126 \ // r12 = {m15 m14 m13 m12 m11 m10 m9 m8 m7 m6 m5 m4 m3 m2 m1 m0}
127 \ // r13 = {n15 n14 n13 n12 n11 n10 n9 n8 n7 n6 n5 n4 n3 n2 n1 n0}
128 \ // r14 = {o15 o14 o13 o12 o11 o10 o9 o8 o7 o6 o5 o4 o3 o2 o1 o0}
129 \ // r15 = {p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0}
130 \
131 \ // output r0 = { p0 o0 n0 m0 l0 k0 j0 i0 h0 g0 f0 e0 d0 c0 b0 a0}
132 \ // r1 = { p1 o1 n1 m1 l1 k1 j1 i1 h1 g1 f1 e1 d1 c1 b1 a1}
133 \ // r2 = { p2 o2 n2 m2 l2 k2 j2 i2 h2 g2 f2 e2 d2 c2 b2 a2}
134 \ // r3 = { p3 o3 n3 m3 l3 k3 j3 i3 h3 g3 f3 e3 d3 c3 b3 a3}
135 \ // r4 = { p4 o4 n4 m4 l4 k4 j4 i4 h4 g4 f4 e4 d4 c4 b4 a4}
136 \ // r5 = { p5 o5 n5 m5 l5 k5 j5 i5 h5 g5 f5 e5 d5 c5 b5 a5}
137 \ // r6 = { p6 o6 n6 m6 l6 k6 j6 i6 h6 g6 f6 e6 d6 c6 b6 a6}
138 \ // r7 = { p7 o7 n7 m7 l7 k7 j7 i7 h7 g7 f7 e7 d7 c7 b7 a7}
139 \ // r8 = { p8 o8 n8 m8 l8 k8 j8 i8 h8 g8 f8 e8 d8 c8 b8 a8}
140 \ // r9 = { p9 o9 n9 m9 l9 k9 j9 i9 h9 g9 f9 e9 d9 c9 b9 a9}
141 \ // r10 = {p10 o10 n10 m10 l10 k10 j10 i10 h10 g10 f10 e10 d10 c10 b10 a10}
142 \ // r11 = {p11 o11 n11 m11 l11 k11 j11 i11 h11 g11 f11 e11 d11 c11 b11 a11}
143 \ // r12 = {p12 o12 n12 m12 l12 k12 j12 i12 h12 g12 f12 e12 d12 c12 b12 a12}
144 \ // r13 = {p13 o13 n13 m13 l13 k13 j13 i13 h13 g13 f13 e13 d13 c13 b13 a13}
145 \ // r14 = {p14 o14 n14 m14 l14 k14 j14 i14 h14 g14 f14 e14 d14 c14 b14 a14}
146 \ // r15 = {p15 o15 n15 m15 l15 k15 j15 i15 h15 g15 f15 e15 d15 c15 b15 a15}
147 \
148 \ // process top half
149 vshufps _t0, _r0, _r1, 0x44 \ // t0 = {b13 b12 a13 a12 b9 b8 a9 a8 b5 b4 a5 a4 b1 b0 a1 a0}
150 vshufps _r0, _r0, _r1, 0xEE \ // r0 = {b15 b14 a15 a14 b11 b10 a11 a10 b7 b6 a7 a6 b3 b2 a3 a2}
151 vshufps _t1, _r2, _r3, 0x44 \ // t1 = {d13 d12 c13 c12 d9 d8 c9 c8 d5 d4 c5 c4 d1 d0 c1 c0}
152 vshufps _r2, _r2, _r3, 0xEE \ // r2 = {d15 d14 c15 c14 d11 d10 c11 c10 d7 d6 c7 c6 d3 d2 c3 c2}
153 \
154 vshufps _r3, _t0, _t1, 0xDD \ // r3 = {d13 c13 b13 a13 d9 c9 b9 a9 d5 c5 b5 a5 d1 c1 b1 a1}
155 vshufps _r1, _r0, _r2, 0x88 \ // r1 = {d14 c14 b14 a14 d10 c10 b10 a10 d6 c6 b6 a6 d2 c2 b2 a2}
156 vshufps _r0, _r0, _r2, 0xDD \ // r0 = {d15 c15 b15 a15 d11 c11 b11 a11 d7 c7 b7 a7 d3 c3 b3 a3}
157 vshufps _t0, _t0, _t1, 0x88 \ // t0 = {d12 c12 b12 a12 d8 c8 b8 a8 d4 c4 b4 a4 d0 c0 b0 a0}
158 \
159 \ // use r2 in place of t0
160 vshufps _r2, _r4, _r5, 0x44 \ // r2 = {f13 f12 e13 e12 f9 f8 e9 e8 f5 f4 e5 e4 f1 f0 e1 e0}
161 vshufps _r4, _r4, _r5, 0xEE \ // r4 = {f15 f14 e15 e14 f11 f10 e11 e10 f7 f6 e7 e6 f3 f2 e3 e2}
162 vshufps _t1, _r6, _r7, 0x44 \ // t1 = {h13 h12 g13 g12 h9 h8 g9 g8 h5 h4 g5 g4 h1 h0 g1 g0}
163 vshufps _r6, _r6, _r7, 0xEE \ // r6 = {h15 h14 g15 g14 h11 h10 g11 g10 h7 h6 g7 g6 h3 h2 g3 g2}
164 \
165 vshufps _r7, _r2, _t1, 0xDD \ // r7 = {h13 g13 f13 e13 h9 g9 f9 e9 h5 g5 f5 e5 h1 g1 f1 e1}
166 vshufps _r5, _r4, _r6, 0x88 \ // r5 = {h14 g14 f14 e14 h10 g10 f10 e10 h6 g6 f6 e6 h2 g2 f2 e2}
167 vshufps _r4, _r4, _r6, 0xDD \ // r4 = {h15 g15 f15 e15 h11 g11 f11 e11 h7 g7 f7 e7 h3 g3 f3 e3}
168 vshufps _r2, _r2, _t1, 0x88 \ // r2 = {h12 g12 f12 e12 h8 g8 f8 e8 h4 g4 f4 e4 h0 g0 f0 e0}
169 \
170 \ // use r6 in place of t0
171 vshufps _r6, _r8, _r9, 0x44 \ // r6 = {j13 j12 i13 i12 j9 j8 i9 i8 j5 j4 i5 i4 j1 j0 i1 i0}
172 vshufps _r8, _r8, _r9, 0xEE \ // r8 = {j15 j14 i15 i14 j11 j10 i11 i10 j7 j6 i7 i6 j3 j2 i3 i2}
173 vshufps _t1, _r10, _r11, 0x44 \ // t1 = {l13 l12 k13 k12 l9 l8 k9 k8 l5 l4 k5 k4 l1 l0 k1 k0}
174 vshufps _r10, _r10, _r11, 0xEE \ // r10 = {l15 l14 k15 k14 l11 l10 k11 k10 l7 l6 k7 k6 l3 l2 k3 k2}
175 \
176 vshufps _r11, _r6, _t1, 0xDD \ // r11 = {l13 k13 j13 i13 l9 k9 j9 i9 l5 k5 j5 i5 l1 k1 j1 i1}
177 vshufps _r9, _r8, _r10, 0x88 \ // r9 = {l14 k14 j14 i14 l10 k10 j10 i10 l6 k6 j6 i6 l2 k2 j2 i2}
178 vshufps _r8, _r8, _r10, 0xDD \ // r8 = {l15 k15 j15 i15 l11 k11 j11 i11 l7 k7 j7 i7 l3 k3 j3 i3}
179 vshufps _r6, _r6, _t1, 0x88 \ // r6 = {l12 k12 j12 i12 l8 k8 j8 i8 l4 k4 j4 i4 l0 k0 j0 i0}
180 \
181 \ // use r10 in place of t0
182 vshufps _r10, _r12, _r13, 0x44 \ // r10 = {n13 n12 m13 m12 n9 n8 m9 m8 n5 n4 m5 m4 n1 n0 m1 m0}
183 vshufps _r12, _r12, _r13, 0xEE \ // r12 = {n15 n14 m15 m14 n11 n10 m11 m10 n7 n6 m7 m6 n3 n2 m3 m2}
184 vshufps _t1, _r14, _r15, 0x44 \ // t1 = {p13 p12 o13 o12 p9 p8 o9 o8 p5 p4 o5 o4 p1 p0 o1 o0}
185 vshufps _r14, _r14, _r15, 0xEE \ // r14 = {p15 p14 o15 o14 p11 p10 o11 o10 p7 p6 o7 o6 p3 p2 o3 o2}
186 \
187 vshufps _r15, _r10, _t1, 0xDD \ // r15 = {p13 o13 n13 m13 p9 o9 n9 m9 p5 o5 n5 m5 p1 o1 n1 m1}
188 vshufps _r13, _r12, _r14, 0x88 \ // r13 = {p14 o14 n14 m14 p10 o10 n10 m10 p6 o6 n6 m6 p2 o2 n2 m2}
189 vshufps _r12, _r12, _r14, 0xDD \ // r12 = {p15 o15 n15 m15 p11 o11 n11 m11 p7 o7 n7 m7 p3 o3 n3 m3}
190 vshufps _r10, _r10, _t1, 0x88 \ // r10 = {p12 o12 n12 m12 p8 o8 n8 m8 p4 o4 n4 m4 p0 o0 n0 m0}
191 \
192 \ // At this point, the registers that contain interesting data are:
193 \ // t0, r3, r1, r0, r2, r7, r5, r4, r6, r11, r9, r8, r10, r15, r13, r12
194 \ // Can use t1 and r14 as scratch registers
195 LEAQ PSHUFFLE_TRANSPOSE16_MASK1<>(SB), BX \
196 LEAQ PSHUFFLE_TRANSPOSE16_MASK2<>(SB), R8 \
197 \
198 vmovdqu32 _r14, [rbx] \
199 vpermi2q _r14, _t0, _r2 \ // r14 = {h8 g8 f8 e8 d8 c8 b8 a8 h0 g0 f0 e0 d0 c0 b0 a0}
200 vmovdqu32 _t1, [r8] \
201 vpermi2q _t1, _t0, _r2 \ // t1 = {h12 g12 f12 e12 d12 c12 b12 a12 h4 g4 f4 e4 d4 c4 b4 a4}
202 \
203 vmovdqu32 _r2, [rbx] \
204 vpermi2q _r2, _r3, _r7 \ // r2 = {h9 g9 f9 e9 d9 c9 b9 a9 h1 g1 f1 e1 d1 c1 b1 a1}
205 vmovdqu32 _t0, [r8] \
206 vpermi2q _t0, _r3, _r7 \ // t0 = {h13 g13 f13 e13 d13 c13 b13 a13 h5 g5 f5 e5 d5 c5 b5 a5}
207 \
208 vmovdqu32 _r3, [rbx] \
209 vpermi2q _r3, _r1, _r5 \ // r3 = {h10 g10 f10 e10 d10 c10 b10 a10 h2 g2 f2 e2 d2 c2 b2 a2}
210 vmovdqu32 _r7, [r8] \
211 vpermi2q _r7, _r1, _r5 \ // r7 = {h14 g14 f14 e14 d14 c14 b14 a14 h6 g6 f6 e6 d6 c6 b6 a6}
212 \
213 vmovdqu32 _r1, [rbx] \
214 vpermi2q _r1, _r0, _r4 \ // r1 = {h11 g11 f11 e11 d11 c11 b11 a11 h3 g3 f3 e3 d3 c3 b3 a3}
215 vmovdqu32 _r5, [r8] \
216 vpermi2q _r5, _r0, _r4 \ // r5 = {h15 g15 f15 e15 d15 c15 b15 a15 h7 g7 f7 e7 d7 c7 b7 a7}
217 \
218 vmovdqu32 _r0, [rbx] \
219 vpermi2q _r0, _r6, _r10 \ // r0 = {p8 o8 n8 m8 l8 k8 j8 i8 p0 o0 n0 m0 l0 k0 j0 i0}
220 vmovdqu32 _r4, [r8] \
221 vpermi2q _r4, _r6, _r10 \ // r4 = {p12 o12 n12 m12 l12 k12 j12 i12 p4 o4 n4 m4 l4 k4 j4 i4}
222 \
223 vmovdqu32 _r6, [rbx] \
224 vpermi2q _r6, _r11, _r15 \ // r6 = {p9 o9 n9 m9 l9 k9 j9 i9 p1 o1 n1 m1 l1 k1 j1 i1}
225 vmovdqu32 _r10, [r8] \
226 vpermi2q _r10, _r11, _r15 \ // r10 = {p13 o13 n13 m13 l13 k13 j13 i13 p5 o5 n5 m5 l5 k5 j5 i5}
227 \
228 vmovdqu32 _r11, [rbx] \
229 vpermi2q _r11, _r9, _r13 \ // r11 = {p10 o10 n10 m10 l10 k10 j10 i10 p2 o2 n2 m2 l2 k2 j2 i2}
230 vmovdqu32 _r15, [r8] \
231 vpermi2q _r15, _r9, _r13 \ // r15 = {p14 o14 n14 m14 l14 k14 j14 i14 p6 o6 n6 m6 l6 k6 j6 i6}
232 \
233 vmovdqu32 _r9, [rbx] \
234 vpermi2q _r9, _r8, _r12 \ // r9 = {p11 o11 n11 m11 l11 k11 j11 i11 p3 o3 n3 m3 l3 k3 j3 i3}
235 vmovdqu32 _r13, [r8] \
236 vpermi2q _r13, _r8, _r12 \ // r13 = {p15 o15 n15 m15 l15 k15 j15 i15 p7 o7 n7 m7 l7 k7 j7 i7}
237 \
238 \ // At this point r8 and r12 can be used as scratch registers
239 vshuff64x2 _r8, _r14, _r0, 0xEE \ // r8 = {p8 o8 n8 m8 l8 k8 j8 i8 h8 g8 f8 e8 d8 c8 b8 a8}
240 vshuff64x2 _r0, _r14, _r0, 0x44 \ // r0 = {p0 o0 n0 m0 l0 k0 j0 i0 h0 g0 f0 e0 d0 c0 b0 a0}
241 \
242 vshuff64x2 _r12, _t1, _r4, 0xEE \ // r12 = {p12 o12 n12 m12 l12 k12 j12 i12 h12 g12 f12 e12 d12 c12 b12 a12}
243 vshuff64x2 _r4, _t1, _r4, 0x44 \ // r4 = {p4 o4 n4 m4 l4 k4 j4 i4 h4 g4 f4 e4 d4 c4 b4 a4}
244 \
245 vshuff64x2 _r14, _r7, _r15, 0xEE \ // r14 = {p14 o14 n14 m14 l14 k14 j14 i14 h14 g14 f14 e14 d14 c14 b14 a14}
246 vshuff64x2 _t1, _r7, _r15, 0x44 \ // t1 = {p6 o6 n6 m6 l6 k6 j6 i6 h6 g6 f6 e6 d6 c6 b6 a6}
247 \
248 vshuff64x2 _r15, _r5, _r13, 0xEE \ // r15 = {p15 o15 n15 m15 l15 k15 j15 i15 h15 g15 f15 e15 d15 c15 b15 a15}
249 vshuff64x2 _r7, _r5, _r13, 0x44 \ // r7 = {p7 o7 n7 m7 l7 k7 j7 i7 h7 g7 f7 e7 d7 c7 b7 a7}
250 \
251 vshuff64x2 _r13, _t0, _r10, 0xEE \ // r13 = {p13 o13 n13 m13 l13 k13 j13 i13 h13 g13 f13 e13 d13 c13 b13 a13}
252 vshuff64x2 _r5, _t0, _r10, 0x44 \ // r5 = {p5 o5 n5 m5 l5 k5 j5 i5 h5 g5 f5 e5 d5 c5 b5 a5}
253 \
254 vshuff64x2 _r10, _r3, _r11, 0xEE \ // r10 = {p10 o10 n10 m10 l10 k10 j10 i10 h10 g10 f10 e10 d10 c10 b10 a10}
255 vshuff64x2 _t0, _r3, _r11, 0x44 \ // t0 = {p2 o2 n2 m2 l2 k2 j2 i2 h2 g2 f2 e2 d2 c2 b2 a2}
256 \
257 vshuff64x2 _r11, _r1, _r9, 0xEE \ // r11 = {p11 o11 n11 m11 l11 k11 j11 i11 h11 g11 f11 e11 d11 c11 b11 a11}
258 vshuff64x2 _r3, _r1, _r9, 0x44 \ // r3 = {p3 o3 n3 m3 l3 k3 j3 i3 h3 g3 f3 e3 d3 c3 b3 a3}
259 \
260 vshuff64x2 _r9, _r2, _r6, 0xEE \ // r9 = {p9 o9 n9 m9 l9 k9 j9 i9 h9 g9 f9 e9 d9 c9 b9 a9}
261 vshuff64x2 _r1, _r2, _r6, 0x44 \ // r1 = {p1 o1 n1 m1 l1 k1 j1 i1 h1 g1 f1 e1 d1 c1 b1 a1}
262 \
263 vmovdqu32 _r2, _t0 \ // r2 = {p2 o2 n2 m2 l2 k2 j2 i2 h2 g2 f2 e2 d2 c2 b2 a2}
264 vmovdqu32 _r6, _t1 \ // r6 = {p6 o6 n6 m6 l6 k6 j6 i6 h6 g6 f6 e6 d6 c6 b6 a6}
265
266
267// CH(E, F, G) = (E&F) ^ (~E&G)
268// MAJ(A, B, C) = (A&B) ^ (A&C) ^ (B&C)
269// SIGMA0 = ROR_2 ^ ROR_13 ^ ROR_22
270// SIGMA1 = ROR_6 ^ ROR_11 ^ ROR_25
271// sigma0 = ROR_7 ^ ROR_18 ^ SHR_3
272// sigma1 = ROR_17 ^ ROR_19 ^ SHR_10
273
274// Main processing loop per round
// Performs one SHA256 round on all lanes using schedule word _WT.
// On entry TMP3 must hold the round constant Kt for _ROUND; on exit TMP3
// holds the constant for _ROUND+1, loaded from TBL (64 bytes per round).
// Overwrites T1, TMP0, TMP1 and TMP2 in addition to TMP3, and updates _D
// and _H in place; the usual rotation of the working variables is achieved
// by the caller permuting the _A.._H arguments from one round to the next.
// NOTE(review): for _ROUND = 63 the trailing load addresses row 64 of the
// table, i.e. the 64 bytes following a 64-row table - confirm that reading
// those bytes is safe for the table the caller passes in.
275#define PROCESS_LOOP(_WT, _ROUND, _A, _B, _C, _D, _E, _F, _G, _H) \
276 \ // T1 = H + SIGMA1(E) + CH(E, F, G) + Kt + Wt
277 \ // T2 = SIGMA0(A) + MAJ(A, B, C)
278 \ // H=G, G=F, F=E, E=D+T1, D=C, C=B, B=A, A=T1+T2
279 \
280 \ // H becomes T2, then add T1 for A
281 \ // D becomes D + T1 for E
282 \
283 vpaddd T1, _H, TMP3 \ // T1 = H + Kt
284 vmovdqu32 TMP0, _E \
285 vprord TMP1, _E, 6 \ // ROR_6(E)
286 vprord TMP2, _E, 11 \ // ROR_11(E)
287 vprord TMP3, _E, 25 \ // ROR_25(E)
288 vpternlogd TMP0, _F, _G, 0xCA \ // TMP0 = CH(E,F,G)
289 vpaddd T1, T1, _WT \ // T1 = T1 + Wt
290 vpternlogd TMP1, TMP2, TMP3, 0x96 \ // TMP1 = SIGMA1(E)
291 vpaddd T1, T1, TMP0 \ // T1 = T1 + CH(E,F,G)
292 vpaddd T1, T1, TMP1 \ // T1 = T1 + SIGMA1(E)
293 vpaddd _D, _D, T1 \ // D = D + T1
294 \
295 vprord _H, _A, 2 \ // ROR_2(A)
296 vprord TMP2, _A, 13 \ // ROR_13(A)
297 vprord TMP3, _A, 22 \ // ROR_22(A)
298 vmovdqu32 TMP0, _A \
299 vpternlogd TMP0, _B, _C, 0xE8 \ // TMP0 = MAJ(A,B,C)
300 vpternlogd _H, TMP2, TMP3, 0x96 \ // H(T2) = SIGMA0(A)
301 vpaddd _H, _H, TMP0 \ // H(T2) = SIGMA0(A) + MAJ(A,B,C)
302 vpaddd _H, _H, T1 \ // H(A) = H(T2) + T1
303 \
304 vmovdqu32 TMP3, [TBL + ((_ROUND+1)*64)] \ // Next Kt
305
306
// Computes the next message schedule word in place. On entry _WT holds
// Wt-16; _WTp1, _WTp9 and _WTp14 hold Wt-15, Wt-7 and Wt-2. On exit _WT
// holds the new Wt. Overwrites TMP4, TMP5 and TMP6.
307#define MSG_SCHED_ROUND_16_63(_WT, _WTp1, _WTp9, _WTp14) \
308 vprord TMP4, _WTp14, 17 \ // ROR_17(Wt-2)
309 vprord TMP5, _WTp14, 19 \ // ROR_19(Wt-2)
310 vpsrld TMP6, _WTp14, 10 \ // SHR_10(Wt-2)
311 vpternlogd TMP4, TMP5, TMP6, 0x96 \ // TMP4 = sigma1(Wt-2)
312 \
313 vpaddd _WT, _WT, TMP4 \ // Wt = Wt-16 + sigma1(Wt-2)
314 vpaddd _WT, _WT, _WTp9 \ // Wt = Wt-16 + sigma1(Wt-2) + Wt-7
315 \
316 vprord TMP4, _WTp1, 7 \ // ROR_7(Wt-15)
317 vprord TMP5, _WTp1, 18 \ // ROR_18(Wt-15)
318 vpsrld TMP6, _WTp1, 3 \ // SHR_3(Wt-15)
319 vpternlogd TMP4, TMP5, TMP6, 0x96 \ // TMP4 = sigma0(Wt-15)
320 \
321 vpaddd _WT, _WT, TMP4 \ // Wt = Wt-16 + sigma1(Wt-2) +
322 \ // Wt-7 + sigma0(Wt-15)
323
324
325// Note this is reading in a block of data for one lane
326// When all 16 are read, the data must be transposed to build msg schedule
// If bit OFFSET of the current lane mask (MASK_P9) is clear, the load is
// skipped and _WT is left unchanged. Otherwise the first 8 bytes of the
// OFFSET-th 24-byte entry at INPUT_P9 (the slice data pointer) are loaded
// into R9 and 64 bytes are read from that pointer plus IDX.
// LABEL must be unique per expansion.
327#define MSG_SCHED_ROUND_00_15(_WT, OFFSET, LABEL) \
328 TESTQ $(1<<OFFSET), MASK_P9 \
329 JE LABEL \
330 MOVQ OFFSET*24(INPUT_P9), R9 \
331 vmovups _WT, [inp0+IDX] \
332LABEL: \
333
// Loads one 64-byte block for lane OFFSET into _WT when bit OFFSET of the
// lane mask (MASK_P9) is set; otherwise jumps to LABEL and leaves _WT
// untouched. The lane's data pointer is read into R9 from the OFFSET-th
// 24-byte entry at INPUT_P9 and IDX is added as the byte offset.
// The instruction sequence is the same as MSG_SCHED_ROUND_00_15.
334#define MASKED_LOAD(_WT, OFFSET, LABEL) \
335 TESTQ $(1<<OFFSET), MASK_P9 \
336 JE LABEL \
337 MOVQ OFFSET*24(INPUT_P9), R9 \
338 vmovups _WT,[inp0+IDX] \
339LABEL: \
340
// sha256_x16_avx512 runs the SHA256 block function on up to 16 message
// streams at once, one stream per 32-bit lane of a zmm register.
//
// Arguments, as read from the frame below:
//   digests+0    pointer to the state: 8 rows (A..H) of 64 bytes each
//   scratch+8    pointer to 8*64 bytes used to save the incoming digest
//   table+16     pointer to the round constants, one 64-byte row per round
//   mask+24      pointer to 64-bit lane masks, one per block
//   mask_len+32  number of blocks to process
//   inputs+48    16 entries of 24 bytes, one per lane; the first 8 bytes of
//                each entry are used as that lane's data pointer
//
// A lane whose mask bit is clear skips the input load for that block and,
// through the k1-masked additions, keeps its previous digest value.
341TEXT ·sha256_x16_avx512(SB), 7, $0
342 MOVQ digests+0(FP), STATE_P9 // pointer to the digest rows
343 MOVQ scratch+8(FP), SCRATCH_P9
344 MOVQ mask_len+32(FP), INP_SIZE_P9 // number of blocks to process
345 MOVQ mask+24(FP), MASKP_P9
346 MOVQ (MASKP_P9), MASK_P9
// k1 = lane mask of the first block
347 kmovq k1, mask
348 LEAQ inputs+48(FP), INPUT_P9
349
350 // Initialize digests
351 vmovdqu32 A, [STATE + 0*SHA256_DIGEST_ROW_SIZE]
352 vmovdqu32 B, [STATE + 1*SHA256_DIGEST_ROW_SIZE]
353 vmovdqu32 C, [STATE + 2*SHA256_DIGEST_ROW_SIZE]
354 vmovdqu32 D, [STATE + 3*SHA256_DIGEST_ROW_SIZE]
355 vmovdqu32 E, [STATE + 4*SHA256_DIGEST_ROW_SIZE]
356 vmovdqu32 F, [STATE + 5*SHA256_DIGEST_ROW_SIZE]
357 vmovdqu32 G, [STATE + 6*SHA256_DIGEST_ROW_SIZE]
358 vmovdqu32 H, [STATE + 7*SHA256_DIGEST_ROW_SIZE]
359
360 MOVQ table+16(FP), TBL_P9
361
// IDX = byte offset of the current block within every lane's input
362 xor IDX, IDX
363
364 // Read in first block of input data
365 MASKED_LOAD( W0, 0, skipInput0)
366 MASKED_LOAD( W1, 1, skipInput1)
367 MASKED_LOAD( W2, 2, skipInput2)
368 MASKED_LOAD( W3, 3, skipInput3)
369 MASKED_LOAD( W4, 4, skipInput4)
370 MASKED_LOAD( W5, 5, skipInput5)
371 MASKED_LOAD( W6, 6, skipInput6)
372 MASKED_LOAD( W7, 7, skipInput7)
373 MASKED_LOAD( W8, 8, skipInput8)
374 MASKED_LOAD( W9, 9, skipInput9)
375 MASKED_LOAD(W10, 10, skipInput10)
376 MASKED_LOAD(W11, 11, skipInput11)
377 MASKED_LOAD(W12, 12, skipInput12)
378 MASKED_LOAD(W13, 13, skipInput13)
379 MASKED_LOAD(W14, 14, skipInput14)
380 MASKED_LOAD(W15, 15, skipInput15)
381
382lloop:
// TBL is borrowed to fetch the byte-flip mask into TMP2, then restored to
// the round-constant table just below.
383 LEAQ PSHUFFLE_BYTE_FLIP_MASK<>(SB), TBL_P9
384 vmovdqu32 TMP2, [TBL]
385
386 // Get first K from table
387 MOVQ table+16(FP), TBL_P9
388 vmovdqu32 TMP3, [TBL]
389
390 // Save digests for later addition
391 vmovdqu32 [SCRATCH + 64*0], A
392 vmovdqu32 [SCRATCH + 64*1], B
393 vmovdqu32 [SCRATCH + 64*2], C
394 vmovdqu32 [SCRATCH + 64*3], D
395 vmovdqu32 [SCRATCH + 64*4], E
396 vmovdqu32 [SCRATCH + 64*5], F
397 vmovdqu32 [SCRATCH + 64*6], G
398 vmovdqu32 [SCRATCH + 64*7], H
399
// Advance to the next 64-byte block; used by the loads in rounds 48-63
400 add IDX, 64
401
402 // Transpose input data
403 TRANSPOSE16(W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, TMP0, TMP1)
404
// Apply the byte-flip mask (TMP2) to every schedule word
405 vpshufb W0, W0, TMP2
406 vpshufb W1, W1, TMP2
407 vpshufb W2, W2, TMP2
408 vpshufb W3, W3, TMP2
409 vpshufb W4, W4, TMP2
410 vpshufb W5, W5, TMP2
411 vpshufb W6, W6, TMP2
412 vpshufb W7, W7, TMP2
413 vpshufb W8, W8, TMP2
414 vpshufb W9, W9, TMP2
415 vpshufb W10, W10, TMP2
416 vpshufb W11, W11, TMP2
417 vpshufb W12, W12, TMP2
418 vpshufb W13, W13, TMP2
419 vpshufb W14, W14, TMP2
420 vpshufb W15, W15, TMP2
421
422 // MSG Schedule for W0-W15 is now complete in registers
423 // Process first 48 rounds
424 // Calculate next Wt+16 after processing is complete and Wt is unneeded
425
426 PROCESS_LOOP( W0, 0, A, B, C, D, E, F, G, H)
427 MSG_SCHED_ROUND_16_63( W0, W1, W9, W14)
428 PROCESS_LOOP( W1, 1, H, A, B, C, D, E, F, G)
429 MSG_SCHED_ROUND_16_63( W1, W2, W10, W15)
430 PROCESS_LOOP( W2, 2, G, H, A, B, C, D, E, F)
431 MSG_SCHED_ROUND_16_63( W2, W3, W11, W0)
432 PROCESS_LOOP( W3, 3, F, G, H, A, B, C, D, E)
433 MSG_SCHED_ROUND_16_63( W3, W4, W12, W1)
434 PROCESS_LOOP( W4, 4, E, F, G, H, A, B, C, D)
435 MSG_SCHED_ROUND_16_63( W4, W5, W13, W2)
436 PROCESS_LOOP( W5, 5, D, E, F, G, H, A, B, C)
437 MSG_SCHED_ROUND_16_63( W5, W6, W14, W3)
438 PROCESS_LOOP( W6, 6, C, D, E, F, G, H, A, B)
439 MSG_SCHED_ROUND_16_63( W6, W7, W15, W4)
440 PROCESS_LOOP( W7, 7, B, C, D, E, F, G, H, A)
441 MSG_SCHED_ROUND_16_63( W7, W8, W0, W5)
442 PROCESS_LOOP( W8, 8, A, B, C, D, E, F, G, H)
443 MSG_SCHED_ROUND_16_63( W8, W9, W1, W6)
444 PROCESS_LOOP( W9, 9, H, A, B, C, D, E, F, G)
445 MSG_SCHED_ROUND_16_63( W9, W10, W2, W7)
446 PROCESS_LOOP(W10, 10, G, H, A, B, C, D, E, F)
447 MSG_SCHED_ROUND_16_63(W10, W11, W3, W8)
448 PROCESS_LOOP(W11, 11, F, G, H, A, B, C, D, E)
449 MSG_SCHED_ROUND_16_63(W11, W12, W4, W9)
450 PROCESS_LOOP(W12, 12, E, F, G, H, A, B, C, D)
451 MSG_SCHED_ROUND_16_63(W12, W13, W5, W10)
452 PROCESS_LOOP(W13, 13, D, E, F, G, H, A, B, C)
453 MSG_SCHED_ROUND_16_63(W13, W14, W6, W11)
454 PROCESS_LOOP(W14, 14, C, D, E, F, G, H, A, B)
455 MSG_SCHED_ROUND_16_63(W14, W15, W7, W12)
456 PROCESS_LOOP(W15, 15, B, C, D, E, F, G, H, A)
457 MSG_SCHED_ROUND_16_63(W15, W0, W8, W13)
458 PROCESS_LOOP( W0, 16, A, B, C, D, E, F, G, H)
459 MSG_SCHED_ROUND_16_63( W0, W1, W9, W14)
460 PROCESS_LOOP( W1, 17, H, A, B, C, D, E, F, G)
461 MSG_SCHED_ROUND_16_63( W1, W2, W10, W15)
462 PROCESS_LOOP( W2, 18, G, H, A, B, C, D, E, F)
463 MSG_SCHED_ROUND_16_63( W2, W3, W11, W0)
464 PROCESS_LOOP( W3, 19, F, G, H, A, B, C, D, E)
465 MSG_SCHED_ROUND_16_63( W3, W4, W12, W1)
466 PROCESS_LOOP( W4, 20, E, F, G, H, A, B, C, D)
467 MSG_SCHED_ROUND_16_63( W4, W5, W13, W2)
468 PROCESS_LOOP( W5, 21, D, E, F, G, H, A, B, C)
469 MSG_SCHED_ROUND_16_63( W5, W6, W14, W3)
470 PROCESS_LOOP( W6, 22, C, D, E, F, G, H, A, B)
471 MSG_SCHED_ROUND_16_63( W6, W7, W15, W4)
472 PROCESS_LOOP( W7, 23, B, C, D, E, F, G, H, A)
473 MSG_SCHED_ROUND_16_63( W7, W8, W0, W5)
474 PROCESS_LOOP( W8, 24, A, B, C, D, E, F, G, H)
475 MSG_SCHED_ROUND_16_63( W8, W9, W1, W6)
476 PROCESS_LOOP( W9, 25, H, A, B, C, D, E, F, G)
477 MSG_SCHED_ROUND_16_63( W9, W10, W2, W7)
478 PROCESS_LOOP(W10, 26, G, H, A, B, C, D, E, F)
479 MSG_SCHED_ROUND_16_63(W10, W11, W3, W8)
480 PROCESS_LOOP(W11, 27, F, G, H, A, B, C, D, E)
481 MSG_SCHED_ROUND_16_63(W11, W12, W4, W9)
482 PROCESS_LOOP(W12, 28, E, F, G, H, A, B, C, D)
483 MSG_SCHED_ROUND_16_63(W12, W13, W5, W10)
484 PROCESS_LOOP(W13, 29, D, E, F, G, H, A, B, C)
485 MSG_SCHED_ROUND_16_63(W13, W14, W6, W11)
486 PROCESS_LOOP(W14, 30, C, D, E, F, G, H, A, B)
487 MSG_SCHED_ROUND_16_63(W14, W15, W7, W12)
488 PROCESS_LOOP(W15, 31, B, C, D, E, F, G, H, A)
489 MSG_SCHED_ROUND_16_63(W15, W0, W8, W13)
490 PROCESS_LOOP( W0, 32, A, B, C, D, E, F, G, H)
491 MSG_SCHED_ROUND_16_63( W0, W1, W9, W14)
492 PROCESS_LOOP( W1, 33, H, A, B, C, D, E, F, G)
493 MSG_SCHED_ROUND_16_63( W1, W2, W10, W15)
494 PROCESS_LOOP( W2, 34, G, H, A, B, C, D, E, F)
495 MSG_SCHED_ROUND_16_63( W2, W3, W11, W0)
496 PROCESS_LOOP( W3, 35, F, G, H, A, B, C, D, E)
497 MSG_SCHED_ROUND_16_63( W3, W4, W12, W1)
498 PROCESS_LOOP( W4, 36, E, F, G, H, A, B, C, D)
499 MSG_SCHED_ROUND_16_63( W4, W5, W13, W2)
500 PROCESS_LOOP( W5, 37, D, E, F, G, H, A, B, C)
501 MSG_SCHED_ROUND_16_63( W5, W6, W14, W3)
502 PROCESS_LOOP( W6, 38, C, D, E, F, G, H, A, B)
503 MSG_SCHED_ROUND_16_63( W6, W7, W15, W4)
504 PROCESS_LOOP( W7, 39, B, C, D, E, F, G, H, A)
505 MSG_SCHED_ROUND_16_63( W7, W8, W0, W5)
506 PROCESS_LOOP( W8, 40, A, B, C, D, E, F, G, H)
507 MSG_SCHED_ROUND_16_63( W8, W9, W1, W6)
508 PROCESS_LOOP( W9, 41, H, A, B, C, D, E, F, G)
509 MSG_SCHED_ROUND_16_63( W9, W10, W2, W7)
510 PROCESS_LOOP(W10, 42, G, H, A, B, C, D, E, F)
511 MSG_SCHED_ROUND_16_63(W10, W11, W3, W8)
512 PROCESS_LOOP(W11, 43, F, G, H, A, B, C, D, E)
513 MSG_SCHED_ROUND_16_63(W11, W12, W4, W9)
514 PROCESS_LOOP(W12, 44, E, F, G, H, A, B, C, D)
515 MSG_SCHED_ROUND_16_63(W12, W13, W5, W10)
516 PROCESS_LOOP(W13, 45, D, E, F, G, H, A, B, C)
517 MSG_SCHED_ROUND_16_63(W13, W14, W6, W11)
518 PROCESS_LOOP(W14, 46, C, D, E, F, G, H, A, B)
519 MSG_SCHED_ROUND_16_63(W14, W15, W7, W12)
520 PROCESS_LOOP(W15, 47, B, C, D, E, F, G, H, A)
521 MSG_SCHED_ROUND_16_63(W15, W0, W8, W13)
522
523 // Check if this is the last block
// NOTE(review): the counter is decremented before it is tested, so a block
// count of 0 on entry does not appear to be handled - confirm callers never
// pass an empty mask slice.
524 sub INP_SIZE, 1
525 JE lastLoop
526
527 // Load next mask for inputs
528 ADDQ $8, MASKP_P9
529 MOVQ (MASKP_P9), MASK_P9
530
531 // Process last 16 rounds
532 // Read in next block msg data for use in first 16 words of msg sched
533
534 PROCESS_LOOP( W0, 48, A, B, C, D, E, F, G, H)
535 MSG_SCHED_ROUND_00_15( W0, 0, skipNext0)
536 PROCESS_LOOP( W1, 49, H, A, B, C, D, E, F, G)
537 MSG_SCHED_ROUND_00_15( W1, 1, skipNext1)
538 PROCESS_LOOP( W2, 50, G, H, A, B, C, D, E, F)
539 MSG_SCHED_ROUND_00_15( W2, 2, skipNext2)
540 PROCESS_LOOP( W3, 51, F, G, H, A, B, C, D, E)
541 MSG_SCHED_ROUND_00_15( W3, 3, skipNext3)
542 PROCESS_LOOP( W4, 52, E, F, G, H, A, B, C, D)
543 MSG_SCHED_ROUND_00_15( W4, 4, skipNext4)
544 PROCESS_LOOP( W5, 53, D, E, F, G, H, A, B, C)
545 MSG_SCHED_ROUND_00_15( W5, 5, skipNext5)
546 PROCESS_LOOP( W6, 54, C, D, E, F, G, H, A, B)
547 MSG_SCHED_ROUND_00_15( W6, 6, skipNext6)
548 PROCESS_LOOP( W7, 55, B, C, D, E, F, G, H, A)
549 MSG_SCHED_ROUND_00_15( W7, 7, skipNext7)
550 PROCESS_LOOP( W8, 56, A, B, C, D, E, F, G, H)
551 MSG_SCHED_ROUND_00_15( W8, 8, skipNext8)
552 PROCESS_LOOP( W9, 57, H, A, B, C, D, E, F, G)
553 MSG_SCHED_ROUND_00_15( W9, 9, skipNext9)
554 PROCESS_LOOP(W10, 58, G, H, A, B, C, D, E, F)
555 MSG_SCHED_ROUND_00_15(W10, 10, skipNext10)
556 PROCESS_LOOP(W11, 59, F, G, H, A, B, C, D, E)
557 MSG_SCHED_ROUND_00_15(W11, 11, skipNext11)
558 PROCESS_LOOP(W12, 60, E, F, G, H, A, B, C, D)
559 MSG_SCHED_ROUND_00_15(W12, 12, skipNext12)
560 PROCESS_LOOP(W13, 61, D, E, F, G, H, A, B, C)
561 MSG_SCHED_ROUND_00_15(W13, 13, skipNext13)
562 PROCESS_LOOP(W14, 62, C, D, E, F, G, H, A, B)
563 MSG_SCHED_ROUND_00_15(W14, 14, skipNext14)
564 PROCESS_LOOP(W15, 63, B, C, D, E, F, G, H, A)
565 MSG_SCHED_ROUND_00_15(W15, 15, skipNext15)
566
567 // Add old digest
// Each register is reloaded with the saved digest and the block result is
// added only in the lanes selected by k1 (still the mask of the block just
// processed); the other lanes keep the saved value.
568 vmovdqu32 TMP2, A
569 vmovdqu32 A, [SCRATCH + 64*0]
570 vpaddd A{k1}, A, TMP2
571 vmovdqu32 TMP2, B
572 vmovdqu32 B, [SCRATCH + 64*1]
573 vpaddd B{k1}, B, TMP2
574 vmovdqu32 TMP2, C
575 vmovdqu32 C, [SCRATCH + 64*2]
576 vpaddd C{k1}, C, TMP2
577 vmovdqu32 TMP2, D
578 vmovdqu32 D, [SCRATCH + 64*3]
579 vpaddd D{k1}, D, TMP2
580 vmovdqu32 TMP2, E
581 vmovdqu32 E, [SCRATCH + 64*4]
582 vpaddd E{k1}, E, TMP2
583 vmovdqu32 TMP2, F
584 vmovdqu32 F, [SCRATCH + 64*5]
585 vpaddd F{k1}, F, TMP2
586 vmovdqu32 TMP2, G
587 vmovdqu32 G, [SCRATCH + 64*6]
588 vpaddd G{k1}, G, TMP2
589 vmovdqu32 TMP2, H
590 vmovdqu32 H, [SCRATCH + 64*7]
591 vpaddd H{k1}, H, TMP2
592
// k1 = lane mask of the block that was just loaded
593 kmovq k1, mask
594 JMP lloop
595
596lastLoop:
597 // Process last 16 rounds
598 PROCESS_LOOP( W0, 48, A, B, C, D, E, F, G, H)
599 PROCESS_LOOP( W1, 49, H, A, B, C, D, E, F, G)
600 PROCESS_LOOP( W2, 50, G, H, A, B, C, D, E, F)
601 PROCESS_LOOP( W3, 51, F, G, H, A, B, C, D, E)
602 PROCESS_LOOP( W4, 52, E, F, G, H, A, B, C, D)
603 PROCESS_LOOP( W5, 53, D, E, F, G, H, A, B, C)
604 PROCESS_LOOP( W6, 54, C, D, E, F, G, H, A, B)
605 PROCESS_LOOP( W7, 55, B, C, D, E, F, G, H, A)
606 PROCESS_LOOP( W8, 56, A, B, C, D, E, F, G, H)
607 PROCESS_LOOP( W9, 57, H, A, B, C, D, E, F, G)
608 PROCESS_LOOP(W10, 58, G, H, A, B, C, D, E, F)
609 PROCESS_LOOP(W11, 59, F, G, H, A, B, C, D, E)
610 PROCESS_LOOP(W12, 60, E, F, G, H, A, B, C, D)
611 PROCESS_LOOP(W13, 61, D, E, F, G, H, A, B, C)
612 PROCESS_LOOP(W14, 62, C, D, E, F, G, H, A, B)
613 PROCESS_LOOP(W15, 63, B, C, D, E, F, G, H, A)
614
615 // Add old digest
616 vmovdqu32 TMP2, A
617 vmovdqu32 A, [SCRATCH + 64*0]
618 vpaddd A{k1}, A, TMP2
619 vmovdqu32 TMP2, B
620 vmovdqu32 B, [SCRATCH + 64*1]
621 vpaddd B{k1}, B, TMP2
622 vmovdqu32 TMP2, C
623 vmovdqu32 C, [SCRATCH + 64*2]
624 vpaddd C{k1}, C, TMP2
625 vmovdqu32 TMP2, D
626 vmovdqu32 D, [SCRATCH + 64*3]
627 vpaddd D{k1}, D, TMP2
628 vmovdqu32 TMP2, E
629 vmovdqu32 E, [SCRATCH + 64*4]
630 vpaddd E{k1}, E, TMP2
631 vmovdqu32 TMP2, F
632 vmovdqu32 F, [SCRATCH + 64*5]
633 vpaddd F{k1}, F, TMP2
634 vmovdqu32 TMP2, G
635 vmovdqu32 G, [SCRATCH + 64*6]
636 vpaddd G{k1}, G, TMP2
637 vmovdqu32 TMP2, H
638 vmovdqu32 H, [SCRATCH + 64*7]
639 vpaddd H{k1}, H, TMP2
640
641 // Write out digest
642 vmovdqu32 [STATE + 0*SHA256_DIGEST_ROW_SIZE], A
643 vmovdqu32 [STATE + 1*SHA256_DIGEST_ROW_SIZE], B
644 vmovdqu32 [STATE + 2*SHA256_DIGEST_ROW_SIZE], C
645 vmovdqu32 [STATE + 3*SHA256_DIGEST_ROW_SIZE], D
646 vmovdqu32 [STATE + 4*SHA256_DIGEST_ROW_SIZE], E
647 vmovdqu32 [STATE + 5*SHA256_DIGEST_ROW_SIZE], F
648 vmovdqu32 [STATE + 6*SHA256_DIGEST_ROW_SIZE], G
649 vmovdqu32 [STATE + 7*SHA256_DIGEST_ROW_SIZE], H
650
651 VZEROUPPER
652 RET
653
654//
655// Tables
656//
657
// vpshufb control, 64 bytes: within every 32-bit word the byte order is
// reversed (selectors 3,2,1,0 then 7,6,5,4, ...). The same 16-byte pattern
// is repeated four times to fill a zmm register.
658DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x000(SB)/8, $0x0405060700010203
659DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x008(SB)/8, $0x0c0d0e0f08090a0b
660DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x010(SB)/8, $0x0405060700010203
661DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x018(SB)/8, $0x0c0d0e0f08090a0b
662DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x020(SB)/8, $0x0405060700010203
663DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x028(SB)/8, $0x0c0d0e0f08090a0b
664DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x030(SB)/8, $0x0405060700010203
665DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x038(SB)/8, $0x0c0d0e0f08090a0b
666GLOBL PSHUFFLE_BYTE_FLIP_MASK<>(SB), 8, $64
667
// First index vector for the vpermi2q steps of TRANSPOSE16: eight 64-bit
// indices {0, 1, 8, 9, 4, 5, 12, 13}.
668DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x000(SB)/8, $0x0000000000000000
669DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x008(SB)/8, $0x0000000000000001
670DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x010(SB)/8, $0x0000000000000008
671DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x018(SB)/8, $0x0000000000000009
672DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x020(SB)/8, $0x0000000000000004
673DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x028(SB)/8, $0x0000000000000005
674DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x030(SB)/8, $0x000000000000000C
675DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x038(SB)/8, $0x000000000000000D
676GLOBL PSHUFFLE_TRANSPOSE16_MASK1<>(SB), 8, $64
677
// Second index vector for the vpermi2q steps of TRANSPOSE16: eight 64-bit
// indices {2, 3, 10, 11, 6, 7, 14, 15}.
678DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x000(SB)/8, $0x0000000000000002
679DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x008(SB)/8, $0x0000000000000003
680DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x010(SB)/8, $0x000000000000000A
681DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x018(SB)/8, $0x000000000000000B
682DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x020(SB)/8, $0x0000000000000006
683DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x028(SB)/8, $0x0000000000000007
684DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x030(SB)/8, $0x000000000000000E
685DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x038(SB)/8, $0x000000000000000F
686GLOBL PSHUFFLE_TRANSPOSE16_MASK2<>(SB), 8, $64
diff --git a/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.go b/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.go
new file mode 100644
index 0000000..4b9473a
--- /dev/null
+++ b/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.go
@@ -0,0 +1,501 @@
1//go:build !noasm && !appengine && gc
2// +build !noasm,!appengine,gc
3
4/*
5 * Minio Cloud Storage, (C) 2017 Minio, Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20package sha256
21
22import (
23 "encoding/binary"
24 "errors"
25 "hash"
26 "sort"
27 "sync/atomic"
28 "time"
29)
30
// sha256X16Avx512 has no Go body; it is implemented in assembly. Within this
// file it is only called from blockAvx512, which passes the per-lane state
// (digests), a 512-byte scratch buffer, the round-constant table, the slice of
// lane masks produced by expandMask and the 16 input slices.
31//go:noescape
32func sha256X16Avx512(digests *[512]byte, scratch *[512]byte, table *[512]uint64, mask []uint64, inputs [16][]byte)
33
34// Avx512ServerUID - Do not start at 0 but next multiple of 16 so as to be able to
35// differentiate with default initialization value of 0
36const Avx512ServerUID = 16
37
// uidCounter is the package-wide source of digest uids; NewAvx512 increments it
// atomically and hands the new value to the digest it creates.
38var uidCounter uint64
39
40// NewAvx512 - initialize sha256 Avx512 implementation.
41func NewAvx512(a512srv *Avx512Server) hash.Hash {
42 uid := atomic.AddUint64(&uidCounter, 1)
43 return &Avx512Digest{uid: uid, a512srv: a512srv}
44}
45
46// Avx512Digest - Type for computing SHA256 using Avx512
// The digest itself only buffers partial chunks and builds the final padding;
// whole chunks are sent to the Avx512Server over its input channel.
47type Avx512Digest struct {
48 uid uint64 // id assigned by NewAvx512; tags every message sent to the server
49 a512srv *Avx512Server // server whose input channel receives this digest's blocks
50 x [chunk]byte // input that does not yet fill a whole chunk
51 nx int // number of valid bytes at the start of x
52 len uint64 // total number of bytes passed to Write so far
53 final bool // set by Sum; Write returns an error while it is set
54 result [Size]byte // digest received from the server, cached by Sum
55}
56
57// Size - Return size of checksum
58func (d *Avx512Digest) Size() int { return Size }
59
60// BlockSize - Return blocksize of checksum
61func (d Avx512Digest) BlockSize() int { return BlockSize }
62
63// Reset - reset sha digest to its initial values
64func (d *Avx512Digest) Reset() {
65 d.a512srv.blocksCh <- blockInput{uid: d.uid, reset: true}
66 d.nx = 0
67 d.len = 0
68 d.final = false
69}
70
71// Write to digest
72func (d *Avx512Digest) Write(p []byte) (nn int, err error) {
73
74 if d.final {
75 return 0, errors.New("Avx512Digest already finalized. Reset first before writing again")
76 }
77
78 nn = len(p)
79 d.len += uint64(nn)
80 if d.nx > 0 {
81 n := copy(d.x[d.nx:], p)
82 d.nx += n
83 if d.nx == chunk {
84 d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: d.x[:]}
85 d.nx = 0
86 }
87 p = p[n:]
88 }
89 if len(p) >= chunk {
90 n := len(p) &^ (chunk - 1)
91 d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: p[:n]}
92 p = p[n:]
93 }
94 if len(p) > 0 {
95 d.nx = copy(d.x[:], p)
96 }
97 return
98}
99
100// Sum - Return sha256 sum in bytes
101func (d *Avx512Digest) Sum(in []byte) (result []byte) {
102
103 if d.final {
104 return append(in, d.result[:]...)
105 }
106
107 trail := make([]byte, 0, 128)
108 trail = append(trail, d.x[:d.nx]...)
109
110 len := d.len
111 // Padding. Add a 1 bit and 0 bits until 56 bytes mod 64.
112 var tmp [64]byte
113 tmp[0] = 0x80
114 if len%64 < 56 {
115 trail = append(trail, tmp[0:56-len%64]...)
116 } else {
117 trail = append(trail, tmp[0:64+56-len%64]...)
118 }
119 d.nx = 0
120
121 // Length in bits.
122 len <<= 3
123 for i := uint(0); i < 8; i++ {
124 tmp[i] = byte(len >> (56 - 8*i))
125 }
126 trail = append(trail, tmp[0:8]...)
127
128 sumCh := make(chan [Size]byte)
129 d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: trail, final: true, sumCh: sumCh}
130 d.result = <-sumCh
131 d.final = true
132 return append(in, d.result[:]...)
133}
134
// table holds the 64 SHA-256 round constants in the layout handed to the
// assembly routine: every 32-bit constant is duplicated into both halves of a
// uint64 and that uint64 is repeated 8 times, giving 16 copies (64 bytes) of
// the constant per round and 64*8 = 512 entries in total. blockAvx512 passes a
// pointer to it to sha256X16Avx512.
135var table = [512]uint64{
136 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98,
137 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98,
138 0x7137449171374491, 0x7137449171374491, 0x7137449171374491, 0x7137449171374491,
139 0x7137449171374491, 0x7137449171374491, 0x7137449171374491, 0x7137449171374491,
140 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf,
141 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf,
142 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5,
143 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5,
144 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b,
145 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b,
146 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1,
147 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1,
148 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4,
149 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4,
150 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5,
151 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5,
152 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98,
153 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98,
154 0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01,
155 0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01,
156 0x243185be243185be, 0x243185be243185be, 0x243185be243185be, 0x243185be243185be,
157 0x243185be243185be, 0x243185be243185be, 0x243185be243185be, 0x243185be243185be,
158 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3,
159 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3,
160 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74,
161 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74,
162 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe,
163 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe,
164 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7,
165 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7,
166 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174,
167 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174,
168 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1,
169 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1,
170 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786,
171 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786,
172 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6,
173 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6,
174 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc,
175 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc,
176 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f,
177 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f,
178 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa,
179 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa,
180 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc,
181 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc,
182 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da,
183 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da,
184 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152,
185 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152,
186 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d,
187 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d,
188 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8,
189 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8,
190 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7,
191 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7,
192 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3,
193 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3,
194 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147,
195 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147,
196 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351,
197 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351,
198 0x1429296714292967, 0x1429296714292967, 0x1429296714292967, 0x1429296714292967,
199 0x1429296714292967, 0x1429296714292967, 0x1429296714292967, 0x1429296714292967,
200 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85,
201 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85,
202 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138,
203 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138,
204 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc,
205 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc,
206 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13,
207 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13,
208 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354,
209 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354,
210 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb,
211 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb,
212 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e,
213 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e,
214 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85,
215 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85,
216 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1,
217 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1,
218 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b,
219 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b,
220 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70,
221 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70,
222 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3,
223 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3,
224 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819,
225 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819,
226 0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624,
227 0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624,
228 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585,
229 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585,
230 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070,
231 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070,
232 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116,
233 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116,
234 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08,
235 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08,
236 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c,
237 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c,
238 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5,
239 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5,
240 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3,
241 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3,
242 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a,
243 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a,
244 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f,
245 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f,
246 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3,
247 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3,
248 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee,
249 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee,
250 0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f,
251 0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f,
252 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814,
253 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814,
254 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208,
255 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208,
256 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa,
257 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa,
258 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb,
259 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb,
260 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7,
261 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7,
262 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2,
263 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2}
264
265// Interface function to assembly ode
266func blockAvx512(digests *[512]byte, input [16][]byte, mask []uint64) [16][Size]byte {
267
268 scratch := [512]byte{}
269 sha256X16Avx512(digests, &scratch, &table, mask, input)
270
271 output := [16][Size]byte{}
272 for i := 0; i < 16; i++ {
273 output[i] = getDigest(i, digests[:])
274 }
275
276 return output
277}
278
279func getDigest(index int, state []byte) (sum [Size]byte) {
280 for j := 0; j < 16; j += 2 {
281 for i := index*4 + j*Size; i < index*4+(j+1)*Size; i += Size {
282 binary.BigEndian.PutUint32(sum[j*2:], binary.LittleEndian.Uint32(state[i:i+4]))
283 }
284 }
285 return
286}
287
288// Message to send across input channel
// (the blocksCh of an Avx512Server; Process is its reader).
289type blockInput struct {
290 uid uint64 // identifies the digest the message belongs to; its low 4 bits select the lane
291 msg []byte // data to hash; stored as the lane's block by Process
292 reset bool // when set, Process only calls reset(uid) and ignores the other fields
293 final bool // when set, sumCh is installed as the lane's output channel
294 sumCh chan [Size]byte // receives the digest after the lane has been processed (used with final)
295}
296
297// Avx512Server - Type to implement 16x parallel handling of SHA256 invocations
// Within this file every field other than blocksCh is read and written only by
// Process and the helpers it calls (reset, blocks, getDigests).
298type Avx512Server struct {
299 blocksCh chan blockInput // Input channel
300 totalIn int // Total number of inputs waiting to be processed
301 lanes [16]Avx512LaneInfo // Array with info per lane (out of 16)
302 digests map[uint64][Size]byte // Map of uids to (interim) digest results
303}
304
305// Avx512LaneInfo - Info for each lane
// The zero value marks a lane without pending input (block == nil).
306type Avx512LaneInfo struct {
307 uid uint64 // unique identification for this SHA processing
308 block []byte // input block to be processed
309 outputCh chan [Size]byte // channel for output result; only set for final messages
310}
311
312// NewAvx512Server - Create new object for parallel processing handling
313func NewAvx512Server() *Avx512Server {
314 a512srv := &Avx512Server{}
315 a512srv.digests = make(map[uint64][Size]byte)
316 a512srv.blocksCh = make(chan blockInput)
317
318 // Start a single thread for reading from the input channel
319 go a512srv.Process()
320 return a512srv
321}
322
323// Process - Sole handler for reading from the input channel
324func (a512srv *Avx512Server) Process() {
325 for {
326 select {
327 case block := <-a512srv.blocksCh:
328 if block.reset {
329 a512srv.reset(block.uid)
330 continue
331 }
332 index := block.uid & 0xf
333 // fmt.Println("Adding message:", block.uid, index)
334
335 if a512srv.lanes[index].block != nil { // If slot is already filled, process all inputs
336 //fmt.Println("Invoking Blocks()")
337 a512srv.blocks()
338 }
339 a512srv.totalIn++
340 a512srv.lanes[index] = Avx512LaneInfo{uid: block.uid, block: block.msg}
341 if block.final {
342 a512srv.lanes[index].outputCh = block.sumCh
343 }
344 if a512srv.totalIn == len(a512srv.lanes) {
345 // fmt.Println("Invoking Blocks() while FULL: ")
346 a512srv.blocks()
347 }
348
349 // TODO: test with larger timeout
350 case <-time.After(1 * time.Microsecond):
351 for _, lane := range a512srv.lanes {
352 if lane.block != nil { // check if there is any input to process
353 // fmt.Println("Invoking Blocks() on TIMEOUT: ")
354 a512srv.blocks()
355 break // we are done
356 }
357 }
358 }
359 }
360}
361
362// Do a reset for this calculation
363func (a512srv *Avx512Server) reset(uid uint64) {
364
365 // Check if there is a message still waiting to be processed (and remove if so)
366 for i, lane := range a512srv.lanes {
367 if lane.uid == uid {
368 if lane.block != nil {
369 a512srv.lanes[i] = Avx512LaneInfo{} // clear message
370 a512srv.totalIn--
371 }
372 }
373 }
374
375 // Delete entry from hash map
376 delete(a512srv.digests, uid)
377}
378
379// Invoke assembly and send results back
380func (a512srv *Avx512Server) blocks() {
381
382 inputs := [16][]byte{}
383 for i := range inputs {
384 inputs[i] = a512srv.lanes[i].block
385 }
386
387 mask := expandMask(genMask(inputs))
388 outputs := blockAvx512(a512srv.getDigests(), inputs, mask)
389
390 a512srv.totalIn = 0
391 for i := 0; i < len(outputs); i++ {
392 uid, outputCh := a512srv.lanes[i].uid, a512srv.lanes[i].outputCh
393 a512srv.digests[uid] = outputs[i]
394 a512srv.lanes[i] = Avx512LaneInfo{}
395
396 if outputCh != nil {
397 // Send back result
398 outputCh <- outputs[i]
399 delete(a512srv.digests, uid) // Delete entry from hashmap
400 }
401 }
402}
403
404func (a512srv *Avx512Server) Write(uid uint64, p []byte) (nn int, err error) {
405 a512srv.blocksCh <- blockInput{uid: uid, msg: p}
406 return len(p), nil
407}
408
409// Sum - return sha256 sum in bytes for a given sum id.
410func (a512srv *Avx512Server) Sum(uid uint64, p []byte) [32]byte {
411 sumCh := make(chan [32]byte)
412 a512srv.blocksCh <- blockInput{uid: uid, msg: p, final: true, sumCh: sumCh}
413 return <-sumCh
414}
415
416func (a512srv *Avx512Server) getDigests() *[512]byte {
417 digests := [512]byte{}
418 for i, lane := range a512srv.lanes {
419 a, ok := a512srv.digests[lane.uid]
420 if ok {
421 binary.BigEndian.PutUint32(digests[(i+0*16)*4:], binary.LittleEndian.Uint32(a[0:4]))
422 binary.BigEndian.PutUint32(digests[(i+1*16)*4:], binary.LittleEndian.Uint32(a[4:8]))
423 binary.BigEndian.PutUint32(digests[(i+2*16)*4:], binary.LittleEndian.Uint32(a[8:12]))
424 binary.BigEndian.PutUint32(digests[(i+3*16)*4:], binary.LittleEndian.Uint32(a[12:16]))
425 binary.BigEndian.PutUint32(digests[(i+4*16)*4:], binary.LittleEndian.Uint32(a[16:20]))
426 binary.BigEndian.PutUint32(digests[(i+5*16)*4:], binary.LittleEndian.Uint32(a[20:24]))
427 binary.BigEndian.PutUint32(digests[(i+6*16)*4:], binary.LittleEndian.Uint32(a[24:28]))
428 binary.BigEndian.PutUint32(digests[(i+7*16)*4:], binary.LittleEndian.Uint32(a[28:32]))
429 } else {
430 binary.LittleEndian.PutUint32(digests[(i+0*16)*4:], init0)
431 binary.LittleEndian.PutUint32(digests[(i+1*16)*4:], init1)
432 binary.LittleEndian.PutUint32(digests[(i+2*16)*4:], init2)
433 binary.LittleEndian.PutUint32(digests[(i+3*16)*4:], init3)
434 binary.LittleEndian.PutUint32(digests[(i+4*16)*4:], init4)
435 binary.LittleEndian.PutUint32(digests[(i+5*16)*4:], init5)
436 binary.LittleEndian.PutUint32(digests[(i+6*16)*4:], init6)
437 binary.LittleEndian.PutUint32(digests[(i+7*16)*4:], init7)
438 }
439 }
440 return &digests
441}
442
// lane records the length of one input together with the position of the lane
// it came from, so that inputs can be sorted on length without losing track of
// their lane.
type lane struct {
	len, pos uint
}

// lanes implements sort.Interface and orders by ascending len.
type lanes []lane

// Len reports the number of entries.
func (lns lanes) Len() int {
	return len(lns)
}

// Less reports whether entry i is shorter than entry j.
func (lns lanes) Less(i, j int) bool {
	return lns[i].len < lns[j].len
}

// Swap exchanges entries i and j.
func (lns lanes) Swap(i, j int) {
	lns[j], lns[i] = lns[i], lns[j]
}
454
455// Helper struct for pairing a lane bitmask with the number of consecutive
// rounds (64-byte blocks) during which that mask applies. genMask produces an
// array of these and expandMask flattens it into one mask per round.
456type maskRounds struct {
457 mask uint64 // bit p stays set as long as the lane at position p still has input
458 rounds uint64 // number of rounds that use this mask
459}
460
461func genMask(input [16][]byte) [16]maskRounds {
462
463 // Sort on blocks length small to large
464 var sorted [16]lane
465 for c, inpt := range input {
466 sorted[c] = lane{uint(len(inpt)), uint(c)}
467 }
468 sort.Sort(lanes(sorted[:]))
469
470 // Create mask array including 'rounds' between masks
471 m, round, index := uint64(0xffff), uint64(0), 0
472 var mr [16]maskRounds
473 for _, s := range sorted {
474 if s.len > 0 {
475 if uint64(s.len)>>6 > round {
476 mr[index] = maskRounds{m, (uint64(s.len) >> 6) - round}
477 index++
478 }
479 round = uint64(s.len) >> 6
480 }
481 m = m & ^(1 << uint(s.pos))
482 }
483
484 return mr
485}
486
487// TODO: remove function
488func expandMask(mr [16]maskRounds) []uint64 {
489 size := uint64(0)
490 for _, r := range mr {
491 size += r.rounds
492 }
493 result, index := make([]uint64, size), 0
494 for _, r := range mr {
495 for j := uint64(0); j < r.rounds; j++ {
496 result[index] = r.mask
497 index++
498 }
499 }
500 return result
501}
diff --git a/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.s b/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.s
new file mode 100644
index 0000000..cca534e
--- /dev/null
+++ b/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.s
@@ -0,0 +1,267 @@
1//+build !noasm,!appengine,gc
2
3TEXT ·sha256X16Avx512(SB), 7, $0
4 MOVQ digests+0(FP), DI
5 MOVQ scratch+8(FP), R12
6 MOVQ mask_len+32(FP), SI
7 MOVQ mask_base+24(FP), R13
8 MOVQ (R13), R14
9 LONG $0x92fbc1c4; BYTE $0xce
10 LEAQ inputs+48(FP), AX
11 QUAD $0xf162076f487ef162; QUAD $0x7ef162014f6f487e; QUAD $0x487ef16202576f48; QUAD $0x6f487ef162035f6f; QUAD $0x6f6f487ef1620467; QUAD $0x06776f487ef16205; LONG $0x487ef162; WORD $0x7f6f; BYTE $0x07
12 MOVQ table+16(FP), DX
13 WORD $0x3148; BYTE $0xc9
14 TESTQ $(1<<0), R14
15 JE skipInput0
16 MOVQ 0*24(AX), R9
17 LONG $0x487cc162; WORD $0x0410; BYTE $0x09
18
19skipInput0:
20 TESTQ $(1<<1), R14
21 JE skipInput1
22 MOVQ 1*24(AX), R9
23 LONG $0x487cc162; WORD $0x0c10; BYTE $0x09
24
25skipInput1:
26 TESTQ $(1<<2), R14
27 JE skipInput2
28 MOVQ 2*24(AX), R9
29 LONG $0x487cc162; WORD $0x1410; BYTE $0x09
30
31skipInput2:
32 TESTQ $(1<<3), R14
33 JE skipInput3
34 MOVQ 3*24(AX), R9
35 LONG $0x487cc162; WORD $0x1c10; BYTE $0x09
36
37skipInput3:
38 TESTQ $(1<<4), R14
39 JE skipInput4
40 MOVQ 4*24(AX), R9
41 LONG $0x487cc162; WORD $0x2410; BYTE $0x09
42
43skipInput4:
44 TESTQ $(1<<5), R14
45 JE skipInput5
46 MOVQ 5*24(AX), R9
47 LONG $0x487cc162; WORD $0x2c10; BYTE $0x09
48
49skipInput5:
50 TESTQ $(1<<6), R14
51 JE skipInput6
52 MOVQ 6*24(AX), R9
53 LONG $0x487cc162; WORD $0x3410; BYTE $0x09
54
55skipInput6:
56 TESTQ $(1<<7), R14
57 JE skipInput7
58 MOVQ 7*24(AX), R9
59 LONG $0x487cc162; WORD $0x3c10; BYTE $0x09
60
61skipInput7:
62 TESTQ $(1<<8), R14
63 JE skipInput8
64 MOVQ 8*24(AX), R9
65 LONG $0x487c4162; WORD $0x0410; BYTE $0x09
66
67skipInput8:
68 TESTQ $(1<<9), R14
69 JE skipInput9
70 MOVQ 9*24(AX), R9
71 LONG $0x487c4162; WORD $0x0c10; BYTE $0x09
72
73skipInput9:
74 TESTQ $(1<<10), R14
75 JE skipInput10
76 MOVQ 10*24(AX), R9
77 LONG $0x487c4162; WORD $0x1410; BYTE $0x09
78
79skipInput10:
80 TESTQ $(1<<11), R14
81 JE skipInput11
82 MOVQ 11*24(AX), R9
83 LONG $0x487c4162; WORD $0x1c10; BYTE $0x09
84
85skipInput11:
86 TESTQ $(1<<12), R14
87 JE skipInput12
88 MOVQ 12*24(AX), R9
89 LONG $0x487c4162; WORD $0x2410; BYTE $0x09
90
91skipInput12:
92 TESTQ $(1<<13), R14
93 JE skipInput13
94 MOVQ 13*24(AX), R9
95 LONG $0x487c4162; WORD $0x2c10; BYTE $0x09
96
97skipInput13:
98 TESTQ $(1<<14), R14
99 JE skipInput14
100 MOVQ 14*24(AX), R9
101 LONG $0x487c4162; WORD $0x3410; BYTE $0x09
102
103skipInput14:
104 TESTQ $(1<<15), R14
105 JE skipInput15
106 MOVQ 15*24(AX), R9
107 LONG $0x487c4162; WORD $0x3c10; BYTE $0x09
108
109skipInput15:
110lloop:
111 LEAQ PSHUFFLE_BYTE_FLIP_MASK<>(SB), DX
112 LONG $0x487e7162; WORD $0x1a6f
113 MOVQ table+16(FP), DX
114 QUAD $0xd162226f487e7162; QUAD $0x7ed16224047f487e; QUAD $0x7ed16201244c7f48; QUAD $0x7ed1620224547f48; QUAD $0x7ed16203245c7f48; QUAD $0x7ed1620424647f48; QUAD $0x7ed16205246c7f48; QUAD $0x7ed1620624747f48; QUAD $0xc1834807247c7f48; QUAD $0x44c9c6407c316240; QUAD $0x62eec1c6407ca162; QUAD $0xa16244d3c6406c31; QUAD $0x34c162eed3c6406c; QUAD $0x407ca162dddac648; QUAD $0xc6407ca16288cac6; QUAD $0xcac648345162ddc2; QUAD $0x44d5c6405ca16288; QUAD $0x62eee5c6405ca162; QUAD $0xa16244d7c6404c31; QUAD $0x6cc162eef7c6404c; QUAD $0x405ca162ddfac640; QUAD $0xc6405ca16288eec6; QUAD $0xd2c6406cc162dde6; QUAD $0x44f1c6403c816288; QUAD $0x62eec1c6403c0162; QUAD $0x016244d3c6402c11; QUAD $0x4c4162eed3c6402c; QUAD $0x403c0162dddac640; QUAD $0xc6403c016288cac6; QUAD $0xf2c6404cc162ddc2; QUAD $0x44d5c6401c016288; QUAD $0x62eee5c6401c0162; QUAD $0x016244d7c6400c11; QUAD $0x2c4162eef7c6400c; QUAD $0x401c0162ddfac640; QUAD $0xc6401c016288eec6; QUAD $0xd2c6402c4162dde6; BYTE $0x88
115 LEAQ PSHUFFLE_TRANSPOSE16_MASK1<>(SB), BX
116 LEAQ PSHUFFLE_TRANSPOSE16_MASK2<>(SB), R8
117 QUAD $0x2262336f487e6162; QUAD $0x487e5162f27648b5; QUAD $0xd27648b53262106f; QUAD $0xa262136f487ee162; QUAD $0x487e5162d77640e5; QUAD $0xcf7640e53262086f; QUAD $0xa2621b6f487ee162; QUAD $0x487ec162dd7640f5; QUAD $0xfd7640f5a262386f; QUAD $0xa2620b6f487ee162; QUAD $0x487ec162cc7640fd; QUAD $0xec7640fda262286f; QUAD $0x8262036f487ee162; QUAD $0x487ec162c27640cd; QUAD $0xe27640cd8262206f; QUAD $0x8262336f487ee162; QUAD $0x487e4162f77640a5; QUAD $0xd77640a50262106f; QUAD $0x02621b6f487e6162; QUAD $0x487e4162dd7640b5; QUAD $0xfd7640b50262386f; QUAD $0x02620b6f487e6162; QUAD $0x487e4162cc7640bd; QUAD $0xec7640bd0262286f; QUAD $0x62eec023408d2362; QUAD $0x236244c023408da3; QUAD $0xada362eee42348ad; QUAD $0x40c5036244e42348; QUAD $0x2340c51362eef723; QUAD $0xfd2340d5036244d7; QUAD $0x44fd2340d58362ee; QUAD $0x62eeea2348b50362; QUAD $0x036244ea2348b583; QUAD $0xe51362eed32340e5; QUAD $0x40f5036244cb2340; QUAD $0x2340f58362eed923; QUAD $0xce2340ed236244d9; QUAD $0x44ce2340eda362ee; QUAD $0xc162d16f487ec162; QUAD $0x407dc262f26f487e; QUAD $0xcb004075c262c300; QUAD $0xc262d300406dc262; QUAD $0x405dc262db004065; QUAD $0xeb004055c262e300; QUAD $0xc262f300404dc262; QUAD $0x403d4262fb004045; QUAD $0xcb0040354262c300; QUAD $0x4262d300402d4262; QUAD $0x401d4262db004025; QUAD $0xeb0040154262e300; QUAD $0x4262f300400d4262; QUAD $0x48455162fb004005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6201626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD 
$0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916202626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16203; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16204626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; 
QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16205626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x06626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16207626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD 
$0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1620862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6209626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1620a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; 
QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591620b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91620c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591620d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD 
$0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x0e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591620f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591621062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; 
QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x48455162fdfe4005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6211626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916212626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16213; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD 
$0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16214626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16215626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x16626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; 
QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16217626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1621862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6219626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD 
$0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1621a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591621b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91621c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; 
QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591621d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x1e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591621f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD 
$0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591622062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x48455162fdfe4005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6221626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916222626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; 
QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16223; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16224626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16225626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD 
$0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x26626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16227626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1622862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; 
QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6229626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1622a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591622b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD 
$0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91622c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591622d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x2e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; 
QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591622f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591623062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x01ee8348fdfe4005
118 JE lastLoop
119 ADDQ $8, R13
120 MOVQ (R13), R14
121 QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; WORD $0x626f; BYTE $0x31
122 TESTQ $(1<<0), R14
123 JE skipNext0
124 MOVQ 0*24(AX), R9
125 LONG $0x487cc162; WORD $0x0410; BYTE $0x09
126
127skipNext0:
128 QUAD $0x7162c4fe484d5162; QUAD $0x482df162cb6f487e; QUAD $0x724825f16206c372; QUAD $0xc372481df1620bc3; QUAD $0xcacd25485d736219; QUAD $0x5362c1fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d0fe486dd162c2; QUAD $0xf16202c772484df1; QUAD $0x1df1620dc7724825; QUAD $0x487e716216c77248; QUAD $0xc925487d7362cf6f; QUAD $0x96f4254825d362e8; QUAD $0xd162f1fe484dd162; QUAD $0x487e7162f0fe484d; WORD $0x626f; BYTE $0x32
129 TESTQ $(1<<1), R14
130 JE skipNext1
131 MOVQ 1*24(AX), R9
132 LONG $0x487cc162; WORD $0x0c10; BYTE $0x09
133
134skipNext1:
135 QUAD $0x7162c4fe48555162; QUAD $0x482df162ca6f487e; QUAD $0x724825f16206c272; QUAD $0xc272481df1620bc2; QUAD $0xcacc254865736219; QUAD $0x5362c2fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c8fe4875d162c2; QUAD $0xf16202c6724855f1; QUAD $0x1df1620dc6724825; QUAD $0x487e716216c67248; QUAD $0xc82548457362ce6f; QUAD $0x96ec254825d362e8; QUAD $0xd162e9fe4855d162; QUAD $0x487e7162e8fe4855; WORD $0x626f; BYTE $0x33
136 TESTQ $(1<<2), R14
137 JE skipNext2
138 MOVQ 2*24(AX), R9
139 LONG $0x487cc162; WORD $0x1410; BYTE $0x09
140
141skipNext2:
142 QUAD $0x7162c4fe485d5162; QUAD $0x482df162c96f487e; QUAD $0x724825f16206c172; QUAD $0xc172481df1620bc1; QUAD $0xcacb25486d736219; QUAD $0x5362c3fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c0fe487dd162c2; QUAD $0xf16202c572485df1; QUAD $0x1df1620dc5724825; QUAD $0x487e716216c57248; QUAD $0xcf25484d7362cd6f; QUAD $0x96e4254825d362e8; QUAD $0xd162e1fe485dd162; QUAD $0x487e7162e0fe485d; WORD $0x626f; BYTE $0x34
143 TESTQ $(1<<3), R14
144 JE skipNext3
145 MOVQ 3*24(AX), R9
146 LONG $0x487cc162; WORD $0x1c10; BYTE $0x09
147
148skipNext3:
149 QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; WORD $0x626f; BYTE $0x35
150 TESTQ $(1<<4), R14
151 JE skipNext4
152 MOVQ 4*24(AX), R9
153 LONG $0x487cc162; WORD $0x2410; BYTE $0x09
154
155skipNext4:
156 QUAD $0x7162c4fe486d5162; QUAD $0x482df162cf6f487e; QUAD $0x724825f16206c772; QUAD $0xc772481df1620bc7; QUAD $0xcac925487d736219; QUAD $0x5362c5fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f0fe484dd162c2; QUAD $0xf16202c372486df1; QUAD $0x1df1620dc3724825; QUAD $0x487e716216c37248; QUAD $0xcd25485d7362cb6f; QUAD $0x96d4254825d362e8; QUAD $0xd162d1fe486dd162; QUAD $0x487e7162d0fe486d; WORD $0x626f; BYTE $0x36
157 TESTQ $(1<<5), R14
158 JE skipNext5
159 MOVQ 5*24(AX), R9
160 LONG $0x487cc162; WORD $0x2c10; BYTE $0x09
161
162skipNext5:
163 QUAD $0x7162c4fe48755162; QUAD $0x482df162ce6f487e; QUAD $0x724825f16206c672; QUAD $0xc672481df1620bc6; QUAD $0xcac8254845736219; QUAD $0x5362c6fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e8fe4855d162c2; QUAD $0xf16202c2724875f1; QUAD $0x1df1620dc2724825; QUAD $0x487e716216c27248; QUAD $0xcc2548657362ca6f; QUAD $0x96cc254825d362e8; QUAD $0xd162c9fe4875d162; QUAD $0x487e7162c8fe4875; WORD $0x626f; BYTE $0x37
164 TESTQ $(1<<6), R14
165 JE skipNext6
166 MOVQ 6*24(AX), R9
167 LONG $0x487cc162; WORD $0x3410; BYTE $0x09
168
169skipNext6:
170 QUAD $0x7162c4fe487d5162; QUAD $0x482df162cd6f487e; QUAD $0x724825f16206c572; QUAD $0xc572481df1620bc5; QUAD $0xcacf25484d736219; QUAD $0x5362c7fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e0fe485dd162c2; QUAD $0xf16202c172487df1; QUAD $0x1df1620dc1724825; QUAD $0x487e716216c17248; QUAD $0xcb25486d7362c96f; QUAD $0x96c4254825d362e8; QUAD $0xd162c1fe487dd162; QUAD $0x487e7162c0fe487d; WORD $0x626f; BYTE $0x38
171 TESTQ $(1<<7), R14
172 JE skipNext7
173 MOVQ 7*24(AX), R9
174 LONG $0x487cc162; WORD $0x3c10; BYTE $0x09
175
176skipNext7:
177 QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; WORD $0x626f; BYTE $0x39
178 TESTQ $(1<<8), R14
179 JE skipNext8
180 MOVQ 8*24(AX), R9
181 LONG $0x487c4162; WORD $0x0410; BYTE $0x09
182
183skipNext8:
184 QUAD $0x7162c4fe484d5162; QUAD $0x482df162cb6f487e; QUAD $0x724825f16206c372; QUAD $0xc372481df1620bc3; QUAD $0xcacd25485d736219; QUAD $0x5362c1fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d0fe486dd162c2; QUAD $0xf16202c772484df1; QUAD $0x1df1620dc7724825; QUAD $0x487e716216c77248; QUAD $0xc925487d7362cf6f; QUAD $0x96f4254825d362e8; QUAD $0xd162f1fe484dd162; QUAD $0x487e7162f0fe484d; WORD $0x626f; BYTE $0x3a
185 TESTQ $(1<<9), R14
186 JE skipNext9
187 MOVQ 9*24(AX), R9
188 LONG $0x487c4162; WORD $0x0c10; BYTE $0x09
189
190skipNext9:
191 QUAD $0x7162c4fe48555162; QUAD $0x482df162ca6f487e; QUAD $0x724825f16206c272; QUAD $0xc272481df1620bc2; QUAD $0xcacc254865736219; QUAD $0x5362c2fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c8fe4875d162c2; QUAD $0xf16202c6724855f1; QUAD $0x1df1620dc6724825; QUAD $0x487e716216c67248; QUAD $0xc82548457362ce6f; QUAD $0x96ec254825d362e8; QUAD $0xd162e9fe4855d162; QUAD $0x487e7162e8fe4855; WORD $0x626f; BYTE $0x3b
192 TESTQ $(1<<10), R14
193 JE skipNext10
194 MOVQ 10*24(AX), R9
195 LONG $0x487c4162; WORD $0x1410; BYTE $0x09
196
197skipNext10:
198 QUAD $0x7162c4fe485d5162; QUAD $0x482df162c96f487e; QUAD $0x724825f16206c172; QUAD $0xc172481df1620bc1; QUAD $0xcacb25486d736219; QUAD $0x5362c3fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c0fe487dd162c2; QUAD $0xf16202c572485df1; QUAD $0x1df1620dc5724825; QUAD $0x487e716216c57248; QUAD $0xcf25484d7362cd6f; QUAD $0x96e4254825d362e8; QUAD $0xd162e1fe485dd162; QUAD $0x487e7162e0fe485d; WORD $0x626f; BYTE $0x3c
199 TESTQ $(1<<11), R14
200 JE skipNext11
201 MOVQ 11*24(AX), R9
202 LONG $0x487c4162; WORD $0x1c10; BYTE $0x09
203
204skipNext11:
205 QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; WORD $0x626f; BYTE $0x3d
206 TESTQ $(1<<12), R14
207 JE skipNext12
208 MOVQ 12*24(AX), R9
209 LONG $0x487c4162; WORD $0x2410; BYTE $0x09
210
211skipNext12:
212 QUAD $0x7162c4fe486d5162; QUAD $0x482df162cf6f487e; QUAD $0x724825f16206c772; QUAD $0xc772481df1620bc7; QUAD $0xcac925487d736219; QUAD $0x5362c5fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f0fe484dd162c2; QUAD $0xf16202c372486df1; QUAD $0x1df1620dc3724825; QUAD $0x487e716216c37248; QUAD $0xcd25485d7362cb6f; QUAD $0x96d4254825d362e8; QUAD $0xd162d1fe486dd162; QUAD $0x487e7162d0fe486d; WORD $0x626f; BYTE $0x3e
213 TESTQ $(1<<13), R14
214 JE skipNext13
215 MOVQ 13*24(AX), R9
216 LONG $0x487c4162; WORD $0x2c10; BYTE $0x09
217
218skipNext13:
219 QUAD $0x7162c4fe48755162; QUAD $0x482df162ce6f487e; QUAD $0x724825f16206c672; QUAD $0xc672481df1620bc6; QUAD $0xcac8254845736219; QUAD $0x5362c6fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e8fe4855d162c2; QUAD $0xf16202c2724875f1; QUAD $0x1df1620dc2724825; QUAD $0x487e716216c27248; QUAD $0xcc2548657362ca6f; QUAD $0x96cc254825d362e8; QUAD $0xd162c9fe4875d162; QUAD $0x487e7162c8fe4875; WORD $0x626f; BYTE $0x3f
220 TESTQ $(1<<14), R14
221 JE skipNext14
222 MOVQ 14*24(AX), R9
223 LONG $0x487c4162; WORD $0x3410; BYTE $0x09
224
225skipNext14:
226 QUAD $0x7162c4fe487d5162; QUAD $0x482df162cd6f487e; QUAD $0x724825f16206c572; QUAD $0xc572481df1620bc5; QUAD $0xcacf25484d736219; QUAD $0x5362c7fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e0fe485dd162c2; QUAD $0xf16202c172487df1; QUAD $0x1df1620dc1724825; QUAD $0x487e716216c17248; QUAD $0xcb25486d7362c96f; QUAD $0x96c4254825d362e8; QUAD $0xd162c1fe487dd162; QUAD $0x487e7162c0fe487d; WORD $0x626f; BYTE $0x40
227 TESTQ $(1<<15), R14
228 JE skipNext15
229 MOVQ 15*24(AX), R9
230 LONG $0x487c4162; WORD $0x3c10; BYTE $0x09
231
232skipNext15:
233 QUAD $0xd162d86f487e7162; QUAD $0x7dd16224046f487e; QUAD $0x6f487e7162c3fe49; QUAD $0x244c6f487ed162d9; QUAD $0x62cbfe4975d16201; QUAD $0x7ed162da6f487e71; QUAD $0x6dd1620224546f48; QUAD $0x6f487e7162d3fe49; QUAD $0x245c6f487ed162db; QUAD $0x62dbfe4965d16203; QUAD $0x7ed162dc6f487e71; QUAD $0x5dd1620424646f48; QUAD $0x6f487e7162e3fe49; QUAD $0x246c6f487ed162dd; QUAD $0x62ebfe4955d16205; QUAD $0x7ed162de6f487e71; QUAD $0x4dd1620624746f48; QUAD $0x6f487e7162f3fe49; QUAD $0x247c6f487ed162df; QUAD $0xc4fbfe4945d16207; LONG $0xce92fbc1
234 JMP lloop
235
236lastLoop:
237 QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; QUAD $0xfe484d516231626f; QUAD $0x62cb6f487e7162c4; QUAD $0xf16206c372482df1; QUAD $0x1df1620bc3724825; QUAD $0x485d736219c37248; QUAD $0xfe483d3162cacd25; QUAD $0x96d42548255362c1; QUAD $0x5162c1fe483d5162; QUAD $0x486dd162c2fe483d; QUAD $0xc772484df162d0fe; QUAD $0x0dc7724825f16202; QUAD $0x6216c772481df162; QUAD $0x7d7362cf6f487e71; QUAD $0x4825d362e8c92548; QUAD $0xfe484dd16296f425; QUAD $0x62f0fe484dd162f1; QUAD $0x516232626f487e71; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x62c4fe485d516233; QUAD $0x2df162c96f487e71; QUAD $0x4825f16206c17248; QUAD $0x72481df1620bc172; QUAD $0xcb25486d736219c1; QUAD $0x62c3fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xc0fe487dd162c2fe; QUAD $0x6202c572485df162; QUAD $0xf1620dc5724825f1; QUAD $0x7e716216c572481d; QUAD $0x25484d7362cd6f48; QUAD $0xe4254825d362e8cf; QUAD $0x62e1fe485dd16296; QUAD $0x7e7162e0fe485dd1; QUAD $0x4865516234626f48; QUAD $0xc86f487e7162c4fe; QUAD $0x6206c072482df162; QUAD $0xf1620bc0724825f1; QUAD $0x75736219c072481d; QUAD $0x483d3162caca2548; QUAD $0xd42548255362c4fe; QUAD $0x62c1fe483d516296; QUAD $0x45d162c2fe483d51; QUAD $0x724865f162f8fe48; QUAD $0xc4724825f16202c4; QUAD 
$0x16c472481df1620d; QUAD $0x7362cc6f487e7162; QUAD $0x25d362e8ce254855; QUAD $0x4865d16296dc2548; QUAD $0xd8fe4865d162d9fe; QUAD $0x6235626f487e7162; QUAD $0x7e7162c4fe486d51; QUAD $0x72482df162cf6f48; QUAD $0xc7724825f16206c7; QUAD $0x19c772481df1620b; QUAD $0x62cac925487d7362; QUAD $0x255362c5fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162f0fe484dd162; QUAD $0x25f16202c372486d; QUAD $0x481df1620dc37248; QUAD $0x6f487e716216c372; QUAD $0xe8cd25485d7362cb; QUAD $0x6296d4254825d362; QUAD $0x6dd162d1fe486dd1; QUAD $0x6f487e7162d0fe48; QUAD $0xc4fe487551623662; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x7d516237626f487e; QUAD $0x6f487e7162c4fe48; QUAD $0x06c572482df162cd; QUAD $0x620bc5724825f162; QUAD $0x736219c572481df1; QUAD $0x3d3162cacf25484d; QUAD $0x2548255362c7fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x487df162e0fe485d; QUAD $0x724825f16202c172; QUAD $0xc172481df1620dc1; QUAD $0x62c96f487e716216; QUAD $0xd362e8cb25486d73; QUAD $0x7dd16296c4254825; QUAD $0xfe487dd162c1fe48; QUAD $0x38626f487e7162c0; QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; QUAD $0xfe484d516239626f; QUAD $0x62cb6f487e7162c4; QUAD $0xf16206c372482df1; QUAD $0x1df1620bc3724825; QUAD $0x485d736219c37248; QUAD $0xfe483d1162cacd25; 
QUAD $0x96d42548255362c1; QUAD $0x5162c1fe483d5162; QUAD $0x486dd162c2fe483d; QUAD $0xc772484df162d0fe; QUAD $0x0dc7724825f16202; QUAD $0x6216c772481df162; QUAD $0x7d7362cf6f487e71; QUAD $0x4825d362e8c92548; QUAD $0xfe484dd16296f425; QUAD $0x62f0fe484dd162f1; QUAD $0x51623a626f487e71; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x62c4fe485d51623b; QUAD $0x2df162c96f487e71; QUAD $0x4825f16206c17248; QUAD $0x72481df1620bc172; QUAD $0xcb25486d736219c1; QUAD $0x62c3fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xc0fe487dd162c2fe; QUAD $0x6202c572485df162; QUAD $0xf1620dc5724825f1; QUAD $0x7e716216c572481d; QUAD $0x25484d7362cd6f48; QUAD $0xe4254825d362e8cf; QUAD $0x62e1fe485dd16296; QUAD $0x7e7162e0fe485dd1; QUAD $0x486551623c626f48; QUAD $0xc86f487e7162c4fe; QUAD $0x6206c072482df162; QUAD $0xf1620bc0724825f1; QUAD $0x75736219c072481d; QUAD $0x483d1162caca2548; QUAD $0xd42548255362c4fe; QUAD $0x62c1fe483d516296; QUAD $0x45d162c2fe483d51; QUAD $0x724865f162f8fe48; QUAD $0xc4724825f16202c4; QUAD $0x16c472481df1620d; QUAD $0x7362cc6f487e7162; QUAD $0x25d362e8ce254855; QUAD $0x4865d16296dc2548; QUAD $0xd8fe4865d162d9fe; QUAD $0x623d626f487e7162; QUAD $0x7e7162c4fe486d51; QUAD $0x72482df162cf6f48; QUAD $0xc7724825f16206c7; QUAD $0x19c772481df1620b; QUAD $0x62cac925487d7362; QUAD $0x255362c5fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162f0fe484dd162; QUAD $0x25f16202c372486d; QUAD $0x481df1620dc37248; QUAD $0x6f487e716216c372; QUAD $0xe8cd25485d7362cb; QUAD $0x6296d4254825d362; QUAD $0x6dd162d1fe486dd1; QUAD $0x6f487e7162d0fe48; QUAD 
$0xc4fe487551623e62; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x7d51623f626f487e; QUAD $0x6f487e7162c4fe48; QUAD $0x06c572482df162cd; QUAD $0x620bc5724825f162; QUAD $0x736219c572481df1; QUAD $0x3d1162cacf25484d; QUAD $0x2548255362c7fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x487df162e0fe485d; QUAD $0x724825f16202c172; QUAD $0xc172481df1620dc1; QUAD $0x62c96f487e716216; QUAD $0xd362e8cb25486d73; QUAD $0x7dd16296c4254825; QUAD $0xfe487dd162c1fe48; QUAD $0x40626f487e7162c0; QUAD $0xd162d86f487e7162; QUAD $0x7dd16224046f487e; QUAD $0x6f487e7162c3fe49; QUAD $0x244c6f487ed162d9; QUAD $0x62cbfe4975d16201; QUAD $0x7ed162da6f487e71; QUAD $0x6dd1620224546f48; QUAD $0x6f487e7162d3fe49; QUAD $0x245c6f487ed162db; QUAD $0x62dbfe4965d16203; QUAD $0x7ed162dc6f487e71; QUAD $0x5dd1620424646f48; QUAD $0x6f487e7162e3fe49; QUAD $0x246c6f487ed162dd; QUAD $0x62ebfe4955d16205; QUAD $0x7ed162de6f487e71; QUAD $0x4dd1620624746f48; QUAD $0x6f487e7162f3fe49; QUAD $0x247c6f487ed162df; QUAD $0x62fbfe4945d16207; QUAD $0x7ef162077f487ef1; QUAD $0x487ef162014f7f48; QUAD $0x7f487ef16202577f; QUAD $0x677f487ef162035f; QUAD $0x056f7f487ef16204; QUAD $0x6206777f487ef162; LONG $0x7f487ef1; WORD $0x077f
238 VZEROUPPER
239 RET
240
// PSHUFFLE_BYTE_FLIP_MASK is a 64-byte table holding the same 16-byte pattern
// four times. Read as little-endian bytes the pattern is
//   03 02 01 00 | 07 06 05 04 | 0b 0a 09 08 | 0f 0e 0d 0c
// i.e. byte indices that reverse the byte order inside every 32-bit word.
// The GLOBL flag 8 is the numeric value of RODATA in Go's textflag.h.
// NOTE(review): the instructions that consume this table are emitted as raw
// opcode bytes and are not decoded here -- confirm usage against the .asm source.
241DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x000(SB)/8, $0x0405060700010203
242DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x008(SB)/8, $0x0c0d0e0f08090a0b
243DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x010(SB)/8, $0x0405060700010203
244DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x018(SB)/8, $0x0c0d0e0f08090a0b
245DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x020(SB)/8, $0x0405060700010203
246DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x028(SB)/8, $0x0c0d0e0f08090a0b
247DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x030(SB)/8, $0x0405060700010203
248DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x038(SB)/8, $0x0c0d0e0f08090a0b
249GLOBL PSHUFFLE_BYTE_FLIP_MASK<>(SB), 8, $64
// PSHUFFLE_TRANSPOSE16_MASK1 is a 64-byte read-only table of eight 64-bit
// entries: 0, 1, 8, 9, 4, 5, 0xC, 0xD.
// NOTE(review): presumably these are quadword indices for a two-source
// permute used while transposing the 16 message lanes (going by the symbol
// name); the consuming instruction is raw-encoded -- TODO confirm.
250DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x000(SB)/8, $0x0000000000000000
251DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x008(SB)/8, $0x0000000000000001
252DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x010(SB)/8, $0x0000000000000008
253DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x018(SB)/8, $0x0000000000000009
254DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x020(SB)/8, $0x0000000000000004
255DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x028(SB)/8, $0x0000000000000005
256DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x030(SB)/8, $0x000000000000000C
257DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x038(SB)/8, $0x000000000000000D
258GLOBL PSHUFFLE_TRANSPOSE16_MASK1<>(SB), 8, $64
// PSHUFFLE_TRANSPOSE16_MASK2 is a 64-byte read-only table of eight 64-bit
// entries: 2, 3, 0xA, 0xB, 6, 7, 0xE, 0xF -- each entry is the matching
// PSHUFFLE_TRANSPOSE16_MASK1 entry plus 2.
// NOTE(review): presumably the second half of the same transpose permute;
// the consuming instruction is raw-encoded -- TODO confirm.
259DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x000(SB)/8, $0x0000000000000002
260DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x008(SB)/8, $0x0000000000000003
261DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x010(SB)/8, $0x000000000000000A
262DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x018(SB)/8, $0x000000000000000B
263DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x020(SB)/8, $0x0000000000000006
264DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x028(SB)/8, $0x0000000000000007
265DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x030(SB)/8, $0x000000000000000E
266DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x038(SB)/8, $0x000000000000000F
267GLOBL PSHUFFLE_TRANSPOSE16_MASK2<>(SB), 8, $64
diff --git a/vendor/github.com/minio/sha256-simd/sha256block_amd64.go b/vendor/github.com/minio/sha256-simd/sha256block_amd64.go
new file mode 100644
index 0000000..e536f54
--- /dev/null
+++ b/vendor/github.com/minio/sha256-simd/sha256block_amd64.go
@@ -0,0 +1,31 @@
1//go:build !noasm && !appengine && gc
2// +build !noasm,!appengine,gc
3
4/*
5 * Minio Cloud Storage, (C) 2016 Minio, Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20package sha256
21
// blockArmSha2Go is the amd64 placeholder for the ARM SHA2 block routine.
// It ignores both arguments and always panics.
// NOTE(review): presumably it exists only so that architecture-independent
// callers of this name still compile on amd64 -- confirm against sha256.go.
22func blockArmSha2Go(dig *digest, p []byte) {
23 panic("blockArmSha2Go called unexpectedly")
24}
25
// blockIntelSha is implemented in assembly (sha256block_amd64.s).
//
// h points to the eight 32-bit SHA-256 state words, which are read on entry
// and overwritten on return. message is consumed in 64-byte blocks; any
// trailing bytes that do not make up a full block are not processed.
26//go:noescape
27func blockIntelSha(h *[8]uint32, message []uint8)
28
// blockIntelShaGo adapts the assembly routine to the (*digest, []byte)
// signature: it passes the address of dig.h and the input p to blockIntelSha,
// which updates dig.h in place.
29func blockIntelShaGo(dig *digest, p []byte) {
30 blockIntelSha(&dig.h, p)
31}
diff --git a/vendor/github.com/minio/sha256-simd/sha256block_amd64.s b/vendor/github.com/minio/sha256-simd/sha256block_amd64.s
new file mode 100644
index 0000000..c98a1d8
--- /dev/null
+++ b/vendor/github.com/minio/sha256-simd/sha256block_amd64.s
@@ -0,0 +1,266 @@
1//+build !noasm,!appengine,gc
2
3// SHA intrinsic version of SHA256
4
5// Kristofer Peterson, (C) 2018.
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10//
11// http://www.apache.org/licenses/LICENSE-2.0
12//
13// Unless required by applicable law or agreed to in writing, software
14// distributed under the License is distributed on an "AS IS" BASIS,
15// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16// See the License for the specific language governing permissions and
17// limitations under the License.
18//
19
20#include "textflag.h"
21
// K holds the 64 SHA-256 round constants as consecutive 32-bit words
// (256 bytes in total). blockIntelSha below loads them 16 bytes, i.e. four
// rounds' worth, at a time through BX.
22DATA K<>+0x00(SB)/4, $0x428a2f98
23DATA K<>+0x04(SB)/4, $0x71374491
24DATA K<>+0x08(SB)/4, $0xb5c0fbcf
25DATA K<>+0x0c(SB)/4, $0xe9b5dba5
26DATA K<>+0x10(SB)/4, $0x3956c25b
27DATA K<>+0x14(SB)/4, $0x59f111f1
28DATA K<>+0x18(SB)/4, $0x923f82a4
29DATA K<>+0x1c(SB)/4, $0xab1c5ed5
30DATA K<>+0x20(SB)/4, $0xd807aa98
31DATA K<>+0x24(SB)/4, $0x12835b01
32DATA K<>+0x28(SB)/4, $0x243185be
33DATA K<>+0x2c(SB)/4, $0x550c7dc3
34DATA K<>+0x30(SB)/4, $0x72be5d74
35DATA K<>+0x34(SB)/4, $0x80deb1fe
36DATA K<>+0x38(SB)/4, $0x9bdc06a7
37DATA K<>+0x3c(SB)/4, $0xc19bf174
38DATA K<>+0x40(SB)/4, $0xe49b69c1
39DATA K<>+0x44(SB)/4, $0xefbe4786
40DATA K<>+0x48(SB)/4, $0x0fc19dc6
41DATA K<>+0x4c(SB)/4, $0x240ca1cc
42DATA K<>+0x50(SB)/4, $0x2de92c6f
43DATA K<>+0x54(SB)/4, $0x4a7484aa
44DATA K<>+0x58(SB)/4, $0x5cb0a9dc
45DATA K<>+0x5c(SB)/4, $0x76f988da
46DATA K<>+0x60(SB)/4, $0x983e5152
47DATA K<>+0x64(SB)/4, $0xa831c66d
48DATA K<>+0x68(SB)/4, $0xb00327c8
49DATA K<>+0x6c(SB)/4, $0xbf597fc7
50DATA K<>+0x70(SB)/4, $0xc6e00bf3
51DATA K<>+0x74(SB)/4, $0xd5a79147
52DATA K<>+0x78(SB)/4, $0x06ca6351
53DATA K<>+0x7c(SB)/4, $0x14292967
54DATA K<>+0x80(SB)/4, $0x27b70a85
55DATA K<>+0x84(SB)/4, $0x2e1b2138
56DATA K<>+0x88(SB)/4, $0x4d2c6dfc
57DATA K<>+0x8c(SB)/4, $0x53380d13
58DATA K<>+0x90(SB)/4, $0x650a7354
59DATA K<>+0x94(SB)/4, $0x766a0abb
60DATA K<>+0x98(SB)/4, $0x81c2c92e
61DATA K<>+0x9c(SB)/4, $0x92722c85
62DATA K<>+0xa0(SB)/4, $0xa2bfe8a1
63DATA K<>+0xa4(SB)/4, $0xa81a664b
64DATA K<>+0xa8(SB)/4, $0xc24b8b70
65DATA K<>+0xac(SB)/4, $0xc76c51a3
66DATA K<>+0xb0(SB)/4, $0xd192e819
67DATA K<>+0xb4(SB)/4, $0xd6990624
68DATA K<>+0xb8(SB)/4, $0xf40e3585
69DATA K<>+0xbc(SB)/4, $0x106aa070
70DATA K<>+0xc0(SB)/4, $0x19a4c116
71DATA K<>+0xc4(SB)/4, $0x1e376c08
72DATA K<>+0xc8(SB)/4, $0x2748774c
73DATA K<>+0xcc(SB)/4, $0x34b0bcb5
74DATA K<>+0xd0(SB)/4, $0x391c0cb3
75DATA K<>+0xd4(SB)/4, $0x4ed8aa4a
76DATA K<>+0xd8(SB)/4, $0x5b9cca4f
77DATA K<>+0xdc(SB)/4, $0x682e6ff3
78DATA K<>+0xe0(SB)/4, $0x748f82ee
79DATA K<>+0xe4(SB)/4, $0x78a5636f
80DATA K<>+0xe8(SB)/4, $0x84c87814
81DATA K<>+0xec(SB)/4, $0x8cc70208
82DATA K<>+0xf0(SB)/4, $0x90befffa
83DATA K<>+0xf4(SB)/4, $0xa4506ceb
84DATA K<>+0xf8(SB)/4, $0xbef9a3f7
85DATA K<>+0xfc(SB)/4, $0xc67178f2
86GLOBL K<>(SB), RODATA|NOPTR, $256
87
// SHUF_MASK is the 16-byte PSHUFB control that reverses the byte order inside
// each 32-bit word (little-endian byte indices 03 02 01 00 | 07 06 05 04 |
// 0b 0a 09 08 | 0f 0e 0d 0c). blockIntelSha keeps it in X15 and applies it to
// every 16 bytes of message data after loading them.
88DATA SHUF_MASK<>+0x00(SB)/8, $0x0405060700010203
89DATA SHUF_MASK<>+0x08(SB)/8, $0x0c0d0e0f08090a0b
90GLOBL SHUF_MASK<>(SB), RODATA|NOPTR, $16
91
92// Register Usage
93// BX base address of constant table (constant)
94// DX hash_state (constant)
95// SI hash_data.data
96// DI hash_data.data + hash_data.length - 64 (constant)
97// X0 scratch
98// X1 scratch
99// X2 working hash state // ABEF
100// X3 working hash state // CDGH
101// X4 first 16 bytes of block
102// X5 second 16 bytes of block
103// X6 third 16 bytes of block
104// X7 fourth 16 bytes of block
105// X12 saved hash state // ABEF
106// X13 saved hash state // CDGH
107// X15 data shuffle mask (constant)
108
// func blockIntelSha(h *[8]uint32, message []uint8)
//
// Runs the SHA-256 compression function over every complete 64-byte block of
// message using the x86 SHA extensions (emitted as raw LONG/WORD opcodes, with
// the decoded instruction in the trailing comment), then writes the updated
// state back through h. Bytes after the last complete block are ignored; with
// fewer than 64 bytes the loop body never runs and h is rewritten unchanged.
// Arguments occupy 32 bytes: the pointer h plus the 24-byte slice header.
109TEXT ·blockIntelSha(SB), NOSPLIT, $0-32
110 MOVQ h+0(FP), DX
111 MOVQ message_base+8(FP), SI
112 MOVQ message_len+16(FP), DI
// DI = message_base + message_len - 64: the last address at which a full
// 64-byte block can still start.
113 LEAQ -64(SI)(DI*1), DI
// Load h[0..3] and h[4..7], then repack them into the ABEF (X2) / CDGH (X3)
// layout listed under "Register Usage" above.
114 MOVOU (DX), X2
115 MOVOU 16(DX), X1
116 MOVO X2, X3
117 PUNPCKLLQ X1, X2
118 PUNPCKHLQ X1, X3
119 PSHUFD $0x27, X2, X2
120 PSHUFD $0x27, X3, X3
121 MOVO SHUF_MASK<>(SB), X15
122 LEAQ K<>(SB), BX
123
124 JMP TEST
125
126LOOP:
// Keep the incoming state so it can be added back after the 64 rounds.
127 MOVO X2, X12
128 MOVO X3, X13
129
130 // load block and shuffle
131 MOVOU (SI), X4
132 MOVOU 16(SI), X5
133 MOVOU 32(SI), X6
134 MOVOU 48(SI), X7
135 PSHUFB X15, X4
136 PSHUFB X15, X5
137 PSHUFB X15, X6
138 PSHUFB X15, X7
139
// The four ROUNDxyz macros each perform four rounds. On entry X0 must already
// hold the four round constants for those rounds. Each macro:
//   - adds the current message words to X0 and runs SHA256RNDS2 twice, with
//     PSHUFD $0x4e swapping the 64-bit halves of X0 in between;
//   - advances the message schedule with PALIGNR + PADDL + SHA256MSG2 and
//     starts the next update with SHA256MSG1.
// The digits in the name are the X registers the macro touches, e.g. ROUND456
// consumes X5, finishes X6 and starts X4.
140#define ROUND456 \
141 PADDL X5, X0 \
142 LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2
143 MOVO X5, X1 \
144 LONG $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1, XMM4, 4
145 PADDL X1, X6 \
146 LONG $0xf5cd380f \ // SHA256MSG2 XMM6, XMM5
147 PSHUFD $0x4e, X0, X0 \
148 LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3
149 LONG $0xe5cc380f // SHA256MSG1 XMM4, XMM5
150
151#define ROUND567 \
152 PADDL X6, X0 \
153 LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2
154 MOVO X6, X1 \
155 LONG $0x0f3a0f66; WORD $0x04cd \ // PALIGNR XMM1, XMM5, 4
156 PADDL X1, X7 \
157 LONG $0xfecd380f \ // SHA256MSG2 XMM7, XMM6
158 PSHUFD $0x4e, X0, X0 \
159 LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3
160 LONG $0xeecc380f // SHA256MSG1 XMM5, XMM6
161
162#define ROUND674 \
163 PADDL X7, X0 \
164 LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2
165 MOVO X7, X1 \
166 LONG $0x0f3a0f66; WORD $0x04ce \ // PALIGNR XMM1, XMM6, 4
167 PADDL X1, X4 \
168 LONG $0xe7cd380f \ // SHA256MSG2 XMM4, XMM7
169 PSHUFD $0x4e, X0, X0 \
170 LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3
171 LONG $0xf7cc380f // SHA256MSG1 XMM6, XMM7
172
173#define ROUND745 \
174 PADDL X4, X0 \
175 LONG $0xdacb380f \ // SHA256RNDS2 XMM3, XMM2
176 MOVO X4, X1 \
177 LONG $0x0f3a0f66; WORD $0x04cf \ // PALIGNR XMM1, XMM7, 4
178 PADDL X1, X5 \
179 LONG $0xeccd380f \ // SHA256MSG2 XMM5, XMM4
180 PSHUFD $0x4e, X0, X0 \
181 LONG $0xd3cb380f \ // SHA256RNDS2 XMM2, XMM3
182 LONG $0xfccc380f // SHA256MSG1 XMM7, XMM4
183
// Rounds 0-11 are written out by hand because they use the message words
// as loaded and only begin the schedule updates (SHA256MSG1, no MSG2 yet).
184 // rounds 0-3
185 MOVO (BX), X0
186 PADDL X4, X0
187 LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
188 PSHUFD $0x4e, X0, X0
189 LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
190
191 // rounds 4-7
192 MOVO 1*16(BX), X0
193 PADDL X5, X0
194 LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
195 PSHUFD $0x4e, X0, X0
196 LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
197 LONG $0xe5cc380f // SHA256MSG1 XMM4, XMM5
198
199 // rounds 8-11
200 MOVO 2*16(BX), X0
201 PADDL X6, X0
202 LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
203 PSHUFD $0x4e, X0, X0
204 LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
205 LONG $0xeecc380f // SHA256MSG1 XMM5, XMM6
206
// Rounds 12-51: load the next four round constants into X0, then run the
// macro that matches the current rotation of X4..X7.
207 MOVO 3*16(BX), X0; ROUND674 // rounds 12-15
208 MOVO 4*16(BX), X0; ROUND745 // rounds 16-19
209 MOVO 5*16(BX), X0; ROUND456 // rounds 20-23
210 MOVO 6*16(BX), X0; ROUND567 // rounds 24-27
211 MOVO 7*16(BX), X0; ROUND674 // rounds 28-31
212 MOVO 8*16(BX), X0; ROUND745 // rounds 32-35
213 MOVO 9*16(BX), X0; ROUND456 // rounds 36-39
214 MOVO 10*16(BX), X0; ROUND567 // rounds 40-43
215 MOVO 11*16(BX), X0; ROUND674 // rounds 44-47
216 MOVO 12*16(BX), X0; ROUND745 // rounds 48-51
217
// Rounds 52-59 repeat the ROUND456 / ROUND567 sequences without the trailing
// SHA256MSG1; rounds 60-63 perform no schedule work at all.
218 // rounds 52-55
219 MOVO 13*16(BX), X0
220 PADDL X5, X0
221 LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
222 MOVO X5, X1
223 LONG $0x0f3a0f66; WORD $0x04cc // PALIGNR XMM1, XMM4, 4
224 PADDL X1, X6
225 LONG $0xf5cd380f // SHA256MSG2 XMM6, XMM5
226 PSHUFD $0x4e, X0, X0
227 LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
228
229 // rounds 56-59
230 MOVO 14*16(BX), X0
231 PADDL X6, X0
232 LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
233 MOVO X6, X1
234 LONG $0x0f3a0f66; WORD $0x04cd // PALIGNR XMM1, XMM5, 4
235 PADDL X1, X7
236 LONG $0xfecd380f // SHA256MSG2 XMM7, XMM6
237 PSHUFD $0x4e, X0, X0
238 LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
239
240 // rounds 60-63
241 MOVO 15*16(BX), X0
242 PADDL X7, X0
243 LONG $0xdacb380f // SHA256RNDS2 XMM3, XMM2
244 PSHUFD $0x4e, X0, X0
245 LONG $0xd3cb380f // SHA256RNDS2 XMM2, XMM3
246
// Add the state saved at the top of the loop to the round output.
247 PADDL X12, X2
248 PADDL X13, X3
249
250 ADDQ $64, SI
251
252TEST:
// Unsigned compare: keep looping while the read pointer SI is still at or
// before DI, i.e. while at least 64 bytes remain.
253 CMPQ SI, DI
254 JBE LOOP
255
// Undo the ABEF/CDGH packing: X0 receives h[0..3], X1 receives h[4..7],
// and both are stored back through DX.
256 PSHUFD $0x4e, X3, X0
257 LONG $0x0e3a0f66; WORD $0xf0c2 // PBLENDW XMM0, XMM2, 0xf0
258 PSHUFD $0x4e, X2, X1
259 LONG $0x0e3a0f66; WORD $0x0fcb // PBLENDW XMM1, XMM3, 0x0f
260 PSHUFD $0x1b, X0, X0
261 PSHUFD $0x1b, X1, X1
262
263 MOVOU X0, (DX)
264 MOVOU X1, 16(DX)
265
266 RET
diff --git a/vendor/github.com/minio/sha256-simd/sha256block_arm64.go b/vendor/github.com/minio/sha256-simd/sha256block_arm64.go
new file mode 100644
index 0000000..d4369e2
--- /dev/null
+++ b/vendor/github.com/minio/sha256-simd/sha256block_arm64.go
@@ -0,0 +1,37 @@
1//go:build !noasm && !appengine && gc
2// +build !noasm,!appengine,gc
3
4/*
5 * Minio Cloud Storage, (C) 2016 Minio, Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20package sha256
21
// blockIntelShaGo is the arm64 placeholder for the Intel SHA block routine.
// It ignores both arguments and always panics.
// NOTE(review): presumably it exists only so that architecture-independent
// callers of this name still compile on arm64 -- confirm against sha256.go.
22func blockIntelShaGo(dig *digest, p []byte) {
23 panic("blockIntelShaGo called unexpectedly")
24}
25
// blockArmSha2 is implemented in assembly (sha256block_arm64.s).
//
// h supplies the eight 32-bit SHA-256 state words and receives the updated
// state; only its base pointer is read by the assembly, so it must hold at
// least eight elements. message is consumed in 64-byte blocks; when it is
// shorter than 64 bytes the routine returns without touching h.
26//go:noescape
27func blockArmSha2(h []uint32, message []uint8)
28
// blockArmSha2Go runs the assembly routine blockArmSha2 over p and stores the
// resulting state in dig.h.
//
// The eight state words are copied into a temporary slice for the call and
// copied back afterwards; blockArmSha2 takes its state as a []uint32.
// NOTE(review): the assembly only reads h's base pointer and performs a single
// 32-byte store through it, so passing dig.h[:] directly looks equivalent and
// would drop both copies -- confirm before changing.
29func blockArmSha2Go(dig *digest, p []byte) {
30
31 h := []uint32{dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7]}
32
33 blockArmSha2(h[:], p[:])
34
35 dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h[0], h[1], h[2], h[3], h[4],
36 h[5], h[6], h[7]
37}
diff --git a/vendor/github.com/minio/sha256-simd/sha256block_arm64.s b/vendor/github.com/minio/sha256-simd/sha256block_arm64.s
new file mode 100644
index 0000000..7ab88b1
--- /dev/null
+++ b/vendor/github.com/minio/sha256-simd/sha256block_arm64.s
@@ -0,0 +1,192 @@
1//+build !noasm,!appengine,gc
2
3// ARM64 version of SHA256
4
5//
6// Minio Cloud Storage, (C) 2016 Minio, Inc.
7//
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11//
12// http://www.apache.org/licenses/LICENSE-2.0
13//
14// Unless required by applicable law or agreed to in writing, software
15// distributed under the License is distributed on an "AS IS" BASIS,
16// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17// See the License for the specific language governing permissions and
18// limitations under the License.
19//
20
21//
22// Based on implementation as found in https://github.com/jocover/sha256-armv8
23//
24// Use github.com/minio/asm2plan9s on this file to assemble ARM instructions to
25// their Plan9 equivalents
26//
27
// func blockArmSha2(h []uint32, message []uint8)
//
// Updates the SHA-256 state in h with every complete 64-byte block of message,
// using the ARMv8 SHA-256 instructions (emitted as raw WORDs, with the decoded
// instruction in the trailing comment). If fewer than 64 bytes are supplied it
// branches straight to the return and h is left untouched.
//
// The TEXT flag 7 is NOPROF|DUPOK|NOSPLIT (1|2|4 in Go's textflag.h).
//
// Register use:
//   R0      &h[0]
//   R1      message pointer, post-incremented by each block load
//   R2      remaining length minus 64; negative means no full block is left
//   R3      cursor into the constants table while it is being cached
//   V0, V1  running hash state (h[0..3], h[4..7])
//   V2, V3  working state for the current block; V4 is a scratch copy of V2
//   V5-V8   message schedule words
//   V9, V10 schedule words plus round constants, fed to sha256h/sha256h2
//   V16-V31 the 64 round constants, four per register
28TEXT ·blockArmSha2(SB), 7, $0
29 MOVD h+0(FP), R0
// h occupies a 24-byte slice header, so message's pointer sits at +24 and
// its length at +32.
30 MOVD message+24(FP), R1
31 MOVD message_len+32(FP), R2 // length of message
32 SUBS $64, R2
33 BMI complete
34
35 // Load constants table pointer
36 MOVD $·constants(SB), R3
37
38 // Cache constants table in registers v16 - v31
// The two state loads are interleaved with the constant loads. The first one
// post-increments x0 by 16, which the "sub" below undoes so that x0 points at
// h[0] again for the final store.
39 WORD $0x4cdf2870 // ld1 {v16.4s-v19.4s}, [x3], #64
40 WORD $0x4cdf7800 // ld1 {v0.4s}, [x0], #16
41 WORD $0x4cdf2874 // ld1 {v20.4s-v23.4s}, [x3], #64
42
43 WORD $0x4c407801 // ld1 {v1.4s}, [x0]
44 WORD $0x4cdf2878 // ld1 {v24.4s-v27.4s}, [x3], #64
45 WORD $0xd1004000 // sub x0, x0, #0x10
46 WORD $0x4cdf287c // ld1 {v28.4s-v31.4s}, [x3], #64
47
48loop:
49 // Main loop
// One iteration handles one 64-byte block: load it into v5-v8, byte-swap each
// 32-bit word (rev32), then run 16 groups of sha256h/sha256h2, one group per
// constant register v16..v31, interleaved with the sha256su0/sha256su1
// message-schedule updates.
50 WORD $0x4cdf2025 // ld1 {v5.16b-v8.16b}, [x1], #64
51 WORD $0x4ea01c02 // mov v2.16b, v0.16b
52 WORD $0x4ea11c23 // mov v3.16b, v1.16b
53 WORD $0x6e2008a5 // rev32 v5.16b, v5.16b
54 WORD $0x6e2008c6 // rev32 v6.16b, v6.16b
55 WORD $0x4eb084a9 // add v9.4s, v5.4s, v16.4s
56 WORD $0x6e2008e7 // rev32 v7.16b, v7.16b
57 WORD $0x4eb184ca // add v10.4s, v6.4s, v17.4s
58 WORD $0x4ea21c44 // mov v4.16b, v2.16b
59 WORD $0x5e094062 // sha256h q2, q3, v9.4s
60 WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
61 WORD $0x5e2828c5 // sha256su0 v5.4s, v6.4s
62 WORD $0x6e200908 // rev32 v8.16b, v8.16b
63 WORD $0x4eb284e9 // add v9.4s, v7.4s, v18.4s
64 WORD $0x4ea21c44 // mov v4.16b, v2.16b
65 WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
66 WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
67 WORD $0x5e2828e6 // sha256su0 v6.4s, v7.4s
68 WORD $0x5e0860e5 // sha256su1 v5.4s, v7.4s, v8.4s
69 WORD $0x4eb3850a // add v10.4s, v8.4s, v19.4s
70 WORD $0x4ea21c44 // mov v4.16b, v2.16b
71 WORD $0x5e094062 // sha256h q2, q3, v9.4s
72 WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
73 WORD $0x5e282907 // sha256su0 v7.4s, v8.4s
74 WORD $0x5e056106 // sha256su1 v6.4s, v8.4s, v5.4s
75 WORD $0x4eb484a9 // add v9.4s, v5.4s, v20.4s
76 WORD $0x4ea21c44 // mov v4.16b, v2.16b
77 WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
78 WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
79 WORD $0x5e2828a8 // sha256su0 v8.4s, v5.4s
80 WORD $0x5e0660a7 // sha256su1 v7.4s, v5.4s, v6.4s
81 WORD $0x4eb584ca // add v10.4s, v6.4s, v21.4s
82 WORD $0x4ea21c44 // mov v4.16b, v2.16b
83 WORD $0x5e094062 // sha256h q2, q3, v9.4s
84 WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
85 WORD $0x5e2828c5 // sha256su0 v5.4s, v6.4s
86 WORD $0x5e0760c8 // sha256su1 v8.4s, v6.4s, v7.4s
87 WORD $0x4eb684e9 // add v9.4s, v7.4s, v22.4s
88 WORD $0x4ea21c44 // mov v4.16b, v2.16b
89 WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
90 WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
91 WORD $0x5e2828e6 // sha256su0 v6.4s, v7.4s
92 WORD $0x5e0860e5 // sha256su1 v5.4s, v7.4s, v8.4s
93 WORD $0x4eb7850a // add v10.4s, v8.4s, v23.4s
94 WORD $0x4ea21c44 // mov v4.16b, v2.16b
95 WORD $0x5e094062 // sha256h q2, q3, v9.4s
96 WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
97 WORD $0x5e282907 // sha256su0 v7.4s, v8.4s
98 WORD $0x5e056106 // sha256su1 v6.4s, v8.4s, v5.4s
99 WORD $0x4eb884a9 // add v9.4s, v5.4s, v24.4s
100 WORD $0x4ea21c44 // mov v4.16b, v2.16b
101 WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
102 WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
103 WORD $0x5e2828a8 // sha256su0 v8.4s, v5.4s
104 WORD $0x5e0660a7 // sha256su1 v7.4s, v5.4s, v6.4s
105 WORD $0x4eb984ca // add v10.4s, v6.4s, v25.4s
106 WORD $0x4ea21c44 // mov v4.16b, v2.16b
107 WORD $0x5e094062 // sha256h q2, q3, v9.4s
108 WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
109 WORD $0x5e2828c5 // sha256su0 v5.4s, v6.4s
110 WORD $0x5e0760c8 // sha256su1 v8.4s, v6.4s, v7.4s
111 WORD $0x4eba84e9 // add v9.4s, v7.4s, v26.4s
112 WORD $0x4ea21c44 // mov v4.16b, v2.16b
113 WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
114 WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
115 WORD $0x5e2828e6 // sha256su0 v6.4s, v7.4s
116 WORD $0x5e0860e5 // sha256su1 v5.4s, v7.4s, v8.4s
117 WORD $0x4ebb850a // add v10.4s, v8.4s, v27.4s
118 WORD $0x4ea21c44 // mov v4.16b, v2.16b
119 WORD $0x5e094062 // sha256h q2, q3, v9.4s
120 WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
121 WORD $0x5e282907 // sha256su0 v7.4s, v8.4s
122 WORD $0x5e056106 // sha256su1 v6.4s, v8.4s, v5.4s
123 WORD $0x4ebc84a9 // add v9.4s, v5.4s, v28.4s
124 WORD $0x4ea21c44 // mov v4.16b, v2.16b
125 WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
126 WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
127 WORD $0x5e2828a8 // sha256su0 v8.4s, v5.4s
128 WORD $0x5e0660a7 // sha256su1 v7.4s, v5.4s, v6.4s
129 WORD $0x4ebd84ca // add v10.4s, v6.4s, v29.4s
130 WORD $0x4ea21c44 // mov v4.16b, v2.16b
131 WORD $0x5e094062 // sha256h q2, q3, v9.4s
132 WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
133 WORD $0x5e0760c8 // sha256su1 v8.4s, v6.4s, v7.4s
134 WORD $0x4ebe84e9 // add v9.4s, v7.4s, v30.4s
135 WORD $0x4ea21c44 // mov v4.16b, v2.16b
136 WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
137 WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
138 WORD $0x4ebf850a // add v10.4s, v8.4s, v31.4s
139 WORD $0x4ea21c44 // mov v4.16b, v2.16b
140 WORD $0x5e094062 // sha256h q2, q3, v9.4s
141 WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
142 WORD $0x4ea21c44 // mov v4.16b, v2.16b
143 WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
144 WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
// Fold this block's working state back into the running state.
145 WORD $0x4ea38421 // add v1.4s, v1.4s, v3.4s
146 WORD $0x4ea28400 // add v0.4s, v0.4s, v2.4s
147
// Another full block remains while the remaining count stays non-negative.
148 SUBS $64, R2
149 BPL loop
150
151 // Store result
152 WORD $0x4c00a800 // st1 {v0.4s, v1.4s}, [x0]
153
154complete:
155 RET
156
157// Constants table
// The 64 SHA-256 round constants, stored two per 8-byte entry with the
// lower-numbered constant in the low 32 bits (the first entry packs
// 0x428a2f98 below 0x71374491). blockArmSha2 loads the whole 256-byte table
// into v16-v31 before its main loop.
// The GLOBL flag 8 is the numeric value of RODATA in Go's textflag.h.
158DATA ·constants+0x0(SB)/8, $0x71374491428a2f98
159DATA ·constants+0x8(SB)/8, $0xe9b5dba5b5c0fbcf
160DATA ·constants+0x10(SB)/8, $0x59f111f13956c25b
161DATA ·constants+0x18(SB)/8, $0xab1c5ed5923f82a4
162DATA ·constants+0x20(SB)/8, $0x12835b01d807aa98
163DATA ·constants+0x28(SB)/8, $0x550c7dc3243185be
164DATA ·constants+0x30(SB)/8, $0x80deb1fe72be5d74
165DATA ·constants+0x38(SB)/8, $0xc19bf1749bdc06a7
166DATA ·constants+0x40(SB)/8, $0xefbe4786e49b69c1
167DATA ·constants+0x48(SB)/8, $0x240ca1cc0fc19dc6
168DATA ·constants+0x50(SB)/8, $0x4a7484aa2de92c6f
169DATA ·constants+0x58(SB)/8, $0x76f988da5cb0a9dc
170DATA ·constants+0x60(SB)/8, $0xa831c66d983e5152
171DATA ·constants+0x68(SB)/8, $0xbf597fc7b00327c8
172DATA ·constants+0x70(SB)/8, $0xd5a79147c6e00bf3
173DATA ·constants+0x78(SB)/8, $0x1429296706ca6351
174DATA ·constants+0x80(SB)/8, $0x2e1b213827b70a85
175DATA ·constants+0x88(SB)/8, $0x53380d134d2c6dfc
176DATA ·constants+0x90(SB)/8, $0x766a0abb650a7354
177DATA ·constants+0x98(SB)/8, $0x92722c8581c2c92e
178DATA ·constants+0xa0(SB)/8, $0xa81a664ba2bfe8a1
179DATA ·constants+0xa8(SB)/8, $0xc76c51a3c24b8b70
180DATA ·constants+0xb0(SB)/8, $0xd6990624d192e819
181DATA ·constants+0xb8(SB)/8, $0x106aa070f40e3585
182DATA ·constants+0xc0(SB)/8, $0x1e376c0819a4c116
183DATA ·constants+0xc8(SB)/8, $0x34b0bcb52748774c
184DATA ·constants+0xd0(SB)/8, $0x4ed8aa4a391c0cb3
185DATA ·constants+0xd8(SB)/8, $0x682e6ff35b9cca4f
186DATA ·constants+0xe0(SB)/8, $0x78a5636f748f82ee
187DATA ·constants+0xe8(SB)/8, $0x8cc7020884c87814
188DATA ·constants+0xf0(SB)/8, $0xa4506ceb90befffa
189DATA ·constants+0xf8(SB)/8, $0xc67178f2bef9a3f7
190
191GLOBL ·constants(SB), 8, $256
192
diff --git a/vendor/github.com/minio/sha256-simd/sha256block_other.go b/vendor/github.com/minio/sha256-simd/sha256block_other.go
new file mode 100644
index 0000000..94d7eb0
--- /dev/null
+++ b/vendor/github.com/minio/sha256-simd/sha256block_other.go
@@ -0,0 +1,29 @@
1//go:build appengine || noasm || (!amd64 && !arm64) || !gc
2// +build appengine noasm !amd64,!arm64 !gc
3
4/*
5 * Minio Cloud Storage, (C) 2019 Minio, Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20package sha256
21
// blockIntelShaGo is the placeholder used when this file's build constraints
// apply (appengine, noasm, a non-gc toolchain, or neither amd64 nor arm64).
// It ignores both arguments and always panics.
22func blockIntelShaGo(dig *digest, p []byte) {
23 panic("blockIntelShaGo called unexpectedly")
24
25}
26
// blockArmSha2Go is the placeholder used when this file's build constraints
// apply (appengine, noasm, a non-gc toolchain, or neither amd64 nor arm64).
// It ignores both arguments and always panics.
27func blockArmSha2Go(dig *digest, p []byte) {
28 panic("blockArmSha2Go called unexpectedly")
29}
diff --git a/vendor/github.com/minio/sha256-simd/test-architectures.sh b/vendor/github.com/minio/sha256-simd/test-architectures.sh
new file mode 100644
index 0000000..50150ea
--- /dev/null
+++ b/vendor/github.com/minio/sha256-simd/test-architectures.sh
@@ -0,0 +1,15 @@
1#!/bin/sh
# Build check for every platform the installed Go toolchain knows about.
# For each GOOS/GOARCH pair printed by `go tool dist list`, the packages under
# the current directory are compiled four times: with no extra tags, with
# `noasm`, with `appengine`, and with both. The output is discarded
# (-o /dev/null); only whether the build succeeds matters.
2
# Abort on the first command that fails.
3set -e
4
# `go tool dist list` prints one "os/arch" pair per line; IFS=/ splits each
# line into the two variables.
5go tool dist list | while IFS=/ read os arch; do
6 echo "Checking $os/$arch..."
7 echo " normal"
8 GOARCH=$arch GOOS=$os go build -o /dev/null ./...
9 echo " noasm"
10 GOARCH=$arch GOOS=$os go build -tags noasm -o /dev/null ./...
11 echo " appengine"
12 GOARCH=$arch GOOS=$os go build -tags appengine -o /dev/null ./...
13 echo " noasm,appengine"
14 GOARCH=$arch GOOS=$os go build -tags 'appengine noasm' -o /dev/null ./...
15done