diff options
Diffstat (limited to 'vendor/github.com/klauspost/cpuid')
19 files changed, 3149 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/cpuid/v2/.gitignore b/vendor/github.com/klauspost/cpuid/v2/.gitignore new file mode 100644 index 0000000..daf913b --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/.gitignore | |||
@@ -0,0 +1,24 @@ | |||
1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) | ||
2 | *.o | ||
3 | *.a | ||
4 | *.so | ||
5 | |||
6 | # Folders | ||
7 | _obj | ||
8 | _test | ||
9 | |||
10 | # Architecture specific extensions/prefixes | ||
11 | *.[568vq] | ||
12 | [568vq].out | ||
13 | |||
14 | *.cgo1.go | ||
15 | *.cgo2.c | ||
16 | _cgo_defun.c | ||
17 | _cgo_gotypes.go | ||
18 | _cgo_export.* | ||
19 | |||
20 | _testmain.go | ||
21 | |||
22 | *.exe | ||
23 | *.test | ||
24 | *.prof | ||
diff --git a/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml b/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml new file mode 100644 index 0000000..944cc00 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml | |||
@@ -0,0 +1,74 @@ | |||
1 | # This is an example goreleaser.yaml file with some sane defaults. | ||
2 | # Make sure to check the documentation at http://goreleaser.com | ||
3 | |||
4 | builds: | ||
5 | - | ||
6 | id: "cpuid" | ||
7 | binary: cpuid | ||
8 | main: ./cmd/cpuid/main.go | ||
9 | env: | ||
10 | - CGO_ENABLED=0 | ||
11 | flags: | ||
12 | - -ldflags=-s -w | ||
13 | goos: | ||
14 | - aix | ||
15 | - linux | ||
16 | - freebsd | ||
17 | - netbsd | ||
18 | - windows | ||
19 | - darwin | ||
20 | goarch: | ||
21 | - 386 | ||
22 | - amd64 | ||
23 | - arm64 | ||
24 | goarm: | ||
25 | - 7 | ||
26 | |||
27 | archives: | ||
28 | - | ||
29 | id: cpuid | ||
30 | name_template: "cpuid-{{ .Os }}_{{ .Arch }}_{{ .Version }}" | ||
31 | replacements: | ||
32 | aix: AIX | ||
33 | darwin: OSX | ||
34 | linux: Linux | ||
35 | windows: Windows | ||
36 | 386: i386 | ||
37 | amd64: x86_64 | ||
38 | freebsd: FreeBSD | ||
39 | netbsd: NetBSD | ||
40 | format_overrides: | ||
41 | - goos: windows | ||
42 | format: zip | ||
43 | files: | ||
44 | - LICENSE | ||
45 | checksum: | ||
46 | name_template: 'checksums.txt' | ||
47 | snapshot: | ||
48 | name_template: "{{ .Tag }}-next" | ||
49 | changelog: | ||
50 | sort: asc | ||
51 | filters: | ||
52 | exclude: | ||
53 | - '^doc:' | ||
54 | - '^docs:' | ||
55 | - '^test:' | ||
56 | - '^tests:' | ||
57 | - '^Update\sREADME.md' | ||
58 | |||
59 | nfpms: | ||
60 | - | ||
61 | file_name_template: "cpuid_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}" | ||
62 | vendor: Klaus Post | ||
63 | homepage: https://github.com/klauspost/cpuid | ||
64 | maintainer: Klaus Post <[email protected]> | ||
65 | description: CPUID Tool | ||
66 | license: BSD 3-Clause | ||
67 | formats: | ||
68 | - deb | ||
69 | - rpm | ||
70 | replacements: | ||
71 | darwin: Darwin | ||
72 | linux: Linux | ||
73 | freebsd: FreeBSD | ||
74 | amd64: x86_64 | ||
diff --git a/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt b/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt new file mode 100644 index 0000000..452d28e --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt | |||
@@ -0,0 +1,35 @@ | |||
1 | Developer Certificate of Origin | ||
2 | Version 1.1 | ||
3 | |||
4 | Copyright (C) 2015- Klaus Post & Contributors. | ||
5 | Email: [email protected] | ||
6 | |||
7 | Everyone is permitted to copy and distribute verbatim copies of this | ||
8 | license document, but changing it is not allowed. | ||
9 | |||
10 | |||
11 | Developer's Certificate of Origin 1.1 | ||
12 | |||
13 | By making a contribution to this project, I certify that: | ||
14 | |||
15 | (a) The contribution was created in whole or in part by me and I | ||
16 | have the right to submit it under the open source license | ||
17 | indicated in the file; or | ||
18 | |||
19 | (b) The contribution is based upon previous work that, to the best | ||
20 | of my knowledge, is covered under an appropriate open source | ||
21 | license and I have the right under that license to submit that | ||
22 | work with modifications, whether created in whole or in part | ||
23 | by me, under the same open source license (unless I am | ||
24 | permitted to submit under a different license), as indicated | ||
25 | in the file; or | ||
26 | |||
27 | (c) The contribution was provided directly to me by some other | ||
28 | person who certified (a), (b) or (c) and I have not modified | ||
29 | it. | ||
30 | |||
31 | (d) I understand and agree that this project and the contribution | ||
32 | are public and that a record of the contribution (including all | ||
33 | personal information I submit with it, including my sign-off) is | ||
34 | maintained indefinitely and may be redistributed consistent with | ||
35 | this project or the open source license(s) involved. | ||
diff --git a/vendor/github.com/klauspost/cpuid/v2/LICENSE b/vendor/github.com/klauspost/cpuid/v2/LICENSE new file mode 100644 index 0000000..5cec7ee --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/LICENSE | |||
@@ -0,0 +1,22 @@ | |||
1 | The MIT License (MIT) | ||
2 | |||
3 | Copyright (c) 2015 Klaus Post | ||
4 | |||
5 | Permission is hereby granted, free of charge, to any person obtaining a copy | ||
6 | of this software and associated documentation files (the "Software"), to deal | ||
7 | in the Software without restriction, including without limitation the rights | ||
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
9 | copies of the Software, and to permit persons to whom the Software is | ||
10 | furnished to do so, subject to the following conditions: | ||
11 | |||
12 | The above copyright notice and this permission notice shall be included in all | ||
13 | copies or substantial portions of the Software. | ||
14 | |||
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
21 | SOFTWARE. | ||
22 | |||
diff --git a/vendor/github.com/klauspost/cpuid/v2/README.md b/vendor/github.com/klauspost/cpuid/v2/README.md new file mode 100644 index 0000000..30f8d29 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/README.md | |||
@@ -0,0 +1,497 @@ | |||
1 | # cpuid | ||
2 | Package cpuid provides information about the CPU running the current program. | ||
3 | |||
4 | CPU features are detected on startup, and kept for fast access through the life of the application. | ||
5 | Currently x86 / x64 (AMD64/i386) and ARM (ARM64) are supported, and no external C (cgo) code is used, which should make the library very easy to use. | ||
6 | |||
7 | You can access the CPU information by accessing the shared CPU variable of the cpuid library. | ||
8 | |||
9 | Package home: https://github.com/klauspost/cpuid | ||
10 | |||
11 | [![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2) | ||
12 | [![Go](https://github.com/klauspost/cpuid/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/cpuid/actions/workflows/go.yml) | ||
13 | |||
14 | ## installing | ||
15 | |||
16 | `go get -u github.com/klauspost/cpuid/v2` using modules. | ||
17 | Drop `v2` for others. | ||
18 | |||
19 | Installing binary: | ||
20 | |||
21 | `go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest` | ||
22 | |||
23 | Or download binaries from release page: https://github.com/klauspost/cpuid/releases | ||
24 | |||
25 | ### Homebrew | ||
26 | |||
27 | For macOS/Linux users, you can install via [brew](https://brew.sh/) | ||
28 | |||
29 | ```sh | ||
30 | $ brew install cpuid | ||
31 | ``` | ||
32 | |||
33 | ## example | ||
34 | |||
35 | ```Go | ||
36 | package main | ||
37 | |||
38 | import ( | ||
39 | "fmt" | ||
40 | "strings" | ||
41 | |||
42 | . "github.com/klauspost/cpuid/v2" | ||
43 | ) | ||
44 | |||
45 | func main() { | ||
46 | // Print basic CPU information: | ||
47 | fmt.Println("Name:", CPU.BrandName) | ||
48 | fmt.Println("PhysicalCores:", CPU.PhysicalCores) | ||
49 | fmt.Println("ThreadsPerCore:", CPU.ThreadsPerCore) | ||
50 | fmt.Println("LogicalCores:", CPU.LogicalCores) | ||
51 | fmt.Println("Family", CPU.Family, "Model:", CPU.Model, "Vendor ID:", CPU.VendorID) | ||
52 | fmt.Println("Features:", strings.Join(CPU.FeatureSet(), ",")) | ||
53 | fmt.Println("Cacheline bytes:", CPU.CacheLine) | ||
54 | fmt.Println("L1 Data Cache:", CPU.Cache.L1D, "bytes") | ||
55 | fmt.Println("L1 Instruction Cache:", CPU.Cache.L1I, "bytes") | ||
56 | fmt.Println("L2 Cache:", CPU.Cache.L2, "bytes") | ||
57 | fmt.Println("L3 Cache:", CPU.Cache.L3, "bytes") | ||
58 | fmt.Println("Frequency", CPU.Hz, "hz") | ||
59 | |||
60 | // Test if we have these specific features: | ||
61 | if CPU.Supports(SSE, SSE2) { | ||
62 | fmt.Println("We have Streaming SIMD 2 Extensions") | ||
63 | } | ||
64 | } | ||
65 | ``` | ||
66 | |||
67 | Sample output: | ||
68 | ``` | ||
69 | >go run main.go | ||
70 | Name: AMD Ryzen 9 3950X 16-Core Processor | ||
71 | PhysicalCores: 16 | ||
72 | ThreadsPerCore: 2 | ||
73 | LogicalCores: 32 | ||
74 | Family 23 Model: 113 Vendor ID: AMD | ||
75 | Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CMOV,CX16,F16C,FMA3,HTT,HYPERVISOR,LZCNT,MMX,MMXEXT,NX,POPCNT,RDRAND,RDSEED,RDTSCP,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3 | ||
76 | Cacheline bytes: 64 | ||
77 | L1 Data Cache: 32768 bytes | ||
78 | L1 Instruction Cache: 32768 bytes | ||
79 | L2 Cache: 524288 bytes | ||
80 | L3 Cache: 16777216 bytes | ||
81 | Frequency 0 hz | ||
82 | We have Streaming SIMD 2 Extensions | ||
83 | ``` | ||
84 | |||
85 | # usage | ||
86 | |||
87 | The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features. | ||
88 | A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler. | ||
89 | |||
90 | To test a larger number of features, they can be combined using `f := CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)`, etc. | ||
91 | This can be used with `cpuid.CPU.HasAll(f)` to quickly test if all features are supported. | ||
92 | |||
93 | Note that for some cpu/os combinations some features will not be detected. | ||
94 | `amd64` has rather good support and should work reliably on all platforms. | ||
95 | |||
96 | Note that hypervisors may not pass all CPU features through to the guest OS, | ||
97 | so even if your host supports a feature it may not be visible on guests. | ||
98 | |||
99 | ## arm64 feature detection | ||
100 | |||
101 | Not all operating systems provide ARM features directly | ||
102 | and there is no safe way to do so for the rest. | ||
103 | |||
104 | Currently `arm64/linux` and `arm64/freebsd` should be quite reliable. | ||
105 | `arm64/darwin` adds features expected from the M1 processor, but a lot remains undetected. | ||
106 | |||
107 | A `DetectARM()` can be used if you are able to control your deployment, | ||
108 | it will detect CPU features, but may crash if the OS doesn't intercept the calls. | ||
109 | A `-cpu.arm` flag for detecting unsafe ARM features can be added. See below. | ||
110 | |||
111 | Note that currently only features are detected on ARM, | ||
112 | no additional information is currently available. | ||
113 | |||
114 | ## flags | ||
115 | |||
116 | It is possible to add flags that affect cpu detection. | ||
117 | |||
118 | For this the `Flags()` command is provided. | ||
119 | |||
120 | This must be called *before* `flag.Parse()`, and `Detect()` must be called *after* the flags have been parsed. | ||
121 | |||
122 | This means that any detection used in `init()` functions will not contain these flags. | ||
123 | |||
124 | Example: | ||
125 | |||
126 | ```Go | ||
127 | package main | ||
128 | |||
129 | import ( | ||
130 | "flag" | ||
131 | "fmt" | ||
132 | "strings" | ||
133 | |||
134 | "github.com/klauspost/cpuid/v2" | ||
135 | ) | ||
136 | |||
137 | func main() { | ||
138 | cpuid.Flags() | ||
139 | flag.Parse() | ||
140 | cpuid.Detect() | ||
141 | |||
142 | // Test if we have these specific features: | ||
143 | if cpuid.CPU.Supports(cpuid.SSE, cpuid.SSE2) { | ||
144 | fmt.Println("We have Streaming SIMD 2 Extensions") | ||
145 | } | ||
146 | } | ||
147 | ``` | ||
148 | |||
149 | ## commandline | ||
150 | |||
151 | Download as binary from: https://github.com/klauspost/cpuid/releases | ||
152 | |||
153 | Install from source: | ||
154 | |||
155 | `go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest` | ||
156 | |||
157 | ### Example | ||
158 | |||
159 | ``` | ||
160 | λ cpuid | ||
161 | Name: AMD Ryzen 9 3950X 16-Core Processor | ||
162 | Vendor String: AuthenticAMD | ||
163 | Vendor ID: AMD | ||
164 | PhysicalCores: 16 | ||
165 | Threads Per Core: 2 | ||
166 | Logical Cores: 32 | ||
167 | CPU Family 23 Model: 113 | ||
168 | Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CLZERO,CMOV,CMPXCHG8,CPBOOST,CX16,F16C,FMA3,FXSR,FXSROPT,HTT,HYPERVISOR,LAHF,LZCNT,MCAOVERFLOW,MMX,MMXEXT,MOVBE,NX,OSXSAVE,POPCNT,RDRAND,RDSEED,RDTSCP,SCE,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3,SUCCOR,X87,XSAVE | ||
169 | Microarchitecture level: 3 | ||
170 | Cacheline bytes: 64 | ||
171 | L1 Instruction Cache: 32768 bytes | ||
172 | L1 Data Cache: 32768 bytes | ||
173 | L2 Cache: 524288 bytes | ||
174 | L3 Cache: 16777216 bytes | ||
175 | |||
176 | ``` | ||
177 | ### JSON Output: | ||
178 | |||
179 | ``` | ||
180 | λ cpuid --json | ||
181 | { | ||
182 | "BrandName": "AMD Ryzen 9 3950X 16-Core Processor", | ||
183 | "VendorID": 2, | ||
184 | "VendorString": "AuthenticAMD", | ||
185 | "PhysicalCores": 16, | ||
186 | "ThreadsPerCore": 2, | ||
187 | "LogicalCores": 32, | ||
188 | "Family": 23, | ||
189 | "Model": 113, | ||
190 | "CacheLine": 64, | ||
191 | "Hz": 0, | ||
192 | "BoostFreq": 0, | ||
193 | "Cache": { | ||
194 | "L1I": 32768, | ||
195 | "L1D": 32768, | ||
196 | "L2": 524288, | ||
197 | "L3": 16777216 | ||
198 | }, | ||
199 | "SGX": { | ||
200 | "Available": false, | ||
201 | "LaunchControl": false, | ||
202 | "SGX1Supported": false, | ||
203 | "SGX2Supported": false, | ||
204 | "MaxEnclaveSizeNot64": 0, | ||
205 | "MaxEnclaveSize64": 0, | ||
206 | "EPCSections": null | ||
207 | }, | ||
208 | "Features": [ | ||
209 | "ADX", | ||
210 | "AESNI", | ||
211 | "AVX", | ||
212 | "AVX2", | ||
213 | "BMI1", | ||
214 | "BMI2", | ||
215 | "CLMUL", | ||
216 | "CLZERO", | ||
217 | "CMOV", | ||
218 | "CMPXCHG8", | ||
219 | "CPBOOST", | ||
220 | "CX16", | ||
221 | "F16C", | ||
222 | "FMA3", | ||
223 | "FXSR", | ||
224 | "FXSROPT", | ||
225 | "HTT", | ||
226 | "HYPERVISOR", | ||
227 | "LAHF", | ||
228 | "LZCNT", | ||
229 | "MCAOVERFLOW", | ||
230 | "MMX", | ||
231 | "MMXEXT", | ||
232 | "MOVBE", | ||
233 | "NX", | ||
234 | "OSXSAVE", | ||
235 | "POPCNT", | ||
236 | "RDRAND", | ||
237 | "RDSEED", | ||
238 | "RDTSCP", | ||
239 | "SCE", | ||
240 | "SHA", | ||
241 | "SSE", | ||
242 | "SSE2", | ||
243 | "SSE3", | ||
244 | "SSE4", | ||
245 | "SSE42", | ||
246 | "SSE4A", | ||
247 | "SSSE3", | ||
248 | "SUCCOR", | ||
249 | "X87", | ||
250 | "XSAVE" | ||
251 | ], | ||
252 | "X64Level": 3 | ||
253 | } | ||
254 | ``` | ||
255 | |||
256 | ### Check CPU microarch level | ||
257 | |||
258 | ``` | ||
259 | λ cpuid --check-level=3 | ||
260 | 2022/03/18 17:04:40 AMD Ryzen 9 3950X 16-Core Processor | ||
261 | 2022/03/18 17:04:40 Microarchitecture level 3 is supported. Max level is 3. | ||
262 | Exit Code 0 | ||
263 | |||
264 | λ cpuid --check-level=4 | ||
265 | 2022/03/18 17:06:18 AMD Ryzen 9 3950X 16-Core Processor | ||
266 | 2022/03/18 17:06:18 Microarchitecture level 4 not supported. Max level is 3. | ||
267 | Exit Code 1 | ||
268 | ``` | ||
269 | |||
270 | |||
271 | ## Available flags | ||
272 | |||
273 | ### x86 & amd64 | ||
274 | |||
275 | | Feature Flag | Description | | ||
276 | |--------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | ||
277 | | ADX | Intel ADX (Multi-Precision Add-Carry Instruction Extensions) | | ||
278 | | AESNI | Advanced Encryption Standard New Instructions | | ||
279 | | AMD3DNOW | AMD 3DNOW | | ||
280 | | AMD3DNOWEXT | AMD 3DNowExt | | ||
281 | | AMXBF16 | Tile computational operations on BFLOAT16 numbers | | ||
282 | | AMXINT8 | Tile computational operations on 8-bit integers | | ||
283 | | AMXFP16 | Tile computational operations on FP16 numbers | | ||
284 | | AMXTILE | Tile architecture | | ||
285 | | APX_F | Intel APX | | ||
286 | | AVX | AVX functions | | ||
287 | | AVX10 | If set the Intel AVX10 Converged Vector ISA is supported | | ||
288 | | AVX10_128 | If set indicates that AVX10 128-bit vector support is present | | ||
289 | | AVX10_256 | If set indicates that AVX10 256-bit vector support is present | | ||
290 | | AVX10_512 | If set indicates that AVX10 512-bit vector support is present | | ||
291 | | AVX2 | AVX2 functions | | ||
292 | | AVX512BF16 | AVX-512 BFLOAT16 Instructions | | ||
293 | | AVX512BITALG | AVX-512 Bit Algorithms | | ||
294 | | AVX512BW | AVX-512 Byte and Word Instructions | | ||
295 | | AVX512CD | AVX-512 Conflict Detection Instructions | | ||
296 | | AVX512DQ | AVX-512 Doubleword and Quadword Instructions | | ||
297 | | AVX512ER | AVX-512 Exponential and Reciprocal Instructions | | ||
298 | | AVX512F | AVX-512 Foundation | | ||
299 | | AVX512FP16 | AVX-512 FP16 Instructions | | ||
300 | | AVX512IFMA | AVX-512 Integer Fused Multiply-Add Instructions | | ||
301 | | AVX512PF | AVX-512 Prefetch Instructions | | ||
302 | | AVX512VBMI | AVX-512 Vector Bit Manipulation Instructions | | ||
303 | | AVX512VBMI2 | AVX-512 Vector Bit Manipulation Instructions, Version 2 | | ||
304 | | AVX512VL | AVX-512 Vector Length Extensions | | ||
305 | | AVX512VNNI | AVX-512 Vector Neural Network Instructions | | ||
306 | | AVX512VP2INTERSECT | AVX-512 Intersect for D/Q | | ||
307 | | AVX512VPOPCNTDQ | AVX-512 Vector Population Count Doubleword and Quadword | | ||
308 | | AVXIFMA | AVX-IFMA instructions | | ||
309 | | AVXNECONVERT | AVX-NE-CONVERT instructions | | ||
310 | | AVXSLOW | Indicates the CPU performs 2 128 bit operations instead of one | | ||
311 | | AVXVNNI | AVX (VEX encoded) VNNI neural network instructions | | ||
312 | | AVXVNNIINT8 | AVX-VNNI-INT8 instructions | | ||
313 | | BHI_CTRL | Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 | | ||
314 | | BMI1 | Bit Manipulation Instruction Set 1 | | ||
315 | | BMI2 | Bit Manipulation Instruction Set 2 | | ||
316 | | CETIBT | Intel CET Indirect Branch Tracking | | ||
317 | | CETSS | Intel CET Shadow Stack | | ||
318 | | CLDEMOTE | Cache Line Demote | | ||
319 | | CLMUL | Carry-less Multiplication | | ||
320 | | CLZERO | CLZERO instruction supported | | ||
321 | | CMOV | i686 CMOV | | ||
322 | | CMPCCXADD | CMPCCXADD instructions | | ||
323 | | CMPSB_SCADBS_SHORT | Fast short CMPSB and SCASB | | ||
324 | | CMPXCHG8 | CMPXCHG8 instruction | | ||
325 | | CPBOOST | Core Performance Boost | | ||
326 | | CPPC | AMD: Collaborative Processor Performance Control | | ||
327 | | CX16 | CMPXCHG16B Instruction | | ||
328 | | EFER_LMSLE_UNS | AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ | | ||
329 | | ENQCMD | Enqueue Command | | ||
330 | | ERMS | Enhanced REP MOVSB/STOSB | | ||
331 | | F16C | Half-precision floating-point conversion | | ||
332 | | FLUSH_L1D | Flush L1D cache | | ||
333 | | FMA3 | Intel FMA 3. Does not imply AVX. | | ||
334 | | FMA4 | Bulldozer FMA4 functions | | ||
335 | | FP128 | AMD: When set, the internal FP/SIMD execution datapath is 128-bits wide | | ||
336 | | FP256 | AMD: When set, the internal FP/SIMD execution datapath is 256-bits wide | | ||
337 | | FSRM | Fast Short Rep Mov | | ||
338 | | FXSR | FXSAVE, FXRESTOR instructions, CR4 bit 9 | | ||
339 | | FXSROPT | FXSAVE/FXRSTOR optimizations | | ||
340 | | GFNI | Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. | | ||
341 | | HLE | Hardware Lock Elision | | ||
342 | | HRESET | If set CPU supports history reset and the IA32_HRESET_ENABLE MSR | | ||
343 | | HTT | Hyperthreading (enabled) | | ||
344 | | HWA | Hardware assert supported. Indicates support for MSRC001_10 | | ||
345 | | HYBRID_CPU | This part has CPUs of more than one type. | | ||
346 | | HYPERVISOR | This bit has been reserved by Intel & AMD for use by hypervisors | | ||
347 | | IA32_ARCH_CAP | IA32_ARCH_CAPABILITIES MSR (Intel) | | ||
348 | | IA32_CORE_CAP | IA32_CORE_CAPABILITIES MSR | | ||
349 | | IBPB | Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) | | ||
350 | | IBRS | AMD: Indirect Branch Restricted Speculation | | ||
351 | | IBRS_PREFERRED | AMD: IBRS is preferred over software solution | | ||
352 | | IBRS_PROVIDES_SMP | AMD: IBRS provides Same Mode Protection | | ||
353 | | IBS | Instruction Based Sampling (AMD) | | ||
354 | | IBSBRNTRGT | Instruction Based Sampling Feature (AMD) | | ||
355 | | IBSFETCHSAM | Instruction Based Sampling Feature (AMD) | | ||
356 | | IBSFFV | Instruction Based Sampling Feature (AMD) | | ||
357 | | IBSOPCNT | Instruction Based Sampling Feature (AMD) | | ||
358 | | IBSOPCNTEXT | Instruction Based Sampling Feature (AMD) | | ||
359 | | IBSOPSAM | Instruction Based Sampling Feature (AMD) | | ||
360 | | IBSRDWROPCNT | Instruction Based Sampling Feature (AMD) | | ||
361 | | IBSRIPINVALIDCHK | Instruction Based Sampling Feature (AMD) | | ||
362 | | IBS_FETCH_CTLX | AMD: IBS fetch control extended MSR supported | | ||
363 | | IBS_OPDATA4 | AMD: IBS op data 4 MSR supported | | ||
364 | | IBS_OPFUSE | AMD: Indicates support for IbsOpFuse | | ||
365 | | IBS_PREVENTHOST | Disallowing IBS use by the host supported | | ||
366 | | IBS_ZEN4 | Fetch and Op IBS support IBS extensions added with Zen4 | | ||
367 | | IDPRED_CTRL | IPRED_DIS | | ||
368 | | INT_WBINVD | WBINVD/WBNOINVD are interruptible. | | ||
369 | | INVLPGB | INVLPGB and TLBSYNC instructions supported | | ||
370 | | KEYLOCKER | Key locker | | ||
371 | | KEYLOCKERW | Key locker wide | | ||
372 | | LAHF | LAHF/SAHF in long mode | | ||
373 | | LAM | If set, CPU supports Linear Address Masking | | ||
374 | | LBRVIRT | LBR virtualization | | ||
375 | | LZCNT | LZCNT instruction | | ||
376 | | MCAOVERFLOW | MCA overflow recovery support. | | ||
377 | | MCDT_NO | Processor does not exhibit MXCSR Configuration Dependent Timing behavior and does not need to mitigate it. | | ||
378 | | MCOMMIT | MCOMMIT instruction supported | | ||
379 | | MD_CLEAR | VERW clears CPU buffers | | ||
380 | | MMX | standard MMX | | ||
381 | | MMXEXT | SSE integer functions or AMD MMX ext | | ||
382 | | MOVBE | MOVBE instruction (big-endian) | | ||
383 | | MOVDIR64B | Move 64 Bytes as Direct Store | | ||
384 | | MOVDIRI | Move Doubleword as Direct Store | | ||
385 | | MOVSB_ZL | Fast Zero-Length MOVSB | | ||
386 | | MPX | Intel MPX (Memory Protection Extensions) | | ||
387 | | MOVU | MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD | | ||
388 | | MSRIRC | Instruction Retired Counter MSR available | | ||
389 | | MSRLIST | Read/Write List of Model Specific Registers | | ||
390 | | MSR_PAGEFLUSH | Page Flush MSR available | | ||
391 | | NRIPS | Indicates support for NRIP save on VMEXIT | | ||
392 | | NX | NX (No-Execute) bit | | ||
393 | | OSXSAVE | XSAVE enabled by OS | | ||
394 | | PCONFIG | PCONFIG for Intel Multi-Key Total Memory Encryption | | ||
395 | | POPCNT | POPCNT instruction | | ||
396 | | PPIN | AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled | | ||
397 | | PREFETCHI | PREFETCHIT0/1 instructions | | ||
398 | | PSFD | Predictive Store Forward Disable | | ||
399 | | RDPRU | RDPRU instruction supported | | ||
400 | | RDRAND | RDRAND instruction is available | | ||
401 | | RDSEED | RDSEED instruction is available | | ||
402 | | RDTSCP | RDTSCP Instruction | | ||
403 | | RRSBA_CTRL | Restricted RSB Alternate | | ||
404 | | RTM | Restricted Transactional Memory | | ||
405 | | RTM_ALWAYS_ABORT | Indicates that the loaded microcode is forcing RTM abort. | | ||
406 | | SERIALIZE | Serialize Instruction Execution | | ||
407 | | SEV | AMD Secure Encrypted Virtualization supported | | ||
408 | | SEV_64BIT | AMD SEV guest execution only allowed from a 64-bit host | | ||
409 | | SEV_ALTERNATIVE | AMD SEV Alternate Injection supported | | ||
410 | | SEV_DEBUGSWAP | Full debug state swap supported for SEV-ES guests | | ||
411 | | SEV_ES | AMD SEV Encrypted State supported | | ||
412 | | SEV_RESTRICTED | AMD SEV Restricted Injection supported | | ||
413 | | SEV_SNP | AMD SEV Secure Nested Paging supported | | ||
414 | | SGX | Software Guard Extensions | | ||
415 | | SGXLC | Software Guard Extensions Launch Control | | ||
416 | | SHA | Intel SHA Extensions | | ||
417 | | SME | AMD Secure Memory Encryption supported | | ||
418 | | SME_COHERENT | AMD Hardware cache coherency across encryption domains enforced | | ||
419 | | SPEC_CTRL_SSBD | Speculative Store Bypass Disable | | ||
420 | | SRBDS_CTRL | SRBDS mitigation MSR available | | ||
421 | | SSE | SSE functions | | ||
422 | | SSE2 | P4 SSE functions | | ||
423 | | SSE3 | Prescott SSE3 functions | | ||
424 | | SSE4 | Penryn SSE4.1 functions | | ||
425 | | SSE42 | Nehalem SSE4.2 functions | | ||
426 | | SSE4A | AMD Barcelona microarchitecture SSE4a instructions | | ||
427 | | SSSE3 | Conroe SSSE3 functions | | ||
428 | | STIBP | Single Thread Indirect Branch Predictors | | ||
429 | | STIBP_ALWAYSON | AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On | | ||
430 | | STOSB_SHORT | Fast short STOSB | | ||
431 | | SUCCOR | Software uncorrectable error containment and recovery capability. | | ||
432 | | SVM | AMD Secure Virtual Machine | | ||
433 | | SVMDA | Indicates support for the SVM decode assists. | | ||
434 | | SVMFBASID | SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control | | ||
435 | | SVML | AMD SVM lock. Indicates support for SVM-Lock. | | ||
436 | | SVMNP | AMD SVM nested paging | | ||
437 | | SVMPF | SVM pause intercept filter. Indicates support for the pause intercept filter | | ||
438 | | SVMPFT | SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold | | ||
439 | | SYSCALL | System-Call Extension (SCE): SYSCALL and SYSRET instructions. | | ||
440 | | SYSEE | SYSENTER and SYSEXIT instructions | | ||
441 | | TBM | AMD Trailing Bit Manipulation | | ||
442 | | TDX_GUEST | Intel Trust Domain Extensions Guest | | ||
443 | | TLB_FLUSH_NESTED | AMD: Flushing includes all the nested translations for guest translations | | ||
444 | | TME | Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. | | ||
445 | | TOPEXT | TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. | | ||
446 | | TSCRATEMSR | MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 | | ||
447 | | TSXLDTRK | Intel TSX Suspend Load Address Tracking | | ||
448 | | VAES | Vector AES. AVX(512) versions require additional checks. | | ||
449 | | VMCBCLEAN | VMCB clean bits. Indicates support for VMCB clean bits. | | ||
450 | | VMPL | AMD VM Permission Levels supported | | ||
451 | | VMSA_REGPROT | AMD VMSA Register Protection supported | | ||
452 | | VMX | Virtual Machine Extensions | | ||
453 | | VPCLMULQDQ | Carry-Less Multiplication Quadword. Requires AVX for 3 register versions. | | ||
454 | | VTE | AMD Virtual Transparent Encryption supported | | ||
455 | | WAITPKG | TPAUSE, UMONITOR, UMWAIT | | ||
456 | | WBNOINVD | Write Back and Do Not Invalidate Cache | | ||
457 | | WRMSRNS | Non-Serializing Write to Model Specific Register | | ||
458 | | X87 | FPU | | ||
459 | | XGETBV1 | Supports XGETBV with ECX = 1 | | ||
460 | | XOP | Bulldozer XOP functions | | ||
461 | | XSAVE | XSAVE, XRESTOR, XSETBV, XGETBV | | ||
462 | | XSAVEC | Supports XSAVEC and the compacted form of XRSTOR. | | ||
463 | | XSAVEOPT | XSAVEOPT available | | ||
464 | | XSAVES | Supports XSAVES/XRSTORS and IA32_XSS | | ||
465 | |||
466 | # ARM features: | ||
467 | |||
468 | | Feature Flag | Description | | ||
469 | |--------------|------------------------------------------------------------------| | ||
470 | | AESARM | AES instructions | | ||
471 | | ARMCPUID | Some CPU ID registers readable at user-level | | ||
472 | | ASIMD | Advanced SIMD | | ||
473 | | ASIMDDP | SIMD Dot Product | | ||
474 | | ASIMDHP | Advanced SIMD half-precision floating point | | ||
475 | | ASIMDRDM | Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) | | ||
476 | | ATOMICS | Large System Extensions (LSE) | | ||
477 | | CRC32 | CRC32/CRC32C instructions | | ||
478 | | DCPOP | Data cache clean to Point of Persistence (DC CVAP) | | ||
479 | | EVTSTRM | Generic timer | | ||
480 | | FCMA | Floating point complex number addition and multiplication | | ||
481 | | FP | Single-precision and double-precision floating point | | ||
482 | | FPHP | Half-precision floating point | | ||
483 | | GPA | Generic Pointer Authentication | | ||
484 | | JSCVT | Javascript-style double->int convert (FJCVTZS) | | ||
485 | | LRCPC | Weaker release consistency (LDAPR, etc) | | ||
486 | | PMULL | Polynomial Multiply instructions (PMULL/PMULL2) | | ||
487 | | SHA1 | SHA-1 instructions (SHA1C, etc) | | ||
488 | | SHA2 | SHA-2 instructions (SHA256H, etc) | | ||
489 | | SHA3 | SHA-3 instructions (EOR3, RAX1, XAR, BCAX) | | ||
490 | | SHA512 | SHA512 instructions | | ||
491 | | SM3 | SM3 instructions | | ||
492 | | SM4 | SM4 instructions | | ||
493 | | SVE | Scalable Vector Extension | | ||
494 | |||
495 | # license | ||
496 | |||
497 | This code is published under an MIT license. See LICENSE file for more information. | ||
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go new file mode 100644 index 0000000..15b7603 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go | |||
@@ -0,0 +1,1473 @@ | |||
1 | // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. | ||
2 | |||
3 | // Package cpuid provides information about the CPU running the current program. | ||
4 | // | ||
5 | // CPU features are detected on startup, and kept for fast access through the life of the application. | ||
6 | // Currently x86 / x64 (AMD64) as well as arm64 is supported. | ||
7 | // | ||
8 | // You can access the CPU information by accessing the shared CPU variable of the cpuid library. | ||
9 | // | ||
10 | // Package home: https://github.com/klauspost/cpuid | ||
11 | package cpuid | ||
12 | |||
13 | import ( | ||
14 | "flag" | ||
15 | "fmt" | ||
16 | "math" | ||
17 | "math/bits" | ||
18 | "os" | ||
19 | "runtime" | ||
20 | "strings" | ||
21 | ) | ||
22 | |||
23 | // AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf | ||
24 | // and Processor Programming Reference (PPR) | ||
25 | |||
26 | // Vendor is a representation of a CPU vendor. The zero value is VendorUnknown. | ||
27 | type Vendor int | ||
28 | |||
29 | const ( | ||
30 | VendorUnknown Vendor = iota | ||
31 | Intel | ||
32 | AMD | ||
33 | VIA | ||
34 | Transmeta | ||
35 | NSC | ||
36 | KVM // Kernel-based Virtual Machine | ||
37 | MSVM // Microsoft Hyper-V or Windows Virtual PC | ||
38 | VMware | ||
39 | XenHVM | ||
40 | Bhyve | ||
41 | Hygon | ||
42 | SiS | ||
43 | RDC | ||
44 | |||
45 | Ampere | ||
46 | ARM | ||
47 | Broadcom | ||
48 | Cavium | ||
49 | DEC | ||
50 | Fujitsu | ||
51 | Infineon | ||
52 | Motorola | ||
53 | NVIDIA | ||
54 | AMCC | ||
55 | Qualcomm | ||
56 | Marvell | ||
57 | |||
58 | lastVendor // unexported end marker; keep it last | ||
59 | ) | ||
60 | |||
61 | //go:generate stringer -type=FeatureID,Vendor | ||
62 | |||
63 | // FeatureID is the ID of a specific cpu feature. | ||
64 | type FeatureID int | ||
65 | |||
66 | const ( | ||
67 | // Keep index -1 as unknown | ||
68 | UNKNOWN = -1 | ||
69 | |||
70 | // Add features. iota is already 1 on the next line (UNKNOWN occupies position 0), so ADX == 1. | ||
71 | ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) | ||
72 | AESNI // Advanced Encryption Standard New Instructions | ||
73 | AMD3DNOW // AMD 3DNOW | ||
74 | AMD3DNOWEXT // AMD 3DNowExt | ||
75 | AMXBF16 // Tile computational operations on BFLOAT16 numbers | ||
76 | AMXFP16 // Tile computational operations on FP16 numbers | ||
77 | AMXINT8 // Tile computational operations on 8-bit integers | ||
78 | AMXTILE // Tile architecture | ||
79 | APX_F // Intel APX | ||
80 | AVX // AVX functions | ||
81 | AVX10 // If set the Intel AVX10 Converged Vector ISA is supported | ||
82 | AVX10_128 // If set indicates that AVX10 128-bit vector support is present | ||
83 | AVX10_256 // If set indicates that AVX10 256-bit vector support is present | ||
84 | AVX10_512 // If set indicates that AVX10 512-bit vector support is present | ||
85 | AVX2 // AVX2 functions | ||
86 | AVX512BF16 // AVX-512 BFLOAT16 Instructions | ||
87 | AVX512BITALG // AVX-512 Bit Algorithms | ||
88 | AVX512BW // AVX-512 Byte and Word Instructions | ||
89 | AVX512CD // AVX-512 Conflict Detection Instructions | ||
90 | AVX512DQ // AVX-512 Doubleword and Quadword Instructions | ||
91 | AVX512ER // AVX-512 Exponential and Reciprocal Instructions | ||
92 | AVX512F // AVX-512 Foundation | ||
93 | AVX512FP16 // AVX-512 FP16 Instructions | ||
94 | AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions | ||
95 | AVX512PF // AVX-512 Prefetch Instructions | ||
96 | AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions | ||
97 | AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2 | ||
98 | AVX512VL // AVX-512 Vector Length Extensions | ||
99 | AVX512VNNI // AVX-512 Vector Neural Network Instructions | ||
100 | AVX512VP2INTERSECT // AVX-512 Intersect for D/Q | ||
101 | AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword | ||
102 | AVXIFMA // AVX-IFMA instructions | ||
103 | AVXNECONVERT // AVX-NE-CONVERT instructions | ||
104 | AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one | ||
105 | AVXVNNI // AVX (VEX encoded) VNNI neural network instructions | ||
106 | AVXVNNIINT8 // AVX-VNNI-INT8 instructions | ||
107 | BHI_CTRL // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 | ||
108 | BMI1 // Bit Manipulation Instruction Set 1 | ||
109 | BMI2 // Bit Manipulation Instruction Set 2 | ||
110 | CETIBT // Intel CET Indirect Branch Tracking | ||
111 | CETSS // Intel CET Shadow Stack | ||
112 | CLDEMOTE // Cache Line Demote | ||
113 | CLMUL // Carry-less Multiplication | ||
114 | CLZERO // CLZERO instruction supported | ||
115 | CMOV // i686 CMOV | ||
116 | CMPCCXADD // CMPCCXADD instructions | ||
117 | CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB | ||
118 | CMPXCHG8 // CMPXCHG8 instruction | ||
119 | CPBOOST // Core Performance Boost | ||
120 | CPPC // AMD: Collaborative Processor Performance Control | ||
121 | CX16 // CMPXCHG16B Instruction | ||
122 | EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ | ||
123 | ENQCMD // Enqueue Command | ||
124 | ERMS // Enhanced REP MOVSB/STOSB | ||
125 | F16C // Half-precision floating-point conversion | ||
126 | FLUSH_L1D // Flush L1D cache | ||
127 | FMA3 // Intel FMA 3. Does not imply AVX. | ||
128 | FMA4 // Bulldozer FMA4 functions | ||
129 | FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide | ||
130 | FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide | ||
131 | FSRM // Fast Short Rep Mov | ||
132 | FXSR // FXSAVE, FXRSTOR instructions, CR4 bit 9 | ||
133 | FXSROPT // FXSAVE/FXRSTOR optimizations | ||
134 | GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. | ||
135 | HLE // Hardware Lock Elision | ||
136 | HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR | ||
137 | HTT // Hyperthreading (enabled) | ||
138 | HWA // Hardware assert supported. Indicates support for MSRC001_10 | ||
139 | HYBRID_CPU // This part has CPUs of more than one type. | ||
140 | HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors | ||
141 | IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel) | ||
142 | IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR | ||
143 | IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) | ||
144 | IBRS // AMD: Indirect Branch Restricted Speculation | ||
145 | IBRS_PREFERRED // AMD: IBRS is preferred over software solution | ||
146 | IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection | ||
147 | IBS // Instruction Based Sampling (AMD) | ||
148 | IBSBRNTRGT // Instruction Based Sampling Feature (AMD) | ||
149 | IBSFETCHSAM // Instruction Based Sampling Feature (AMD) | ||
150 | IBSFFV // Instruction Based Sampling Feature (AMD) | ||
151 | IBSOPCNT // Instruction Based Sampling Feature (AMD) | ||
152 | IBSOPCNTEXT // Instruction Based Sampling Feature (AMD) | ||
153 | IBSOPSAM // Instruction Based Sampling Feature (AMD) | ||
154 | IBSRDWROPCNT // Instruction Based Sampling Feature (AMD) | ||
155 | IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD) | ||
156 | IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported | ||
157 | IBS_OPDATA4 // AMD: IBS op data 4 MSR supported | ||
158 | IBS_OPFUSE // AMD: Indicates support for IbsOpFuse | ||
159 | IBS_PREVENTHOST // Disallowing IBS use by the host supported | ||
160 | IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4 | ||
161 | IDPRED_CTRL // IPRED_DIS | ||
162 | INT_WBINVD // WBINVD/WBNOINVD are interruptible. | ||
163 | INVLPGB // INVLPGB and TLBSYNC instruction supported | ||
164 | KEYLOCKER // Key locker | ||
165 | KEYLOCKERW // Key locker wide | ||
166 | LAHF // LAHF/SAHF in long mode | ||
167 | LAM // If set, CPU supports Linear Address Masking | ||
168 | LBRVIRT // LBR virtualization | ||
169 | LZCNT // LZCNT instruction | ||
170 | MCAOVERFLOW // MCA overflow recovery support. | ||
171 | MCDT_NO // Processors do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it. | ||
172 | MCOMMIT // MCOMMIT instruction supported | ||
173 | MD_CLEAR // VERW clears CPU buffers | ||
174 | MMX // standard MMX | ||
175 | MMXEXT // SSE integer functions or AMD MMX ext | ||
176 | MOVBE // MOVBE instruction (big-endian) | ||
177 | MOVDIR64B // Move 64 Bytes as Direct Store | ||
178 | MOVDIRI // Move Doubleword as Direct Store | ||
179 | MOVSB_ZL // Fast Zero-Length MOVSB | ||
180 | MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD | ||
181 | MPX // Intel MPX (Memory Protection Extensions) | ||
182 | MSRIRC // Instruction Retired Counter MSR available | ||
183 | MSRLIST // Read/Write List of Model Specific Registers | ||
184 | MSR_PAGEFLUSH // Page Flush MSR available | ||
185 | NRIPS // Indicates support for NRIP save on VMEXIT | ||
186 | NX // NX (No-Execute) bit | ||
187 | OSXSAVE // XSAVE enabled by OS | ||
188 | PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption | ||
189 | POPCNT // POPCNT instruction | ||
190 | PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled | ||
191 | PREFETCHI // PREFETCHIT0/1 instructions | ||
192 | PSFD // Predictive Store Forward Disable | ||
193 | RDPRU // RDPRU instruction supported | ||
194 | RDRAND // RDRAND instruction is available | ||
195 | RDSEED // RDSEED instruction is available | ||
196 | RDTSCP // RDTSCP Instruction | ||
197 | RRSBA_CTRL // Restricted RSB Alternate | ||
198 | RTM // Restricted Transactional Memory | ||
199 | RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort. | ||
200 | SERIALIZE // Serialize Instruction Execution | ||
201 | SEV // AMD Secure Encrypted Virtualization supported | ||
202 | SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host | ||
203 | SEV_ALTERNATIVE // AMD SEV Alternate Injection supported | ||
204 | SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests | ||
205 | SEV_ES // AMD SEV Encrypted State supported | ||
206 | SEV_RESTRICTED // AMD SEV Restricted Injection supported | ||
207 | SEV_SNP // AMD SEV Secure Nested Paging supported | ||
208 | SGX // Software Guard Extensions | ||
209 | SGXLC // Software Guard Extensions Launch Control | ||
210 | SHA // Intel SHA Extensions | ||
211 | SME // AMD Secure Memory Encryption supported | ||
212 | SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced | ||
213 | SPEC_CTRL_SSBD // Speculative Store Bypass Disable | ||
214 | SRBDS_CTRL // SRBDS mitigation MSR available | ||
215 | SSE // SSE functions | ||
216 | SSE2 // P4 SSE functions | ||
217 | SSE3 // Prescott SSE3 functions | ||
218 | SSE4 // Penryn SSE4.1 functions | ||
219 | SSE42 // Nehalem SSE4.2 functions | ||
220 | SSE4A // AMD Barcelona microarchitecture SSE4a instructions | ||
221 | SSSE3 // Conroe SSSE3 functions | ||
222 | STIBP // Single Thread Indirect Branch Predictors | ||
223 | STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On | ||
224 | STOSB_SHORT // Fast short STOSB | ||
225 | SUCCOR // Software uncorrectable error containment and recovery capability. | ||
226 | SVM // AMD Secure Virtual Machine | ||
227 | SVMDA // Indicates support for the SVM decode assists. | ||
228 | SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control | ||
229 | SVML // AMD SVM lock. Indicates support for SVM-Lock. | ||
230 | SVMNP // AMD SVM nested paging | ||
231 | SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter | ||
232 | SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold | ||
233 | SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions. | ||
234 | SYSEE // SYSENTER and SYSEXIT instructions | ||
235 | TBM // AMD Trailing Bit Manipulation | ||
236 | TDX_GUEST // Intel Trust Domain Extensions Guest | ||
237 | TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations | ||
238 | TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. | ||
239 | TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. | ||
240 | TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 | ||
241 | TSXLDTRK // Intel TSX Suspend Load Address Tracking | ||
242 | VAES // Vector AES. AVX(512) versions requires additional checks. | ||
243 | VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits. | ||
244 | VMPL // AMD VM Permission Levels supported | ||
245 | VMSA_REGPROT // AMD VMSA Register Protection supported | ||
246 | VMX // Virtual Machine Extensions | ||
247 | VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions. | ||
248 | VTE // AMD Virtual Transparent Encryption supported | ||
249 | WAITPKG // TPAUSE, UMONITOR, UMWAIT | ||
250 | WBNOINVD // Write Back and Do Not Invalidate Cache | ||
251 | WRMSRNS // Non-Serializing Write to Model Specific Register | ||
252 | X87 // FPU | ||
253 | XGETBV1 // Supports XGETBV with ECX = 1 | ||
254 | XOP // Bulldozer XOP functions | ||
255 | XSAVE // XSAVE, XRSTOR, XSETBV, XGETBV | ||
256 | XSAVEC // Supports XSAVEC and the compacted form of XRSTOR. | ||
257 | XSAVEOPT // XSAVEOPT available | ||
258 | XSAVES // Supports XSAVES/XRSTORS and IA32_XSS | ||
259 | |||
260 | // ARM features: | ||
261 | AESARM // AES instructions | ||
262 | ARMCPUID // Some CPU ID registers readable at user-level | ||
263 | ASIMD // Advanced SIMD | ||
264 | ASIMDDP // SIMD Dot Product | ||
265 | ASIMDHP // Advanced SIMD half-precision floating point | ||
266 | ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) | ||
267 | ATOMICS // Large System Extensions (LSE) | ||
268 | CRC32 // CRC32/CRC32C instructions | ||
269 | DCPOP // Data cache clean to Point of Persistence (DC CVAP) | ||
270 | EVTSTRM // Generic timer | ||
271 | FCMA // Floating point complex number addition and multiplication | ||
272 | FP // Single-precision and double-precision floating point | ||
273 | FPHP // Half-precision floating point | ||
274 | GPA // Generic Pointer Authentication | ||
275 | JSCVT // Javascript-style double->int convert (FJCVTZS) | ||
276 | LRCPC // Weaker release consistency (LDAPR, etc) | ||
277 | PMULL // Polynomial Multiply instructions (PMULL/PMULL2) | ||
278 | SHA1 // SHA-1 instructions (SHA1C, etc) | ||
279 | SHA2 // SHA-2 instructions (SHA256H, etc) | ||
280 | SHA3 // SHA-3 instructions (EOR3, RAXI, XAR, BCAX) | ||
281 | SHA512 // SHA512 instructions | ||
282 | SM3 // SM3 instructions | ||
283 | SM4 // SM4 instructions | ||
284 | SVE // Scalable Vector Extension | ||
285 | // Keep it last. It automatically defines the size of []flagSet | ||
286 | lastID | ||
287 | |||
288 | firstID FeatureID = UNKNOWN + 1 // lowest ID scanned by ParseFeature and flagSet.Strings | ||
289 | ) | ||
290 | |||
291 | // CPUInfo contains information about the detected system CPU. | ||
292 | type CPUInfo struct { | ||
293 | BrandName string // Brand name reported by the CPU | ||
294 | VendorID Vendor // Comparable CPU vendor ID | ||
295 | VendorString string // Raw vendor string. | ||
296 | featureSet flagSet // Features of the CPU | ||
297 | PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable. | ||
298 | ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable. | ||
299 | LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable. | ||
300 | Family int // CPU family number | ||
301 | Model int // CPU model number | ||
302 | Stepping int // CPU stepping info | ||
303 | CacheLine int // Cache line size in bytes. Will be 0 if undetectable. | ||
304 | Hz int64 // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed. | ||
305 | BoostFreq int64 // Max clock speed, if known, 0 otherwise | ||
306 | Cache struct { | ||
307 | L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected | ||
308 | L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected | ||
309 | L2 int // L2 Cache (per core or shared). Will be -1 if undetected | ||
310 | L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected | ||
311 | } | ||
312 | SGX SGXSupport // SGX details; the SGXSupport type is declared elsewhere in the package | ||
313 | AVX10Level uint8 // NOTE(review): presumably the AVX10 version reported by the CPU - confirm | ||
314 | maxFunc uint32 // checked by LogicalCPU before issuing CPUID leaf 1; NOTE(review): presumably the highest standard CPUID leaf - confirm | ||
315 | maxExFunc uint32 // NOTE(review): presumably the highest extended CPUID leaf - confirm | ||
316 | } | ||
317 | |||
318 | var cpuid func(op uint32) (eax, ebx, ecx, edx uint32) // CPUID for leaf op; nil here, NOTE(review): presumably assigned by initCPU - confirm | ||
319 | var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32) // CPUID for leaf op with a second argument op2 (used as cpuidex(0xb, 0)); nil here | ||
320 | var xgetbv func(index uint32) (eax, edx uint32) // XGETBV hook; nil here | ||
321 | var rdtscpAsm func() (eax, ebx, ecx, edx uint32) // RDTSCP hook used by RTCounter and Ia32TscAux; nil here | ||
322 | var darwinHasAVX512 = func() bool { return false } // default stub that reports false; NOTE(review): presumably replaced on darwin builds - confirm | ||
323 | |||
324 | // CPU contains information about the CPU as detected on startup, | ||
325 | // or when Detect last was called. | ||
326 | // | ||
327 | // Use this as the primary entry point to your data. | ||
328 | var CPU CPUInfo | ||
329 | |||
330 | func init() { | ||
331 | initCPU() // runs before detection; defined elsewhere (NOTE(review): presumably installs the cpuid/xgetbv hooks - confirm) | ||
332 | Detect() // fill the exported CPU variable at package load | ||
333 | } | ||
334 | |||
335 | // Detect will re-detect current CPU info. | ||
336 | // This will replace the content of the exported CPU variable. | ||
337 | // | ||
338 | // Unless you expect the CPU to change while you are running your program | ||
339 | // you should not need to call this function. | ||
340 | // If you call this, you must ensure that no other goroutine is accessing the | ||
341 | // exported CPU variable. | ||
342 | func Detect() { | ||
343 | // Set defaults | ||
344 | CPU.ThreadsPerCore = 1 | ||
345 | CPU.Cache.L1I = -1 | ||
346 | CPU.Cache.L1D = -1 | ||
347 | CPU.Cache.L2 = -1 | ||
348 | CPU.Cache.L3 = -1 | ||
349 | safe := true | ||
350 | if detectArmFlag != nil { | ||
351 | safe = !*detectArmFlag | ||
352 | } | ||
353 | addInfo(&CPU, safe) | ||
354 | if displayFeats != nil && *displayFeats { | ||
355 | fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ",")) | ||
356 | // Exit with non-zero so tests will print value. | ||
357 | os.Exit(1) | ||
358 | } | ||
359 | if disableFlag != nil { | ||
360 | s := strings.Split(*disableFlag, ",") | ||
361 | for _, feat := range s { | ||
362 | feat := ParseFeature(strings.TrimSpace(feat)) | ||
363 | if feat != UNKNOWN { | ||
364 | CPU.featureSet.unset(feat) | ||
365 | } | ||
366 | } | ||
367 | } | ||
368 | } | ||
369 | |||
370 | // DetectARM will detect ARM64 features. | ||
371 | // This is NOT done automatically since it can potentially crash | ||
372 | // if the OS does not handle the command. | ||
373 | // If in the future this can be done safely this function may not | ||
374 | // do anything. | ||
375 | func DetectARM() { | ||
376 | addInfo(&CPU, false) | ||
377 | } | ||
378 | |||
379 | var detectArmFlag *bool // "cpu.arm" flag value; nil until Flags is called | ||
380 | var displayFeats *bool // "cpu.features" flag value; nil until Flags is called | ||
381 | var disableFlag *string // "cpu.disable" flag value; nil until Flags is called | ||
382 | |||
383 | // Flags will enable flags. | ||
384 | // This must be called *before* flag.Parse AND | ||
385 | // Detect must be called after the flags have been parsed. | ||
386 | // Note that this means that any detection used in init() functions | ||
387 | // will not contain these flags. | ||
388 | func Flags() { | ||
389 | disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list") | ||
390 | displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits") | ||
391 | detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash") | ||
392 | } | ||
393 | |||
394 | // Supports returns whether the CPU supports all of the requested features. | ||
395 | func (c CPUInfo) Supports(ids ...FeatureID) bool { | ||
396 | for _, id := range ids { | ||
397 | if !c.featureSet.inSet(id) { | ||
398 | return false | ||
399 | } | ||
400 | } | ||
401 | return true | ||
402 | } | ||
403 | |||
404 | // Has allows for checking a single feature. | ||
405 | // Should be inlined by the compiler. | ||
406 | func (c *CPUInfo) Has(id FeatureID) bool { | ||
407 | return c.featureSet.inSet(id) | ||
408 | } | ||
409 | |||
410 | // AnyOf returns whether the CPU supports one or more of the requested features. | ||
411 | func (c CPUInfo) AnyOf(ids ...FeatureID) bool { | ||
412 | for _, id := range ids { | ||
413 | if c.featureSet.inSet(id) { | ||
414 | return true | ||
415 | } | ||
416 | } | ||
417 | return false | ||
418 | } | ||
419 | |||
420 | // Features contains several features combined for a fast check using | ||
421 | // CPUInfo.HasAll. Values are created with CombineFeatures. | ||
422 | type Features *flagSet | ||
423 | |||
424 | // CombineFeatures allows to combine several features for a close to constant time lookup. | ||
425 | func CombineFeatures(ids ...FeatureID) Features { | ||
426 | var v flagSet | ||
427 | for _, id := range ids { | ||
428 | v.set(id) | ||
429 | } | ||
430 | return &v | ||
431 | } | ||
432 | |||
433 | func (c *CPUInfo) HasAll(f Features) bool { | ||
434 | return c.featureSet.hasSetP(f) | ||
435 | } | ||
436 | |||
437 | // Feature sets consulted by X64Level (oneOfLevel: at least one required; levelNFeatures: all required for level N), see https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels | ||
438 | var oneOfLevel = CombineFeatures(SYSEE, SYSCALL) | ||
439 | var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2) | ||
440 | var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3) | ||
441 | var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE) | ||
442 | var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL) | ||
443 | |||
444 | // X64Level returns the microarchitecture level detected on the CPU. | ||
445 | // If features are lacking or non x64 mode, 0 is returned. | ||
446 | // See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels | ||
447 | func (c CPUInfo) X64Level() int { | ||
448 | if !c.featureSet.hasOneOf(oneOfLevel) { | ||
449 | return 0 | ||
450 | } | ||
451 | if c.featureSet.hasSetP(level4Features) { | ||
452 | return 4 | ||
453 | } | ||
454 | if c.featureSet.hasSetP(level3Features) { | ||
455 | return 3 | ||
456 | } | ||
457 | if c.featureSet.hasSetP(level2Features) { | ||
458 | return 2 | ||
459 | } | ||
460 | if c.featureSet.hasSetP(level1Features) { | ||
461 | return 1 | ||
462 | } | ||
463 | return 0 | ||
464 | } | ||
465 | |||
466 | // Disable will disable one or several features. | ||
467 | func (c *CPUInfo) Disable(ids ...FeatureID) bool { | ||
468 | for _, id := range ids { | ||
469 | c.featureSet.unset(id) | ||
470 | } | ||
471 | return true | ||
472 | } | ||
473 | |||
474 | // Enable will disable one or several features even if they were undetected. | ||
475 | // This is of course not recommended for obvious reasons. | ||
476 | func (c *CPUInfo) Enable(ids ...FeatureID) bool { | ||
477 | for _, id := range ids { | ||
478 | c.featureSet.set(id) | ||
479 | } | ||
480 | return true | ||
481 | } | ||
482 | |||
483 | // IsVendor returns true if vendor is recognized as Intel | ||
484 | func (c CPUInfo) IsVendor(v Vendor) bool { | ||
485 | return c.VendorID == v | ||
486 | } | ||
487 | |||
488 | // FeatureSet returns all available features as strings. | ||
489 | func (c CPUInfo) FeatureSet() []string { | ||
490 | s := make([]string, 0, c.featureSet.nEnabled()) | ||
491 | s = append(s, c.featureSet.Strings()...) | ||
492 | return s | ||
493 | } | ||
494 | |||
495 | // RTCounter returns the 64-bit time-stamp counter | ||
496 | // Uses the RDTSCP instruction. The value 0 is returned | ||
497 | // if the CPU does not support the instruction. | ||
498 | func (c CPUInfo) RTCounter() uint64 { | ||
499 | if !c.Supports(RDTSCP) { | ||
500 | return 0 | ||
501 | } | ||
502 | a, _, _, d := rdtscpAsm() | ||
503 | return uint64(a) | (uint64(d) << 32) | ||
504 | } | ||
505 | |||
506 | // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. | ||
507 | // This variable is OS dependent, but on Linux contains information | ||
508 | // about the current cpu/core the code is running on. | ||
509 | // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. | ||
510 | func (c CPUInfo) Ia32TscAux() uint32 { | ||
511 | if !c.Supports(RDTSCP) { | ||
512 | return 0 | ||
513 | } | ||
514 | _, _, ecx, _ := rdtscpAsm() | ||
515 | return ecx | ||
516 | } | ||
517 | |||
518 | // LogicalCPU will return the Logical CPU the code is currently executing on. | ||
519 | // This is likely to change when the OS re-schedules the running thread | ||
520 | // to another CPU. | ||
521 | // If the current core cannot be detected, -1 will be returned. | ||
522 | func (c CPUInfo) LogicalCPU() int { | ||
523 | if c.maxFunc < 1 { | ||
524 | return -1 | ||
525 | } | ||
526 | _, ebx, _, _ := cpuid(1) | ||
527 | return int(ebx >> 24) | ||
528 | } | ||
529 | |||
530 | // frequencies tries to compute the clock speed of the CPU. If leaf 15 is | ||
531 | // supported, use it, otherwise parse the brand string. Yes, really. | ||
532 | func (c *CPUInfo) frequencies() { | ||
533 | c.Hz, c.BoostFreq = 0, 0 | ||
534 | mfi := maxFunctionID() | ||
535 | if mfi >= 0x15 { | ||
536 | eax, ebx, ecx, _ := cpuid(0x15) | ||
537 | if eax != 0 && ebx != 0 && ecx != 0 { | ||
538 | c.Hz = (int64(ecx) * int64(ebx)) / int64(eax) | ||
539 | } | ||
540 | } | ||
541 | if mfi >= 0x16 { | ||
542 | a, b, _, _ := cpuid(0x16) | ||
543 | // Base... | ||
544 | if a&0xffff > 0 { | ||
545 | c.Hz = int64(a&0xffff) * 1_000_000 | ||
546 | } | ||
547 | // Boost... | ||
548 | if b&0xffff > 0 { | ||
549 | c.BoostFreq = int64(b&0xffff) * 1_000_000 | ||
550 | } | ||
551 | } | ||
552 | if c.Hz > 0 { | ||
553 | return | ||
554 | } | ||
555 | |||
556 | // computeHz determines the official rated speed of a CPU from its brand | ||
557 | // string. This insanity is *actually the official documented way to do | ||
558 | // this according to Intel*, prior to leaf 0x15 existing. The official | ||
559 | // documentation only shows this working for exactly `x.xx` or `xxxx` | ||
560 | // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other | ||
561 | // sizes. | ||
562 | model := c.BrandName | ||
563 | hz := strings.LastIndex(model, "Hz") | ||
564 | if hz < 3 { | ||
565 | return | ||
566 | } | ||
567 | var multiplier int64 | ||
568 | switch model[hz-1] { | ||
569 | case 'M': | ||
570 | multiplier = 1000 * 1000 | ||
571 | case 'G': | ||
572 | multiplier = 1000 * 1000 * 1000 | ||
573 | case 'T': | ||
574 | multiplier = 1000 * 1000 * 1000 * 1000 | ||
575 | } | ||
576 | if multiplier == 0 { | ||
577 | return | ||
578 | } | ||
579 | freq := int64(0) | ||
580 | divisor := int64(0) | ||
581 | decimalShift := int64(1) | ||
582 | var i int | ||
583 | for i = hz - 2; i >= 0 && model[i] != ' '; i-- { | ||
584 | if model[i] >= '0' && model[i] <= '9' { | ||
585 | freq += int64(model[i]-'0') * decimalShift | ||
586 | decimalShift *= 10 | ||
587 | } else if model[i] == '.' { | ||
588 | if divisor != 0 { | ||
589 | return | ||
590 | } | ||
591 | divisor = decimalShift | ||
592 | } else { | ||
593 | return | ||
594 | } | ||
595 | } | ||
596 | // we didn't find a space | ||
597 | if i < 0 { | ||
598 | return | ||
599 | } | ||
600 | if divisor != 0 { | ||
601 | c.Hz = (freq * multiplier) / divisor | ||
602 | return | ||
603 | } | ||
604 | c.Hz = freq * multiplier | ||
605 | } | ||
606 | |||
607 | // VM Will return true if the cpu id indicates we are in | ||
608 | // a virtual machine. | ||
609 | func (c CPUInfo) VM() bool { | ||
610 | return CPU.featureSet.inSet(HYPERVISOR) | ||
611 | } | ||
612 | |||
613 | // flags is one 64-bit word of feature bits: FeatureID id lives in bit (id & flagMask) of word (id >> flagBitsLog2). | ||
614 | type flags uint64 | ||
615 | |||
616 | // log2(bits_in_uint64) | ||
617 | const flagBitsLog2 = 6 | ||
618 | const flagBits = 1 << flagBitsLog2 // bits per flags word (64) | ||
619 | const flagMask = flagBits - 1 // mask selecting the bit position within a word (63) | ||
620 | |||
621 | // flagSet contains detected cpu features and characteristics in an array of flags, with enough words for every FeatureID below lastID. | ||
622 | type flagSet [(lastID + flagMask) / flagBits]flags | ||
623 | |||
624 | func (s *flagSet) inSet(feat FeatureID) bool { | ||
625 | return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0 | ||
626 | } | ||
627 | |||
628 | func (s *flagSet) set(feat FeatureID) { | ||
629 | s[feat>>flagBitsLog2] |= 1 << (feat & flagMask) | ||
630 | } | ||
631 | |||
632 | // setIf will set a feature if boolean is true. | ||
633 | func (s *flagSet) setIf(cond bool, features ...FeatureID) { | ||
634 | if cond { | ||
635 | for _, offset := range features { | ||
636 | s[offset>>flagBitsLog2] |= 1 << (offset & flagMask) | ||
637 | } | ||
638 | } | ||
639 | } | ||
640 | |||
641 | func (s *flagSet) unset(offset FeatureID) { | ||
642 | bit := flags(1 << (offset & flagMask)) | ||
643 | s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit | ||
644 | } | ||
645 | |||
646 | // or with another flagset. | ||
647 | func (s *flagSet) or(other flagSet) { | ||
648 | for i, v := range other[:] { | ||
649 | s[i] |= v | ||
650 | } | ||
651 | } | ||
652 | |||
653 | // hasSet returns whether all features are present. | ||
654 | func (s *flagSet) hasSet(other flagSet) bool { | ||
655 | for i, v := range other[:] { | ||
656 | if s[i]&v != v { | ||
657 | return false | ||
658 | } | ||
659 | } | ||
660 | return true | ||
661 | } | ||
662 | |||
663 | // hasSet returns whether all features are present. | ||
664 | func (s *flagSet) hasSetP(other *flagSet) bool { | ||
665 | for i, v := range other[:] { | ||
666 | if s[i]&v != v { | ||
667 | return false | ||
668 | } | ||
669 | } | ||
670 | return true | ||
671 | } | ||
672 | |||
673 | // hasOneOf returns whether one or more features are present. | ||
674 | func (s *flagSet) hasOneOf(other *flagSet) bool { | ||
675 | for i, v := range other[:] { | ||
676 | if s[i]&v != 0 { | ||
677 | return true | ||
678 | } | ||
679 | } | ||
680 | return false | ||
681 | } | ||
682 | |||
683 | // nEnabled will return the number of enabled flags. | ||
684 | func (s *flagSet) nEnabled() (n int) { | ||
685 | for _, v := range s[:] { | ||
686 | n += bits.OnesCount64(uint64(v)) | ||
687 | } | ||
688 | return n | ||
689 | } | ||
690 | |||
691 | func flagSetWith(feat ...FeatureID) flagSet { | ||
692 | var res flagSet | ||
693 | for _, f := range feat { | ||
694 | res.set(f) | ||
695 | } | ||
696 | return res | ||
697 | } | ||
698 | |||
699 | // ParseFeature will parse the string and return the ID of the matching feature. | ||
700 | // Will return UNKNOWN if not found. | ||
701 | func ParseFeature(s string) FeatureID { | ||
702 | s = strings.ToUpper(s) | ||
703 | for i := firstID; i < lastID; i++ { | ||
704 | if i.String() == s { | ||
705 | return i | ||
706 | } | ||
707 | } | ||
708 | return UNKNOWN | ||
709 | } | ||
710 | |||
711 | // Strings returns an array of the detected features for FlagsSet. | ||
712 | func (s flagSet) Strings() []string { | ||
713 | if len(s) == 0 { | ||
714 | return []string{""} | ||
715 | } | ||
716 | r := make([]string, 0) | ||
717 | for i := firstID; i < lastID; i++ { | ||
718 | if s.inSet(i) { | ||
719 | r = append(r, i.String()) | ||
720 | } | ||
721 | } | ||
722 | return r | ||
723 | } | ||
724 | |||
725 | func maxExtendedFunction() uint32 { | ||
726 | eax, _, _, _ := cpuid(0x80000000) | ||
727 | return eax | ||
728 | } | ||
729 | |||
730 | func maxFunctionID() uint32 { | ||
731 | a, _, _, _ := cpuid(0) | ||
732 | return a | ||
733 | } | ||
734 | |||
735 | func brandName() string { | ||
736 | if maxExtendedFunction() >= 0x80000004 { | ||
737 | v := make([]uint32, 0, 48) | ||
738 | for i := uint32(0); i < 3; i++ { | ||
739 | a, b, c, d := cpuid(0x80000002 + i) | ||
740 | v = append(v, a, b, c, d) | ||
741 | } | ||
742 | return strings.Trim(string(valAsString(v...)), " ") | ||
743 | } | ||
744 | return "unknown" | ||
745 | } | ||
746 | |||
747 | func threadsPerCore() int { | ||
748 | mfi := maxFunctionID() | ||
749 | vend, _ := vendorID() | ||
750 | |||
751 | if mfi < 0x4 || (vend != Intel && vend != AMD) { | ||
752 | return 1 | ||
753 | } | ||
754 | |||
755 | if mfi < 0xb { | ||
756 | if vend != Intel { | ||
757 | return 1 | ||
758 | } | ||
759 | _, b, _, d := cpuid(1) | ||
760 | if (d & (1 << 28)) != 0 { | ||
761 | // v will contain logical core count | ||
762 | v := (b >> 16) & 255 | ||
763 | if v > 1 { | ||
764 | a4, _, _, _ := cpuid(4) | ||
765 | // physical cores | ||
766 | v2 := (a4 >> 26) + 1 | ||
767 | if v2 > 0 { | ||
768 | return int(v) / int(v2) | ||
769 | } | ||
770 | } | ||
771 | } | ||
772 | return 1 | ||
773 | } | ||
774 | _, b, _, _ := cpuidex(0xb, 0) | ||
775 | if b&0xffff == 0 { | ||
776 | if vend == AMD { | ||
777 | // Workaround for AMD returning 0, assume 2 if >= Zen 2 | ||
778 | // It will be more correct than not. | ||
779 | fam, _, _ := familyModel() | ||
780 | _, _, _, d := cpuid(1) | ||
781 | if (d&(1<<28)) != 0 && fam >= 23 { | ||
782 | return 2 | ||
783 | } | ||
784 | } | ||
785 | return 1 | ||
786 | } | ||
787 | return int(b & 0xffff) | ||
788 | } | ||
789 | |||
790 | func logicalCores() int { | ||
791 | mfi := maxFunctionID() | ||
792 | v, _ := vendorID() | ||
793 | switch v { | ||
794 | case Intel: | ||
795 | // Use this on old Intel processors | ||
796 | if mfi < 0xb { | ||
797 | if mfi < 1 { | ||
798 | return 0 | ||
799 | } | ||
800 | // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) | ||
801 | // that can be assigned to logical processors in a physical package. | ||
802 | // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. | ||
803 | _, ebx, _, _ := cpuid(1) | ||
804 | logical := (ebx >> 16) & 0xff | ||
805 | return int(logical) | ||
806 | } | ||
807 | _, b, _, _ := cpuidex(0xb, 1) | ||
808 | return int(b & 0xffff) | ||
809 | case AMD, Hygon: | ||
810 | _, b, _, _ := cpuid(1) | ||
811 | return int((b >> 16) & 0xff) | ||
812 | default: | ||
813 | return 0 | ||
814 | } | ||
815 | } | ||
816 | |||
817 | func familyModel() (family, model, stepping int) { | ||
818 | if maxFunctionID() < 0x1 { | ||
819 | return 0, 0, 0 | ||
820 | } | ||
821 | eax, _, _, _ := cpuid(1) | ||
822 | // If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0]. | ||
823 | family = int((eax >> 8) & 0xf) | ||
824 | extFam := family == 0x6 // Intel is 0x6, needs extended model. | ||
825 | if family == 0xf { | ||
826 | // Add ExtFamily | ||
827 | family += int((eax >> 20) & 0xff) | ||
828 | extFam = true | ||
829 | } | ||
830 | // If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0]. | ||
831 | model = int((eax >> 4) & 0xf) | ||
832 | if extFam { | ||
833 | // Add ExtModel | ||
834 | model += int((eax >> 12) & 0xf0) | ||
835 | } | ||
836 | stepping = int(eax & 0xf) | ||
837 | return family, model, stepping | ||
838 | } | ||
839 | |||
840 | func physicalCores() int { | ||
841 | v, _ := vendorID() | ||
842 | switch v { | ||
843 | case Intel: | ||
844 | return logicalCores() / threadsPerCore() | ||
845 | case AMD, Hygon: | ||
846 | lc := logicalCores() | ||
847 | tpc := threadsPerCore() | ||
848 | if lc > 0 && tpc > 0 { | ||
849 | return lc / tpc | ||
850 | } | ||
851 | |||
852 | // The following is inaccurate on AMD EPYC 7742 64-Core Processor | ||
853 | if maxExtendedFunction() >= 0x80000008 { | ||
854 | _, _, c, _ := cpuid(0x80000008) | ||
855 | if c&0xff > 0 { | ||
856 | return int(c&0xff) + 1 | ||
857 | } | ||
858 | } | ||
859 | } | ||
860 | return 0 | ||
861 | } | ||
862 | |||
863 | // Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID | ||
864 | var vendorMapping = map[string]Vendor{ | ||
865 | "AMDisbetter!": AMD, | ||
866 | "AuthenticAMD": AMD, | ||
867 | "CentaurHauls": VIA, | ||
868 | "GenuineIntel": Intel, | ||
869 | "TransmetaCPU": Transmeta, | ||
870 | "GenuineTMx86": Transmeta, | ||
871 | "Geode by NSC": NSC, | ||
872 | "VIA VIA VIA ": VIA, | ||
873 | "KVMKVMKVMKVM": KVM, | ||
874 | "Microsoft Hv": MSVM, | ||
875 | "VMwareVMware": VMware, | ||
876 | "XenVMMXenVMM": XenHVM, | ||
877 | "bhyve bhyve ": Bhyve, | ||
878 | "HygonGenuine": Hygon, | ||
879 | "Vortex86 SoC": SiS, | ||
880 | "SiS SiS SiS ": SiS, | ||
881 | "RiseRiseRise": SiS, | ||
882 | "Genuine RDC": RDC, | ||
883 | } | ||
884 | |||
885 | func vendorID() (Vendor, string) { | ||
886 | _, b, c, d := cpuid(0) | ||
887 | v := string(valAsString(b, d, c)) | ||
888 | vend, ok := vendorMapping[v] | ||
889 | if !ok { | ||
890 | return VendorUnknown, v | ||
891 | } | ||
892 | return vend, v | ||
893 | } | ||
894 | |||
895 | func cacheLine() int { | ||
896 | if maxFunctionID() < 0x1 { | ||
897 | return 0 | ||
898 | } | ||
899 | |||
900 | _, ebx, _, _ := cpuid(1) | ||
901 | cache := (ebx & 0xff00) >> 5 // cflush size | ||
902 | if cache == 0 && maxExtendedFunction() >= 0x80000006 { | ||
903 | _, _, ecx, _ := cpuid(0x80000006) | ||
904 | cache = ecx & 0xff // cacheline size | ||
905 | } | ||
906 | // TODO: Read from Cache and TLB Information | ||
907 | return int(cache) | ||
908 | } | ||
909 | |||
910 | func (c *CPUInfo) cacheSize() { | ||
911 | c.Cache.L1D = -1 | ||
912 | c.Cache.L1I = -1 | ||
913 | c.Cache.L2 = -1 | ||
914 | c.Cache.L3 = -1 | ||
915 | vendor, _ := vendorID() | ||
916 | switch vendor { | ||
917 | case Intel: | ||
918 | if maxFunctionID() < 4 { | ||
919 | return | ||
920 | } | ||
921 | c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0 | ||
922 | for i := uint32(0); ; i++ { | ||
923 | eax, ebx, ecx, _ := cpuidex(4, i) | ||
924 | cacheType := eax & 15 | ||
925 | if cacheType == 0 { | ||
926 | break | ||
927 | } | ||
928 | cacheLevel := (eax >> 5) & 7 | ||
929 | coherency := int(ebx&0xfff) + 1 | ||
930 | partitions := int((ebx>>12)&0x3ff) + 1 | ||
931 | associativity := int((ebx>>22)&0x3ff) + 1 | ||
932 | sets := int(ecx) + 1 | ||
933 | size := associativity * partitions * coherency * sets | ||
934 | switch cacheLevel { | ||
935 | case 1: | ||
936 | if cacheType == 1 { | ||
937 | // 1 = Data Cache | ||
938 | c.Cache.L1D = size | ||
939 | } else if cacheType == 2 { | ||
940 | // 2 = Instruction Cache | ||
941 | c.Cache.L1I = size | ||
942 | } else { | ||
943 | if c.Cache.L1D < 0 { | ||
944 | c.Cache.L1I = size | ||
945 | } | ||
946 | if c.Cache.L1I < 0 { | ||
947 | c.Cache.L1I = size | ||
948 | } | ||
949 | } | ||
950 | case 2: | ||
951 | c.Cache.L2 = size | ||
952 | case 3: | ||
953 | c.Cache.L3 = size | ||
954 | } | ||
955 | } | ||
956 | case AMD, Hygon: | ||
957 | // Untested. | ||
958 | if maxExtendedFunction() < 0x80000005 { | ||
959 | return | ||
960 | } | ||
961 | _, _, ecx, edx := cpuid(0x80000005) | ||
962 | c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) | ||
963 | c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) | ||
964 | |||
965 | if maxExtendedFunction() < 0x80000006 { | ||
966 | return | ||
967 | } | ||
968 | _, _, ecx, _ = cpuid(0x80000006) | ||
969 | c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) | ||
970 | |||
971 | // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties | ||
972 | if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) { | ||
973 | return | ||
974 | } | ||
975 | |||
976 | // Xen Hypervisor is buggy and returns the same entry no matter ECX value. | ||
977 | // Hack: When we encounter the same entry 100 times we break. | ||
978 | nSame := 0 | ||
979 | var last uint32 | ||
980 | for i := uint32(0); i < math.MaxUint32; i++ { | ||
981 | eax, ebx, ecx, _ := cpuidex(0x8000001D, i) | ||
982 | |||
983 | level := (eax >> 5) & 7 | ||
984 | cacheNumSets := ecx + 1 | ||
985 | cacheLineSize := 1 + (ebx & 2047) | ||
986 | cachePhysPartitions := 1 + ((ebx >> 12) & 511) | ||
987 | cacheNumWays := 1 + ((ebx >> 22) & 511) | ||
988 | |||
989 | typ := eax & 15 | ||
990 | size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays) | ||
991 | if typ == 0 { | ||
992 | return | ||
993 | } | ||
994 | |||
995 | // Check for the same value repeated. | ||
996 | comb := eax ^ ebx ^ ecx | ||
997 | if comb == last { | ||
998 | nSame++ | ||
999 | if nSame == 100 { | ||
1000 | return | ||
1001 | } | ||
1002 | } | ||
1003 | last = comb | ||
1004 | |||
1005 | switch level { | ||
1006 | case 1: | ||
1007 | switch typ { | ||
1008 | case 1: | ||
1009 | // Data cache | ||
1010 | c.Cache.L1D = size | ||
1011 | case 2: | ||
1012 | // Inst cache | ||
1013 | c.Cache.L1I = size | ||
1014 | default: | ||
1015 | if c.Cache.L1D < 0 { | ||
1016 | c.Cache.L1I = size | ||
1017 | } | ||
1018 | if c.Cache.L1I < 0 { | ||
1019 | c.Cache.L1I = size | ||
1020 | } | ||
1021 | } | ||
1022 | case 2: | ||
1023 | c.Cache.L2 = size | ||
1024 | case 3: | ||
1025 | c.Cache.L3 = size | ||
1026 | } | ||
1027 | } | ||
1028 | } | ||
1029 | } | ||
1030 | |||
// SGXEPCSection describes one EPC section as enumerated by hasSGX from
// CPUID leaf 0x12, sub-leaves 2 and up.
type SGXEPCSection struct {
	// BaseAddress is assembled from the EAX (low part) and EBX (high part)
	// values of the sub-leaf.
	BaseAddress uint64
	// EPCSize is assembled from the ECX (low part) and EDX (high part)
	// values of the sub-leaf.
	EPCSize uint64
}
1035 | |||
// SGXSupport collects the SGX capabilities gathered by hasSGX.
// When Available is false, every other field is left at its zero value.
type SGXSupport struct {
	// Available and LaunchControl mirror the arguments passed to hasSGX.
	Available     bool
	LaunchControl bool
	// SGX1Supported and SGX2Supported are bits 0 and 1 of CPUID.0x12.0:EAX.
	SGX1Supported bool
	SGX2Supported bool
	// MaxEnclaveSizeNot64 and MaxEnclaveSize64 are 2 raised to the power held
	// in CPUID.0x12.0:EDX[7:0] and EDX[15:8] respectively.
	MaxEnclaveSizeNot64 int64
	MaxEnclaveSize64    int64
	// EPCSections lists the EPC sections found; non-nil (possibly empty)
	// whenever Available is true.
	EPCSections []SGXEPCSection
}
1045 | |||
1046 | func hasSGX(available, lc bool) (rval SGXSupport) { | ||
1047 | rval.Available = available | ||
1048 | |||
1049 | if !available { | ||
1050 | return | ||
1051 | } | ||
1052 | |||
1053 | rval.LaunchControl = lc | ||
1054 | |||
1055 | a, _, _, d := cpuidex(0x12, 0) | ||
1056 | rval.SGX1Supported = a&0x01 != 0 | ||
1057 | rval.SGX2Supported = a&0x02 != 0 | ||
1058 | rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 | ||
1059 | rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 | ||
1060 | rval.EPCSections = make([]SGXEPCSection, 0) | ||
1061 | |||
1062 | for subleaf := uint32(2); subleaf < 2+8; subleaf++ { | ||
1063 | eax, ebx, ecx, edx := cpuidex(0x12, subleaf) | ||
1064 | leafType := eax & 0xf | ||
1065 | |||
1066 | if leafType == 0 { | ||
1067 | // Invalid subleaf, stop iterating | ||
1068 | break | ||
1069 | } else if leafType == 1 { | ||
1070 | // EPC Section subleaf | ||
1071 | baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32) | ||
1072 | size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32) | ||
1073 | |||
1074 | section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size} | ||
1075 | rval.EPCSections = append(rval.EPCSections, section) | ||
1076 | } | ||
1077 | } | ||
1078 | |||
1079 | return | ||
1080 | } | ||
1081 | |||
1082 | func support() flagSet { | ||
1083 | var fs flagSet | ||
1084 | mfi := maxFunctionID() | ||
1085 | vend, _ := vendorID() | ||
1086 | if mfi < 0x1 { | ||
1087 | return fs | ||
1088 | } | ||
1089 | family, model, _ := familyModel() | ||
1090 | |||
1091 | _, _, c, d := cpuid(1) | ||
1092 | fs.setIf((d&(1<<0)) != 0, X87) | ||
1093 | fs.setIf((d&(1<<8)) != 0, CMPXCHG8) | ||
1094 | fs.setIf((d&(1<<11)) != 0, SYSEE) | ||
1095 | fs.setIf((d&(1<<15)) != 0, CMOV) | ||
1096 | fs.setIf((d&(1<<23)) != 0, MMX) | ||
1097 | fs.setIf((d&(1<<24)) != 0, FXSR) | ||
1098 | fs.setIf((d&(1<<25)) != 0, FXSROPT) | ||
1099 | fs.setIf((d&(1<<25)) != 0, SSE) | ||
1100 | fs.setIf((d&(1<<26)) != 0, SSE2) | ||
1101 | fs.setIf((c&1) != 0, SSE3) | ||
1102 | fs.setIf((c&(1<<5)) != 0, VMX) | ||
1103 | fs.setIf((c&(1<<9)) != 0, SSSE3) | ||
1104 | fs.setIf((c&(1<<19)) != 0, SSE4) | ||
1105 | fs.setIf((c&(1<<20)) != 0, SSE42) | ||
1106 | fs.setIf((c&(1<<25)) != 0, AESNI) | ||
1107 | fs.setIf((c&(1<<1)) != 0, CLMUL) | ||
1108 | fs.setIf(c&(1<<22) != 0, MOVBE) | ||
1109 | fs.setIf(c&(1<<23) != 0, POPCNT) | ||
1110 | fs.setIf(c&(1<<30) != 0, RDRAND) | ||
1111 | |||
1112 | // This bit has been reserved by Intel & AMD for use by hypervisors, | ||
1113 | // and indicates the presence of a hypervisor. | ||
1114 | fs.setIf(c&(1<<31) != 0, HYPERVISOR) | ||
1115 | fs.setIf(c&(1<<29) != 0, F16C) | ||
1116 | fs.setIf(c&(1<<13) != 0, CX16) | ||
1117 | |||
1118 | if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { | ||
1119 | fs.setIf(threadsPerCore() > 1, HTT) | ||
1120 | } | ||
1121 | if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 { | ||
1122 | fs.setIf(threadsPerCore() > 1, HTT) | ||
1123 | } | ||
1124 | fs.setIf(c&1<<26 != 0, XSAVE) | ||
1125 | fs.setIf(c&1<<27 != 0, OSXSAVE) | ||
1126 | // Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits | ||
1127 | const avxCheck = 1<<26 | 1<<27 | 1<<28 | ||
1128 | if c&avxCheck == avxCheck { | ||
1129 | // Check for OS support | ||
1130 | eax, _ := xgetbv(0) | ||
1131 | if (eax & 0x6) == 0x6 { | ||
1132 | fs.set(AVX) | ||
1133 | switch vend { | ||
1134 | case Intel: | ||
1135 | // Older than Haswell. | ||
1136 | fs.setIf(family == 6 && model < 60, AVXSLOW) | ||
1137 | case AMD: | ||
1138 | // Older than Zen 2 | ||
1139 | fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW) | ||
1140 | } | ||
1141 | } | ||
1142 | } | ||
1143 | // FMA3 can be used with SSE registers, so no OS support is strictly needed. | ||
1144 | // fma3 and OSXSAVE needed. | ||
1145 | const fma3Check = 1<<12 | 1<<27 | ||
1146 | fs.setIf(c&fma3Check == fma3Check, FMA3) | ||
1147 | |||
1148 | // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. | ||
1149 | if mfi >= 7 { | ||
1150 | _, ebx, ecx, edx := cpuidex(7, 0) | ||
1151 | if fs.inSet(AVX) && (ebx&0x00000020) != 0 { | ||
1152 | fs.set(AVX2) | ||
1153 | } | ||
1154 | // CPUID.(EAX=7, ECX=0).EBX | ||
1155 | if (ebx & 0x00000008) != 0 { | ||
1156 | fs.set(BMI1) | ||
1157 | fs.setIf((ebx&0x00000100) != 0, BMI2) | ||
1158 | } | ||
1159 | fs.setIf(ebx&(1<<2) != 0, SGX) | ||
1160 | fs.setIf(ebx&(1<<4) != 0, HLE) | ||
1161 | fs.setIf(ebx&(1<<9) != 0, ERMS) | ||
1162 | fs.setIf(ebx&(1<<11) != 0, RTM) | ||
1163 | fs.setIf(ebx&(1<<14) != 0, MPX) | ||
1164 | fs.setIf(ebx&(1<<18) != 0, RDSEED) | ||
1165 | fs.setIf(ebx&(1<<19) != 0, ADX) | ||
1166 | fs.setIf(ebx&(1<<29) != 0, SHA) | ||
1167 | |||
1168 | // CPUID.(EAX=7, ECX=0).ECX | ||
1169 | fs.setIf(ecx&(1<<5) != 0, WAITPKG) | ||
1170 | fs.setIf(ecx&(1<<7) != 0, CETSS) | ||
1171 | fs.setIf(ecx&(1<<8) != 0, GFNI) | ||
1172 | fs.setIf(ecx&(1<<9) != 0, VAES) | ||
1173 | fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ) | ||
1174 | fs.setIf(ecx&(1<<13) != 0, TME) | ||
1175 | fs.setIf(ecx&(1<<25) != 0, CLDEMOTE) | ||
1176 | fs.setIf(ecx&(1<<23) != 0, KEYLOCKER) | ||
1177 | fs.setIf(ecx&(1<<27) != 0, MOVDIRI) | ||
1178 | fs.setIf(ecx&(1<<28) != 0, MOVDIR64B) | ||
1179 | fs.setIf(ecx&(1<<29) != 0, ENQCMD) | ||
1180 | fs.setIf(ecx&(1<<30) != 0, SGXLC) | ||
1181 | |||
1182 | // CPUID.(EAX=7, ECX=0).EDX | ||
1183 | fs.setIf(edx&(1<<4) != 0, FSRM) | ||
1184 | fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL) | ||
1185 | fs.setIf(edx&(1<<10) != 0, MD_CLEAR) | ||
1186 | fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT) | ||
1187 | fs.setIf(edx&(1<<14) != 0, SERIALIZE) | ||
1188 | fs.setIf(edx&(1<<15) != 0, HYBRID_CPU) | ||
1189 | fs.setIf(edx&(1<<16) != 0, TSXLDTRK) | ||
1190 | fs.setIf(edx&(1<<18) != 0, PCONFIG) | ||
1191 | fs.setIf(edx&(1<<20) != 0, CETIBT) | ||
1192 | fs.setIf(edx&(1<<26) != 0, IBPB) | ||
1193 | fs.setIf(edx&(1<<27) != 0, STIBP) | ||
1194 | fs.setIf(edx&(1<<28) != 0, FLUSH_L1D) | ||
1195 | fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP) | ||
1196 | fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP) | ||
1197 | fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD) | ||
1198 | |||
1199 | // CPUID.(EAX=7, ECX=1).EAX | ||
1200 | eax1, _, _, edx1 := cpuidex(7, 1) | ||
1201 | fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI) | ||
1202 | fs.setIf(eax1&(1<<7) != 0, CMPCCXADD) | ||
1203 | fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL) | ||
1204 | fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT) | ||
1205 | fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT) | ||
1206 | fs.setIf(eax1&(1<<22) != 0, HRESET) | ||
1207 | fs.setIf(eax1&(1<<23) != 0, AVXIFMA) | ||
1208 | fs.setIf(eax1&(1<<26) != 0, LAM) | ||
1209 | |||
1210 | // CPUID.(EAX=7, ECX=1).EDX | ||
1211 | fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8) | ||
1212 | fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT) | ||
1213 | fs.setIf(edx1&(1<<14) != 0, PREFETCHI) | ||
1214 | fs.setIf(edx1&(1<<19) != 0, AVX10) | ||
1215 | fs.setIf(edx1&(1<<21) != 0, APX_F) | ||
1216 | |||
1217 | // Only detect AVX-512 features if XGETBV is supported | ||
1218 | if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { | ||
1219 | // Check for OS support | ||
1220 | eax, _ := xgetbv(0) | ||
1221 | |||
1222 | // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and | ||
1223 | // ZMM16-ZMM31 state are enabled by OS) | ||
1224 | /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). | ||
1225 | hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3 | ||
1226 | if runtime.GOOS == "darwin" { | ||
1227 | hasAVX512 = fs.inSet(AVX) && darwinHasAVX512() | ||
1228 | } | ||
1229 | if hasAVX512 { | ||
1230 | fs.setIf(ebx&(1<<16) != 0, AVX512F) | ||
1231 | fs.setIf(ebx&(1<<17) != 0, AVX512DQ) | ||
1232 | fs.setIf(ebx&(1<<21) != 0, AVX512IFMA) | ||
1233 | fs.setIf(ebx&(1<<26) != 0, AVX512PF) | ||
1234 | fs.setIf(ebx&(1<<27) != 0, AVX512ER) | ||
1235 | fs.setIf(ebx&(1<<28) != 0, AVX512CD) | ||
1236 | fs.setIf(ebx&(1<<30) != 0, AVX512BW) | ||
1237 | fs.setIf(ebx&(1<<31) != 0, AVX512VL) | ||
1238 | // ecx | ||
1239 | fs.setIf(ecx&(1<<1) != 0, AVX512VBMI) | ||
1240 | fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2) | ||
1241 | fs.setIf(ecx&(1<<11) != 0, AVX512VNNI) | ||
1242 | fs.setIf(ecx&(1<<12) != 0, AVX512BITALG) | ||
1243 | fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ) | ||
1244 | // edx | ||
1245 | fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT) | ||
1246 | fs.setIf(edx&(1<<22) != 0, AMXBF16) | ||
1247 | fs.setIf(edx&(1<<23) != 0, AVX512FP16) | ||
1248 | fs.setIf(edx&(1<<24) != 0, AMXTILE) | ||
1249 | fs.setIf(edx&(1<<25) != 0, AMXINT8) | ||
1250 | // eax1 = CPUID.(EAX=7, ECX=1).EAX | ||
1251 | fs.setIf(eax1&(1<<5) != 0, AVX512BF16) | ||
1252 | fs.setIf(eax1&(1<<19) != 0, WRMSRNS) | ||
1253 | fs.setIf(eax1&(1<<21) != 0, AMXFP16) | ||
1254 | fs.setIf(eax1&(1<<27) != 0, MSRLIST) | ||
1255 | } | ||
1256 | } | ||
1257 | |||
1258 | // CPUID.(EAX=7, ECX=2) | ||
1259 | _, _, _, edx = cpuidex(7, 2) | ||
1260 | fs.setIf(edx&(1<<0) != 0, PSFD) | ||
1261 | fs.setIf(edx&(1<<1) != 0, IDPRED_CTRL) | ||
1262 | fs.setIf(edx&(1<<2) != 0, RRSBA_CTRL) | ||
1263 | fs.setIf(edx&(1<<4) != 0, BHI_CTRL) | ||
1264 | fs.setIf(edx&(1<<5) != 0, MCDT_NO) | ||
1265 | |||
1266 | // Add keylocker features. | ||
1267 | if fs.inSet(KEYLOCKER) && mfi >= 0x19 { | ||
1268 | _, ebx, _, _ := cpuidex(0x19, 0) | ||
1269 | fs.setIf(ebx&5 == 5, KEYLOCKERW) // Bit 0 and 2 (1+4) | ||
1270 | } | ||
1271 | |||
1272 | // Add AVX10 features. | ||
1273 | if fs.inSet(AVX10) && mfi >= 0x24 { | ||
1274 | _, ebx, _, _ := cpuidex(0x24, 0) | ||
1275 | fs.setIf(ebx&(1<<16) != 0, AVX10_128) | ||
1276 | fs.setIf(ebx&(1<<17) != 0, AVX10_256) | ||
1277 | fs.setIf(ebx&(1<<18) != 0, AVX10_512) | ||
1278 | } | ||
1279 | } | ||
1280 | |||
1281 | // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1) | ||
1282 | // EAX | ||
1283 | // Bit 00: XSAVEOPT is available. | ||
1284 | // Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set. | ||
1285 | // Bit 02: Supports XGETBV with ECX = 1 if set. | ||
1286 | // Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set. | ||
1287 | // Bits 31 - 04: Reserved. | ||
1288 | // EBX | ||
1289 | // Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS. | ||
1290 | // ECX | ||
1291 | // Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1. | ||
1292 | // EDX? | ||
1293 | // Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved. | ||
1294 | if mfi >= 0xd { | ||
1295 | if fs.inSet(XSAVE) { | ||
1296 | eax, _, _, _ := cpuidex(0xd, 1) | ||
1297 | fs.setIf(eax&(1<<0) != 0, XSAVEOPT) | ||
1298 | fs.setIf(eax&(1<<1) != 0, XSAVEC) | ||
1299 | fs.setIf(eax&(1<<2) != 0, XGETBV1) | ||
1300 | fs.setIf(eax&(1<<3) != 0, XSAVES) | ||
1301 | } | ||
1302 | } | ||
1303 | if maxExtendedFunction() >= 0x80000001 { | ||
1304 | _, _, c, d := cpuid(0x80000001) | ||
1305 | if (c & (1 << 5)) != 0 { | ||
1306 | fs.set(LZCNT) | ||
1307 | fs.set(POPCNT) | ||
1308 | } | ||
1309 | // ECX | ||
1310 | fs.setIf((c&(1<<0)) != 0, LAHF) | ||
1311 | fs.setIf((c&(1<<2)) != 0, SVM) | ||
1312 | fs.setIf((c&(1<<6)) != 0, SSE4A) | ||
1313 | fs.setIf((c&(1<<10)) != 0, IBS) | ||
1314 | fs.setIf((c&(1<<22)) != 0, TOPEXT) | ||
1315 | |||
1316 | // EDX | ||
1317 | fs.setIf(d&(1<<11) != 0, SYSCALL) | ||
1318 | fs.setIf(d&(1<<20) != 0, NX) | ||
1319 | fs.setIf(d&(1<<22) != 0, MMXEXT) | ||
1320 | fs.setIf(d&(1<<23) != 0, MMX) | ||
1321 | fs.setIf(d&(1<<24) != 0, FXSR) | ||
1322 | fs.setIf(d&(1<<25) != 0, FXSROPT) | ||
1323 | fs.setIf(d&(1<<27) != 0, RDTSCP) | ||
1324 | fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT) | ||
1325 | fs.setIf(d&(1<<31) != 0, AMD3DNOW) | ||
1326 | |||
1327 | /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be | ||
1328 | * used unless the OS has AVX support. */ | ||
1329 | if fs.inSet(AVX) { | ||
1330 | fs.setIf((c&(1<<11)) != 0, XOP) | ||
1331 | fs.setIf((c&(1<<16)) != 0, FMA4) | ||
1332 | } | ||
1333 | |||
1334 | } | ||
1335 | if maxExtendedFunction() >= 0x80000007 { | ||
1336 | _, b, _, d := cpuid(0x80000007) | ||
1337 | fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW) | ||
1338 | fs.setIf((b&(1<<1)) != 0, SUCCOR) | ||
1339 | fs.setIf((b&(1<<2)) != 0, HWA) | ||
1340 | fs.setIf((d&(1<<9)) != 0, CPBOOST) | ||
1341 | } | ||
1342 | |||
1343 | if maxExtendedFunction() >= 0x80000008 { | ||
1344 | _, b, _, _ := cpuid(0x80000008) | ||
1345 | fs.setIf(b&(1<<28) != 0, PSFD) | ||
1346 | fs.setIf(b&(1<<27) != 0, CPPC) | ||
1347 | fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD) | ||
1348 | fs.setIf(b&(1<<23) != 0, PPIN) | ||
1349 | fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED) | ||
1350 | fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS) | ||
1351 | fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP) | ||
1352 | fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED) | ||
1353 | fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON) | ||
1354 | fs.setIf(b&(1<<15) != 0, STIBP) | ||
1355 | fs.setIf(b&(1<<14) != 0, IBRS) | ||
1356 | fs.setIf((b&(1<<13)) != 0, INT_WBINVD) | ||
1357 | fs.setIf(b&(1<<12) != 0, IBPB) | ||
1358 | fs.setIf((b&(1<<9)) != 0, WBNOINVD) | ||
1359 | fs.setIf((b&(1<<8)) != 0, MCOMMIT) | ||
1360 | fs.setIf((b&(1<<4)) != 0, RDPRU) | ||
1361 | fs.setIf((b&(1<<3)) != 0, INVLPGB) | ||
1362 | fs.setIf((b&(1<<1)) != 0, MSRIRC) | ||
1363 | fs.setIf((b&(1<<0)) != 0, CLZERO) | ||
1364 | } | ||
1365 | |||
1366 | if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A { | ||
1367 | _, _, _, edx := cpuid(0x8000000A) | ||
1368 | fs.setIf((edx>>0)&1 == 1, SVMNP) | ||
1369 | fs.setIf((edx>>1)&1 == 1, LBRVIRT) | ||
1370 | fs.setIf((edx>>2)&1 == 1, SVML) | ||
1371 | fs.setIf((edx>>3)&1 == 1, NRIPS) | ||
1372 | fs.setIf((edx>>4)&1 == 1, TSCRATEMSR) | ||
1373 | fs.setIf((edx>>5)&1 == 1, VMCBCLEAN) | ||
1374 | fs.setIf((edx>>6)&1 == 1, SVMFBASID) | ||
1375 | fs.setIf((edx>>7)&1 == 1, SVMDA) | ||
1376 | fs.setIf((edx>>10)&1 == 1, SVMPF) | ||
1377 | fs.setIf((edx>>12)&1 == 1, SVMPFT) | ||
1378 | } | ||
1379 | |||
1380 | if maxExtendedFunction() >= 0x8000001a { | ||
1381 | eax, _, _, _ := cpuid(0x8000001a) | ||
1382 | fs.setIf((eax>>0)&1 == 1, FP128) | ||
1383 | fs.setIf((eax>>1)&1 == 1, MOVU) | ||
1384 | fs.setIf((eax>>2)&1 == 1, FP256) | ||
1385 | } | ||
1386 | |||
1387 | if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) { | ||
1388 | eax, _, _, _ := cpuid(0x8000001b) | ||
1389 | fs.setIf((eax>>0)&1 == 1, IBSFFV) | ||
1390 | fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM) | ||
1391 | fs.setIf((eax>>2)&1 == 1, IBSOPSAM) | ||
1392 | fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT) | ||
1393 | fs.setIf((eax>>4)&1 == 1, IBSOPCNT) | ||
1394 | fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT) | ||
1395 | fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT) | ||
1396 | fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK) | ||
1397 | fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE) | ||
1398 | fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX) | ||
1399 | fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1. | ||
1400 | fs.setIf((eax>>11)&1 == 1, IBS_ZEN4) | ||
1401 | } | ||
1402 | |||
1403 | if maxExtendedFunction() >= 0x8000001f && vend == AMD { | ||
1404 | a, _, _, _ := cpuid(0x8000001f) | ||
1405 | fs.setIf((a>>0)&1 == 1, SME) | ||
1406 | fs.setIf((a>>1)&1 == 1, SEV) | ||
1407 | fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH) | ||
1408 | fs.setIf((a>>3)&1 == 1, SEV_ES) | ||
1409 | fs.setIf((a>>4)&1 == 1, SEV_SNP) | ||
1410 | fs.setIf((a>>5)&1 == 1, VMPL) | ||
1411 | fs.setIf((a>>10)&1 == 1, SME_COHERENT) | ||
1412 | fs.setIf((a>>11)&1 == 1, SEV_64BIT) | ||
1413 | fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED) | ||
1414 | fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE) | ||
1415 | fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP) | ||
1416 | fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST) | ||
1417 | fs.setIf((a>>16)&1 == 1, VTE) | ||
1418 | fs.setIf((a>>24)&1 == 1, VMSA_REGPROT) | ||
1419 | } | ||
1420 | |||
1421 | if mfi >= 0x20 { | ||
1422 | // Microsoft has decided to purposefully hide the information | ||
1423 | // of the guest TEE when VMs are being created using Hyper-V. | ||
1424 | // | ||
1425 | // This leads us to check for the Hyper-V cpuid features | ||
1426 | // (0x4000000C), and then for the `ebx` value set. | ||
1427 | // | ||
1428 | // For Intel TDX, `ebx` is set as `0xbe3`, being 3 the part | ||
1429 | // we're mostly interested about,according to: | ||
1430 | // https://github.com/torvalds/linux/blob/d2f51b3516dade79269ff45eae2a7668ae711b25/arch/x86/include/asm/hyperv-tlfs.h#L169-L174 | ||
1431 | _, ebx, _, _ := cpuid(0x4000000C) | ||
1432 | fs.setIf(ebx == 0xbe3, TDX_GUEST) | ||
1433 | } | ||
1434 | |||
1435 | if mfi >= 0x21 { | ||
1436 | // Intel Trusted Domain Extensions Guests have their own cpuid leaf (0x21). | ||
1437 | _, ebx, ecx, edx := cpuid(0x21) | ||
1438 | identity := string(valAsString(ebx, edx, ecx)) | ||
1439 | fs.setIf(identity == "IntelTDX ", TDX_GUEST) | ||
1440 | } | ||
1441 | |||
1442 | return fs | ||
1443 | } | ||
1444 | |||
1445 | func (c *CPUInfo) supportAVX10() uint8 { | ||
1446 | if c.maxFunc >= 0x24 && c.featureSet.inSet(AVX10) { | ||
1447 | _, ebx, _, _ := cpuidex(0x24, 0) | ||
1448 | return uint8(ebx) | ||
1449 | } | ||
1450 | return 0 | ||
1451 | } | ||
1452 | |||
// valAsString serializes the given 32-bit values into bytes, least
// significant byte first, and stops at the first zero byte. The terminating
// zero and everything after it are not part of the result.
// The returned slice is never nil.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 0, 4*len(values))
	for _, word := range values {
		for shift := uint(0); shift < 32; shift += 8 {
			b := byte(word >> shift)
			if b == 0 {
				// NUL terminator reached.
				return out
			}
			out = append(out, b)
		}
	}
	return out
}
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s b/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s new file mode 100644 index 0000000..8587c3a --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s | |||
@@ -0,0 +1,47 @@ | |||
1 | // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. | ||
2 | |||
3 | //+build 386,!gccgo,!noasm,!appengine | ||
4 | |||
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
// Runs CPUID with AX = op and CX cleared, then stores the resulting
// AX, BX, CX and DX into the four result slots.
TEXT ·asmCpuid(SB), 7, $0
	XORL CX, CX // clear CX before CPUID
	MOVL op+0(FP), AX
	CPUID
	MOVL AX, eax+4(FP)
	MOVL BX, ebx+8(FP)
	MOVL CX, ecx+12(FP)
	MOVL DX, edx+16(FP)
	RET
15 | |||
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
// Runs CPUID with AX = op and CX = op2, then stores the resulting
// AX, BX, CX and DX into the four result slots.
TEXT ·asmCpuidex(SB), 7, $0
	MOVL op+0(FP), AX
	MOVL op2+4(FP), CX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET
26 | |||
// func asmXgetbv(index uint32) (eax, edx uint32)
// Loads index into CX, executes XGETBV (emitted as raw bytes) and stores
// the resulting AX and DX into the two result slots.
TEXT ·asmXgetbv(SB), 7, $0
	MOVL index+0(FP), CX
	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
	MOVL AX, eax+4(FP)
	MOVL DX, edx+8(FP)
	RET
34 | |||
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
// Executes RDTSCP (emitted as raw bytes) and stores the contents of
// AX, BX, CX and DX afterwards into the four result slots.
TEXT ·asmRdtscpAsm(SB), 7, $0
	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
	MOVL AX, eax+0(FP)
	MOVL BX, ebx+4(FP)
	MOVL CX, ecx+8(FP)
	MOVL DX, edx+12(FP)
	RET
43 | |||
// func asmDarwinHasAVX512() bool
// Always returns false in this 386 build.
// The result is a 1-byte bool, so it is written with MOVB through the
// conventional name ret, and the frame declares 0 bytes of locals and
// 1 byte of arguments/results, matching the amd64 implementation.
TEXT ·asmDarwinHasAVX512(SB), 7, $0-1
	MOVB $0, ret+0(FP)
	RET
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s b/vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s new file mode 100644 index 0000000..bc11f89 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s | |||
@@ -0,0 +1,72 @@ | |||
1 | // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. | ||
2 | |||
3 | //+build amd64,!gccgo,!noasm,!appengine | ||
4 | |||
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
// Runs CPUID with AX = op and CX cleared, then stores the resulting
// AX, BX, CX and DX into the four result slots, which start at offset 8.
TEXT ·asmCpuid(SB), 7, $0
	XORQ CX, CX // clear CX before CPUID
	MOVL op+0(FP), AX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET
15 | |||
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
// Runs CPUID with AX = op and CX = op2, then stores the resulting
// AX, BX, CX and DX into the four result slots.
TEXT ·asmCpuidex(SB), 7, $0
	MOVL op+0(FP), AX
	MOVL op2+4(FP), CX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET
26 | |||
// func asmXgetbv(index uint32) (eax, edx uint32)
// Loads index into CX, executes XGETBV (emitted as raw bytes) and stores
// the resulting AX and DX into the two result slots.
TEXT ·asmXgetbv(SB), 7, $0
	MOVL index+0(FP), CX
	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
	MOVL AX, eax+8(FP)
	MOVL DX, edx+12(FP)
	RET
34 | |||
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
// Executes RDTSCP (emitted as raw bytes) and stores the contents of
// AX, BX, CX and DX afterwards into the four result slots.
TEXT ·asmRdtscpAsm(SB), 7, $0
	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
	MOVL AX, eax+0(FP)
	MOVL BX, ebx+4(FP)
	MOVL CX, ecx+8(FP)
	MOVL DX, edx+12(FP)
	RET
43 | |||
// From https://go-review.googlesource.com/c/sys/+/285572/
// func asmDarwinHasAVX512() bool
// Returns false unless assembled for darwin/amd64. There it reads the
// commpage: the 16-bit version must be at least 13 and the hasAVX512F bit
// must be set in the 64-bit capabilities word for the result to be true.
TEXT ·asmDarwinHasAVX512(SB), 7, $0-1
	MOVB $0, ret+0(FP) // default to false

#ifdef GOOS_darwin // return if not darwin
#ifdef GOARCH_amd64 // return if not amd64
// These values from:
// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
#define commpage64_base_address 0x00007fffffe00000
#define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
#define commpage64_version (commpage64_base_address+0x01E)
#define hasAVX512F 0x0000004000000000
	MOVQ $commpage64_version, BX
	MOVW (BX), AX
	CMPW AX, $13 // versions < 13 do not support AVX512
	JL no_avx512
	MOVQ $commpage64_cpu_capabilities64, BX
	MOVQ (BX), AX
	MOVQ $hasAVX512F, CX
	ANDQ CX, AX // isolate the hasAVX512F capability bit
	JZ no_avx512
	MOVB $1, ret+0(FP)

no_avx512:
#endif
#endif
	RET
72 | |||
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s b/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s new file mode 100644 index 0000000..b31d6ae --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s | |||
@@ -0,0 +1,26 @@ | |||
1 | // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. | ||
2 | |||
3 | //+build arm64,!gccgo,!noasm,!appengine | ||
4 | |||
5 | // See https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt | ||
6 | |||
// func getMidr() (midr uint64)
//
// Reads MIDR_EL1 into R0 (the MRS is emitted as a raw instruction word) and
// returns it as midr.
TEXT ·getMidr(SB), 7, $0
	WORD $0xd5380000 // mrs x0, midr_el1 /* Main ID Register */
	MOVD R0, midr+0(FP)
	RET
12 | |||
// func getProcFeatures() (procFeatures uint64)
//
// Reads ID_AA64PFR0_EL1 into R0 (the MRS is emitted as a raw instruction word)
// and returns it as procFeatures.
TEXT ·getProcFeatures(SB), 7, $0
	WORD $0xd5380400 // mrs x0, id_aa64pfr0_el1 /* Processor Feature Register 0 */
	MOVD R0, procFeatures+0(FP)
	RET
18 | |||
// func getInstAttributes() (instAttrReg0, instAttrReg1 uint64)
//
// Reads ID_AA64ISAR0_EL1 into R0 and ID_AA64ISAR1_EL1 into R1 (both MRS
// instructions are emitted as raw instruction words) and returns them as
// instAttrReg0 and instAttrReg1 respectively.
TEXT ·getInstAttributes(SB), 7, $0
	WORD $0xd5380600 // mrs x0, id_aa64isar0_el1 /* Instruction Set Attribute Register 0 */
	WORD $0xd5380621 // mrs x1, id_aa64isar1_el1 /* Instruction Set Attribute Register 1 */
	MOVD R0, instAttrReg0+0(FP)
	MOVD R1, instAttrReg1+8(FP)
	RET
26 | |||
diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go b/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go new file mode 100644 index 0000000..9a53504 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go | |||
@@ -0,0 +1,247 @@ | |||
1 | // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. | ||
2 | |||
3 | //go:build arm64 && !gccgo && !noasm && !appengine | ||
4 | // +build arm64,!gccgo,!noasm,!appengine | ||
5 | |||
6 | package cpuid | ||
7 | |||
8 | import "runtime" | ||
9 | |||
10 | func getMidr() (midr uint64) | ||
11 | func getProcFeatures() (procFeatures uint64) | ||
12 | func getInstAttributes() (instAttrReg0, instAttrReg1 uint64) | ||
13 | |||
14 | func initCPU() { | ||
15 | cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 } | ||
16 | cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 } | ||
17 | xgetbv = func(uint32) (a, b uint32) { return 0, 0 } | ||
18 | rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 } | ||
19 | } | ||
20 | |||
21 | func addInfo(c *CPUInfo, safe bool) { | ||
22 | // Seems to be safe to assume on ARM64 | ||
23 | c.CacheLine = 64 | ||
24 | detectOS(c) | ||
25 | |||
26 | // ARM64 disabled since it may crash if interrupt is not intercepted by OS. | ||
27 | if safe && !c.Supports(ARMCPUID) && runtime.GOOS != "freebsd" { | ||
28 | return | ||
29 | } | ||
30 | midr := getMidr() | ||
31 | |||
32 | // MIDR_EL1 - Main ID Register | ||
33 | // https://developer.arm.com/docs/ddi0595/h/aarch64-system-registers/midr_el1 | ||
34 | // x--------------------------------------------------x | ||
35 | // | Name | bits | visible | | ||
36 | // |--------------------------------------------------| | ||
37 | // | Implementer | [31-24] | y | | ||
38 | // |--------------------------------------------------| | ||
39 | // | Variant | [23-20] | y | | ||
40 | // |--------------------------------------------------| | ||
41 | // | Architecture | [19-16] | y | | ||
42 | // |--------------------------------------------------| | ||
43 | // | PartNum | [15-4] | y | | ||
44 | // |--------------------------------------------------| | ||
45 | // | Revision | [3-0] | y | | ||
46 | // x--------------------------------------------------x | ||
47 | |||
48 | switch (midr >> 24) & 0xff { | ||
49 | case 0xC0: | ||
50 | c.VendorString = "Ampere Computing" | ||
51 | c.VendorID = Ampere | ||
52 | case 0x41: | ||
53 | c.VendorString = "Arm Limited" | ||
54 | c.VendorID = ARM | ||
55 | case 0x42: | ||
56 | c.VendorString = "Broadcom Corporation" | ||
57 | c.VendorID = Broadcom | ||
58 | case 0x43: | ||
59 | c.VendorString = "Cavium Inc" | ||
60 | c.VendorID = Cavium | ||
61 | case 0x44: | ||
62 | c.VendorString = "Digital Equipment Corporation" | ||
63 | c.VendorID = DEC | ||
64 | case 0x46: | ||
65 | c.VendorString = "Fujitsu Ltd" | ||
66 | c.VendorID = Fujitsu | ||
67 | case 0x49: | ||
68 | c.VendorString = "Infineon Technologies AG" | ||
69 | c.VendorID = Infineon | ||
70 | case 0x4D: | ||
71 | c.VendorString = "Motorola or Freescale Semiconductor Inc" | ||
72 | c.VendorID = Motorola | ||
73 | case 0x4E: | ||
74 | c.VendorString = "NVIDIA Corporation" | ||
75 | c.VendorID = NVIDIA | ||
76 | case 0x50: | ||
77 | c.VendorString = "Applied Micro Circuits Corporation" | ||
78 | c.VendorID = AMCC | ||
79 | case 0x51: | ||
80 | c.VendorString = "Qualcomm Inc" | ||
81 | c.VendorID = Qualcomm | ||
82 | case 0x56: | ||
83 | c.VendorString = "Marvell International Ltd" | ||
84 | c.VendorID = Marvell | ||
85 | case 0x69: | ||
86 | c.VendorString = "Intel Corporation" | ||
87 | c.VendorID = Intel | ||
88 | } | ||
89 | |||
90 | // Lower 4 bits: Architecture | ||
91 | // Architecture Meaning | ||
92 | // 0b0001 Armv4. | ||
93 | // 0b0010 Armv4T. | ||
94 | // 0b0011 Armv5 (obsolete). | ||
95 | // 0b0100 Armv5T. | ||
96 | // 0b0101 Armv5TE. | ||
97 | // 0b0110 Armv5TEJ. | ||
98 | // 0b0111 Armv6. | ||
99 | // 0b1111 Architectural features are individually identified in the ID_* registers, see 'ID registers'. | ||
100 | // Upper 4 bit: Variant | ||
101 | // An IMPLEMENTATION DEFINED variant number. | ||
102 | // Typically, this field is used to distinguish between different product variants, or major revisions of a product. | ||
103 | c.Family = int(midr>>16) & 0xff | ||
104 | |||
105 | // PartNum, bits [15:4] | ||
106 | // An IMPLEMENTATION DEFINED primary part number for the device. | ||
107 | // On processors implemented by Arm, if the top four bits of the primary | ||
108 | // part number are 0x0 or 0x7, the variant and architecture are encoded differently. | ||
109 | // Revision, bits [3:0] | ||
110 | // An IMPLEMENTATION DEFINED revision number for the device. | ||
111 | c.Model = int(midr) & 0xffff | ||
112 | |||
113 | procFeatures := getProcFeatures() | ||
114 | |||
115 | // ID_AA64PFR0_EL1 - Processor Feature Register 0 | ||
116 | // x--------------------------------------------------x | ||
117 | // | Name | bits | visible | | ||
118 | // |--------------------------------------------------| | ||
119 | // | DIT | [51-48] | y | | ||
120 | // |--------------------------------------------------| | ||
121 | // | SVE | [35-32] | y | | ||
122 | // |--------------------------------------------------| | ||
123 | // | GIC | [27-24] | n | | ||
124 | // |--------------------------------------------------| | ||
125 | // | AdvSIMD | [23-20] | y | | ||
126 | // |--------------------------------------------------| | ||
127 | // | FP | [19-16] | y | | ||
128 | // |--------------------------------------------------| | ||
129 | // | EL3 | [15-12] | n | | ||
130 | // |--------------------------------------------------| | ||
131 | // | EL2 | [11-8] | n | | ||
132 | // |--------------------------------------------------| | ||
133 | // | EL1 | [7-4] | n | | ||
134 | // |--------------------------------------------------| | ||
135 | // | EL0 | [3-0] | n | | ||
136 | // x--------------------------------------------------x | ||
137 | |||
138 | var f flagSet | ||
139 | // if procFeatures&(0xf<<48) != 0 { | ||
140 | // fmt.Println("DIT") | ||
141 | // } | ||
142 | f.setIf(procFeatures&(0xf<<32) != 0, SVE) | ||
143 | if procFeatures&(0xf<<20) != 15<<20 { | ||
144 | f.set(ASIMD) | ||
145 | // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1 | ||
146 | // 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic. | ||
147 | f.setIf(procFeatures&(0xf<<20) == 1<<20, FPHP, ASIMDHP) | ||
148 | } | ||
149 | f.setIf(procFeatures&(0xf<<16) != 0, FP) | ||
150 | |||
151 | instAttrReg0, instAttrReg1 := getInstAttributes() | ||
152 | |||
153 | // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1 | ||
154 | // | ||
155 | // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 | ||
156 | // x--------------------------------------------------x | ||
157 | // | Name | bits | visible | | ||
158 | // |--------------------------------------------------| | ||
159 | // | TS | [55-52] | y | | ||
160 | // |--------------------------------------------------| | ||
161 | // | FHM | [51-48] | y | | ||
162 | // |--------------------------------------------------| | ||
163 | // | DP | [47-44] | y | | ||
164 | // |--------------------------------------------------| | ||
165 | // | SM4 | [43-40] | y | | ||
166 | // |--------------------------------------------------| | ||
167 | // | SM3 | [39-36] | y | | ||
168 | // |--------------------------------------------------| | ||
169 | // | SHA3 | [35-32] | y | | ||
170 | // |--------------------------------------------------| | ||
171 | // | RDM | [31-28] | y | | ||
172 | // |--------------------------------------------------| | ||
173 | // | ATOMICS | [23-20] | y | | ||
174 | // |--------------------------------------------------| | ||
175 | // | CRC32 | [19-16] | y | | ||
176 | // |--------------------------------------------------| | ||
177 | // | SHA2 | [15-12] | y | | ||
178 | // |--------------------------------------------------| | ||
179 | // | SHA1 | [11-8] | y | | ||
180 | // |--------------------------------------------------| | ||
181 | // | AES | [7-4] | y | | ||
182 | // x--------------------------------------------------x | ||
183 | |||
184 | // if instAttrReg0&(0xf<<52) != 0 { | ||
185 | // fmt.Println("TS") | ||
186 | // } | ||
187 | // if instAttrReg0&(0xf<<48) != 0 { | ||
188 | // fmt.Println("FHM") | ||
189 | // } | ||
190 | f.setIf(instAttrReg0&(0xf<<44) != 0, ASIMDDP) | ||
191 | f.setIf(instAttrReg0&(0xf<<40) != 0, SM4) | ||
192 | f.setIf(instAttrReg0&(0xf<<36) != 0, SM3) | ||
193 | f.setIf(instAttrReg0&(0xf<<32) != 0, SHA3) | ||
194 | f.setIf(instAttrReg0&(0xf<<28) != 0, ASIMDRDM) | ||
195 | f.setIf(instAttrReg0&(0xf<<20) != 0, ATOMICS) | ||
196 | f.setIf(instAttrReg0&(0xf<<16) != 0, CRC32) | ||
197 | f.setIf(instAttrReg0&(0xf<<12) != 0, SHA2) | ||
198 | // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1 | ||
199 | // 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented. | ||
200 | f.setIf(instAttrReg0&(0xf<<12) == 2<<12, SHA512) | ||
201 | f.setIf(instAttrReg0&(0xf<<8) != 0, SHA1) | ||
202 | f.setIf(instAttrReg0&(0xf<<4) != 0, AESARM) | ||
203 | // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1 | ||
204 | // 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities. | ||
205 | f.setIf(instAttrReg0&(0xf<<4) == 2<<4, PMULL) | ||
206 | |||
207 | // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar1_el1 | ||
208 | // | ||
209 | // ID_AA64ISAR1_EL1 - Instruction set attribute register 1 | ||
210 | // x--------------------------------------------------x | ||
211 | // | Name | bits | visible | | ||
212 | // |--------------------------------------------------| | ||
213 | // | GPI | [31-28] | y | | ||
214 | // |--------------------------------------------------| | ||
215 | // | GPA | [27-24] | y | | ||
216 | // |--------------------------------------------------| | ||
217 | // | LRCPC | [23-20] | y | | ||
218 | // |--------------------------------------------------| | ||
219 | // | FCMA | [19-16] | y | | ||
220 | // |--------------------------------------------------| | ||
221 | // | JSCVT | [15-12] | y | | ||
222 | // |--------------------------------------------------| | ||
223 | // | API | [11-8] | y | | ||
224 | // |--------------------------------------------------| | ||
225 | // | APA | [7-4] | y | | ||
226 | // |--------------------------------------------------| | ||
227 | // | DPB | [3-0] | y | | ||
228 | // x--------------------------------------------------x | ||
229 | |||
230 | // if instAttrReg1&(0xf<<28) != 0 { | ||
231 | // fmt.Println("GPI") | ||
232 | // } | ||
233 | f.setIf(instAttrReg1&(0xf<<28) != 24, GPA) | ||
234 | f.setIf(instAttrReg1&(0xf<<20) != 0, LRCPC) | ||
235 | f.setIf(instAttrReg1&(0xf<<16) != 0, FCMA) | ||
236 | f.setIf(instAttrReg1&(0xf<<12) != 0, JSCVT) | ||
237 | // if instAttrReg1&(0xf<<8) != 0 { | ||
238 | // fmt.Println("API") | ||
239 | // } | ||
240 | // if instAttrReg1&(0xf<<4) != 0 { | ||
241 | // fmt.Println("APA") | ||
242 | // } | ||
243 | f.setIf(instAttrReg1&(0xf<<0) != 0, DCPOP) | ||
244 | |||
245 | // Store | ||
246 | c.featureSet.or(f) | ||
247 | } | ||
diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_ref.go b/vendor/github.com/klauspost/cpuid/v2/detect_ref.go new file mode 100644 index 0000000..9636c2b --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/detect_ref.go | |||
@@ -0,0 +1,15 @@ | |||
1 | // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. | ||
2 | |||
3 | //go:build (!amd64 && !386 && !arm64) || gccgo || noasm || appengine | ||
4 | // +build !amd64,!386,!arm64 gccgo noasm appengine | ||
5 | |||
6 | package cpuid | ||
7 | |||
8 | func initCPU() { | ||
9 | cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 } | ||
10 | cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 } | ||
11 | xgetbv = func(uint32) (a, b uint32) { return 0, 0 } | ||
12 | rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 } | ||
13 | } | ||
14 | |||
// addInfo is a no-op in this build configuration: info is left untouched and
// safe is ignored.
func addInfo(info *CPUInfo, safe bool) {}
diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_x86.go b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go new file mode 100644 index 0000000..c7dfa12 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go | |||
@@ -0,0 +1,37 @@ | |||
1 | // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. | ||
2 | |||
3 | //go:build (386 && !gccgo && !noasm && !appengine) || (amd64 && !gccgo && !noasm && !appengine) | ||
4 | // +build 386,!gccgo,!noasm,!appengine amd64,!gccgo,!noasm,!appengine | ||
5 | |||
6 | package cpuid | ||
7 | |||
8 | func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32) | ||
9 | func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) | ||
10 | func asmXgetbv(index uint32) (eax, edx uint32) | ||
11 | func asmRdtscpAsm() (eax, ebx, ecx, edx uint32) | ||
12 | func asmDarwinHasAVX512() bool | ||
13 | |||
14 | func initCPU() { | ||
15 | cpuid = asmCpuid | ||
16 | cpuidex = asmCpuidex | ||
17 | xgetbv = asmXgetbv | ||
18 | rdtscpAsm = asmRdtscpAsm | ||
19 | darwinHasAVX512 = asmDarwinHasAVX512 | ||
20 | } | ||
21 | |||
// addInfo populates c by calling the package's x86 detection helpers in
// sequence. The safe parameter is not used by this implementation.
func addInfo(c *CPUInfo, safe bool) {
	c.maxFunc = maxFunctionID()
	c.maxExFunc = maxExtendedFunction()
	c.BrandName = brandName()
	c.CacheLine = cacheLine()
	c.Family, c.Model, c.Stepping = familyModel()
	c.featureSet = support()
	// Must follow the featureSet assignment above: it reads the SGX/SGXLC bits.
	c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
	c.ThreadsPerCore = threadsPerCore()
	c.LogicalCores = logicalCores()
	c.PhysicalCores = physicalCores()
	c.VendorID, c.VendorString = vendorID()
	// The remaining calls are methods on c and run after the fields above are set.
	// NOTE(review): presumably they depend on those fields - keep the order.
	c.AVX10Level = c.supportAVX10()
	c.cacheSize()
	c.frequencies()
}
diff --git a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go new file mode 100644 index 0000000..43bd05f --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go | |||
@@ -0,0 +1,279 @@ | |||
1 | // Code generated by "stringer -type=FeatureID,Vendor"; DO NOT EDIT. | ||
2 | |||
3 | package cpuid | ||
4 | |||
5 | import "strconv" | ||
6 | |||
7 | func _() { | ||
8 | // An "invalid array index" compiler error signifies that the constant values have changed. | ||
9 | // Re-run the stringer command to generate them again. | ||
10 | var x [1]struct{} | ||
11 | _ = x[ADX-1] | ||
12 | _ = x[AESNI-2] | ||
13 | _ = x[AMD3DNOW-3] | ||
14 | _ = x[AMD3DNOWEXT-4] | ||
15 | _ = x[AMXBF16-5] | ||
16 | _ = x[AMXFP16-6] | ||
17 | _ = x[AMXINT8-7] | ||
18 | _ = x[AMXTILE-8] | ||
19 | _ = x[APX_F-9] | ||
20 | _ = x[AVX-10] | ||
21 | _ = x[AVX10-11] | ||
22 | _ = x[AVX10_128-12] | ||
23 | _ = x[AVX10_256-13] | ||
24 | _ = x[AVX10_512-14] | ||
25 | _ = x[AVX2-15] | ||
26 | _ = x[AVX512BF16-16] | ||
27 | _ = x[AVX512BITALG-17] | ||
28 | _ = x[AVX512BW-18] | ||
29 | _ = x[AVX512CD-19] | ||
30 | _ = x[AVX512DQ-20] | ||
31 | _ = x[AVX512ER-21] | ||
32 | _ = x[AVX512F-22] | ||
33 | _ = x[AVX512FP16-23] | ||
34 | _ = x[AVX512IFMA-24] | ||
35 | _ = x[AVX512PF-25] | ||
36 | _ = x[AVX512VBMI-26] | ||
37 | _ = x[AVX512VBMI2-27] | ||
38 | _ = x[AVX512VL-28] | ||
39 | _ = x[AVX512VNNI-29] | ||
40 | _ = x[AVX512VP2INTERSECT-30] | ||
41 | _ = x[AVX512VPOPCNTDQ-31] | ||
42 | _ = x[AVXIFMA-32] | ||
43 | _ = x[AVXNECONVERT-33] | ||
44 | _ = x[AVXSLOW-34] | ||
45 | _ = x[AVXVNNI-35] | ||
46 | _ = x[AVXVNNIINT8-36] | ||
47 | _ = x[BHI_CTRL-37] | ||
48 | _ = x[BMI1-38] | ||
49 | _ = x[BMI2-39] | ||
50 | _ = x[CETIBT-40] | ||
51 | _ = x[CETSS-41] | ||
52 | _ = x[CLDEMOTE-42] | ||
53 | _ = x[CLMUL-43] | ||
54 | _ = x[CLZERO-44] | ||
55 | _ = x[CMOV-45] | ||
56 | _ = x[CMPCCXADD-46] | ||
57 | _ = x[CMPSB_SCADBS_SHORT-47] | ||
58 | _ = x[CMPXCHG8-48] | ||
59 | _ = x[CPBOOST-49] | ||
60 | _ = x[CPPC-50] | ||
61 | _ = x[CX16-51] | ||
62 | _ = x[EFER_LMSLE_UNS-52] | ||
63 | _ = x[ENQCMD-53] | ||
64 | _ = x[ERMS-54] | ||
65 | _ = x[F16C-55] | ||
66 | _ = x[FLUSH_L1D-56] | ||
67 | _ = x[FMA3-57] | ||
68 | _ = x[FMA4-58] | ||
69 | _ = x[FP128-59] | ||
70 | _ = x[FP256-60] | ||
71 | _ = x[FSRM-61] | ||
72 | _ = x[FXSR-62] | ||
73 | _ = x[FXSROPT-63] | ||
74 | _ = x[GFNI-64] | ||
75 | _ = x[HLE-65] | ||
76 | _ = x[HRESET-66] | ||
77 | _ = x[HTT-67] | ||
78 | _ = x[HWA-68] | ||
79 | _ = x[HYBRID_CPU-69] | ||
80 | _ = x[HYPERVISOR-70] | ||
81 | _ = x[IA32_ARCH_CAP-71] | ||
82 | _ = x[IA32_CORE_CAP-72] | ||
83 | _ = x[IBPB-73] | ||
84 | _ = x[IBRS-74] | ||
85 | _ = x[IBRS_PREFERRED-75] | ||
86 | _ = x[IBRS_PROVIDES_SMP-76] | ||
87 | _ = x[IBS-77] | ||
88 | _ = x[IBSBRNTRGT-78] | ||
89 | _ = x[IBSFETCHSAM-79] | ||
90 | _ = x[IBSFFV-80] | ||
91 | _ = x[IBSOPCNT-81] | ||
92 | _ = x[IBSOPCNTEXT-82] | ||
93 | _ = x[IBSOPSAM-83] | ||
94 | _ = x[IBSRDWROPCNT-84] | ||
95 | _ = x[IBSRIPINVALIDCHK-85] | ||
96 | _ = x[IBS_FETCH_CTLX-86] | ||
97 | _ = x[IBS_OPDATA4-87] | ||
98 | _ = x[IBS_OPFUSE-88] | ||
99 | _ = x[IBS_PREVENTHOST-89] | ||
100 | _ = x[IBS_ZEN4-90] | ||
101 | _ = x[IDPRED_CTRL-91] | ||
102 | _ = x[INT_WBINVD-92] | ||
103 | _ = x[INVLPGB-93] | ||
104 | _ = x[KEYLOCKER-94] | ||
105 | _ = x[KEYLOCKERW-95] | ||
106 | _ = x[LAHF-96] | ||
107 | _ = x[LAM-97] | ||
108 | _ = x[LBRVIRT-98] | ||
109 | _ = x[LZCNT-99] | ||
110 | _ = x[MCAOVERFLOW-100] | ||
111 | _ = x[MCDT_NO-101] | ||
112 | _ = x[MCOMMIT-102] | ||
113 | _ = x[MD_CLEAR-103] | ||
114 | _ = x[MMX-104] | ||
115 | _ = x[MMXEXT-105] | ||
116 | _ = x[MOVBE-106] | ||
117 | _ = x[MOVDIR64B-107] | ||
118 | _ = x[MOVDIRI-108] | ||
119 | _ = x[MOVSB_ZL-109] | ||
120 | _ = x[MOVU-110] | ||
121 | _ = x[MPX-111] | ||
122 | _ = x[MSRIRC-112] | ||
123 | _ = x[MSRLIST-113] | ||
124 | _ = x[MSR_PAGEFLUSH-114] | ||
125 | _ = x[NRIPS-115] | ||
126 | _ = x[NX-116] | ||
127 | _ = x[OSXSAVE-117] | ||
128 | _ = x[PCONFIG-118] | ||
129 | _ = x[POPCNT-119] | ||
130 | _ = x[PPIN-120] | ||
131 | _ = x[PREFETCHI-121] | ||
132 | _ = x[PSFD-122] | ||
133 | _ = x[RDPRU-123] | ||
134 | _ = x[RDRAND-124] | ||
135 | _ = x[RDSEED-125] | ||
136 | _ = x[RDTSCP-126] | ||
137 | _ = x[RRSBA_CTRL-127] | ||
138 | _ = x[RTM-128] | ||
139 | _ = x[RTM_ALWAYS_ABORT-129] | ||
140 | _ = x[SERIALIZE-130] | ||
141 | _ = x[SEV-131] | ||
142 | _ = x[SEV_64BIT-132] | ||
143 | _ = x[SEV_ALTERNATIVE-133] | ||
144 | _ = x[SEV_DEBUGSWAP-134] | ||
145 | _ = x[SEV_ES-135] | ||
146 | _ = x[SEV_RESTRICTED-136] | ||
147 | _ = x[SEV_SNP-137] | ||
148 | _ = x[SGX-138] | ||
149 | _ = x[SGXLC-139] | ||
150 | _ = x[SHA-140] | ||
151 | _ = x[SME-141] | ||
152 | _ = x[SME_COHERENT-142] | ||
153 | _ = x[SPEC_CTRL_SSBD-143] | ||
154 | _ = x[SRBDS_CTRL-144] | ||
155 | _ = x[SSE-145] | ||
156 | _ = x[SSE2-146] | ||
157 | _ = x[SSE3-147] | ||
158 | _ = x[SSE4-148] | ||
159 | _ = x[SSE42-149] | ||
160 | _ = x[SSE4A-150] | ||
161 | _ = x[SSSE3-151] | ||
162 | _ = x[STIBP-152] | ||
163 | _ = x[STIBP_ALWAYSON-153] | ||
164 | _ = x[STOSB_SHORT-154] | ||
165 | _ = x[SUCCOR-155] | ||
166 | _ = x[SVM-156] | ||
167 | _ = x[SVMDA-157] | ||
168 | _ = x[SVMFBASID-158] | ||
169 | _ = x[SVML-159] | ||
170 | _ = x[SVMNP-160] | ||
171 | _ = x[SVMPF-161] | ||
172 | _ = x[SVMPFT-162] | ||
173 | _ = x[SYSCALL-163] | ||
174 | _ = x[SYSEE-164] | ||
175 | _ = x[TBM-165] | ||
176 | _ = x[TDX_GUEST-166] | ||
177 | _ = x[TLB_FLUSH_NESTED-167] | ||
178 | _ = x[TME-168] | ||
179 | _ = x[TOPEXT-169] | ||
180 | _ = x[TSCRATEMSR-170] | ||
181 | _ = x[TSXLDTRK-171] | ||
182 | _ = x[VAES-172] | ||
183 | _ = x[VMCBCLEAN-173] | ||
184 | _ = x[VMPL-174] | ||
185 | _ = x[VMSA_REGPROT-175] | ||
186 | _ = x[VMX-176] | ||
187 | _ = x[VPCLMULQDQ-177] | ||
188 | _ = x[VTE-178] | ||
189 | _ = x[WAITPKG-179] | ||
190 | _ = x[WBNOINVD-180] | ||
191 | _ = x[WRMSRNS-181] | ||
192 | _ = x[X87-182] | ||
193 | _ = x[XGETBV1-183] | ||
194 | _ = x[XOP-184] | ||
195 | _ = x[XSAVE-185] | ||
196 | _ = x[XSAVEC-186] | ||
197 | _ = x[XSAVEOPT-187] | ||
198 | _ = x[XSAVES-188] | ||
199 | _ = x[AESARM-189] | ||
200 | _ = x[ARMCPUID-190] | ||
201 | _ = x[ASIMD-191] | ||
202 | _ = x[ASIMDDP-192] | ||
203 | _ = x[ASIMDHP-193] | ||
204 | _ = x[ASIMDRDM-194] | ||
205 | _ = x[ATOMICS-195] | ||
206 | _ = x[CRC32-196] | ||
207 | _ = x[DCPOP-197] | ||
208 | _ = x[EVTSTRM-198] | ||
209 | _ = x[FCMA-199] | ||
210 | _ = x[FP-200] | ||
211 | _ = x[FPHP-201] | ||
212 | _ = x[GPA-202] | ||
213 | _ = x[JSCVT-203] | ||
214 | _ = x[LRCPC-204] | ||
215 | _ = x[PMULL-205] | ||
216 | _ = x[SHA1-206] | ||
217 | _ = x[SHA2-207] | ||
218 | _ = x[SHA3-208] | ||
219 | _ = x[SHA512-209] | ||
220 | _ = x[SM3-210] | ||
221 | _ = x[SM4-211] | ||
222 | _ = x[SVE-212] | ||
223 | _ = x[lastID-213] | ||
224 | _ = x[firstID-0] | ||
225 | } | ||
226 | |||
227 | const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAPX_FAVXAVX10AVX10_128AVX10_256AVX10_512AVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBKEYLOCKERKEYLOCKERWLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID" | ||
228 | |||
229 | var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 67, 70, 75, 84, 93, 102, 106, 116, 128, 136, 144, 152, 160, 167, 177, 187, 195, 205, 216, 224, 234, 252, 267, 274, 286, 293, 300, 311, 319, 323, 327, 333, 338, 346, 351, 357, 361, 370, 388, 396, 403, 407, 411, 425, 431, 435, 439, 448, 452, 456, 461, 466, 470, 474, 481, 485, 488, 494, 497, 500, 510, 520, 533, 546, 550, 554, 568, 585, 588, 598, 609, 615, 623, 634, 642, 654, 670, 684, 695, 705, 720, 728, 739, 749, 756, 765, 775, 779, 782, 789, 794, 805, 812, 819, 827, 830, 836, 841, 850, 857, 865, 869, 872, 878, 885, 898, 903, 905, 912, 919, 925, 929, 938, 942, 947, 953, 959, 965, 975, 978, 994, 1003, 1006, 1015, 1030, 1043, 1049, 1063, 1070, 1073, 1078, 1081, 1084, 1096, 1110, 1120, 1123, 1127, 1131, 1135, 1140, 1145, 1150, 1155, 1169, 1180, 1186, 1189, 1194, 1203, 1207, 1212, 1217, 1223, 1230, 1235, 1238, 1247, 1263, 1266, 1272, 1282, 1290, 1294, 1303, 1307, 1319, 1322, 1332, 1335, 1342, 1350, 1357, 1360, 1367, 1370, 1375, 1381, 1389, 1395, 1401, 1409, 1414, 1421, 1428, 1436, 1443, 1448, 1453, 1460, 1464, 1466, 1470, 1473, 1478, 1483, 1488, 1492, 1496, 1500, 1506, 1509, 1512, 1515, 1521} | ||
230 | |||
231 | func (i FeatureID) String() string { | ||
232 | if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) { | ||
233 | return "FeatureID(" + strconv.FormatInt(int64(i), 10) + ")" | ||
234 | } | ||
235 | return _FeatureID_name[_FeatureID_index[i]:_FeatureID_index[i+1]] | ||
236 | } | ||
237 | func _() { | ||
238 | // An "invalid array index" compiler error signifies that the constant values have changed. | ||
239 | // Re-run the stringer command to generate them again. | ||
240 | var x [1]struct{} | ||
241 | _ = x[VendorUnknown-0] | ||
242 | _ = x[Intel-1] | ||
243 | _ = x[AMD-2] | ||
244 | _ = x[VIA-3] | ||
245 | _ = x[Transmeta-4] | ||
246 | _ = x[NSC-5] | ||
247 | _ = x[KVM-6] | ||
248 | _ = x[MSVM-7] | ||
249 | _ = x[VMware-8] | ||
250 | _ = x[XenHVM-9] | ||
251 | _ = x[Bhyve-10] | ||
252 | _ = x[Hygon-11] | ||
253 | _ = x[SiS-12] | ||
254 | _ = x[RDC-13] | ||
255 | _ = x[Ampere-14] | ||
256 | _ = x[ARM-15] | ||
257 | _ = x[Broadcom-16] | ||
258 | _ = x[Cavium-17] | ||
259 | _ = x[DEC-18] | ||
260 | _ = x[Fujitsu-19] | ||
261 | _ = x[Infineon-20] | ||
262 | _ = x[Motorola-21] | ||
263 | _ = x[NVIDIA-22] | ||
264 | _ = x[AMCC-23] | ||
265 | _ = x[Qualcomm-24] | ||
266 | _ = x[Marvell-25] | ||
267 | _ = x[lastVendor-26] | ||
268 | } | ||
269 | |||
270 | const _Vendor_name = "VendorUnknownIntelAMDVIATransmetaNSCKVMMSVMVMwareXenHVMBhyveHygonSiSRDCAmpereARMBroadcomCaviumDECFujitsuInfineonMotorolaNVIDIAAMCCQualcommMarvelllastVendor" | ||
271 | |||
272 | var _Vendor_index = [...]uint8{0, 13, 18, 21, 24, 33, 36, 39, 43, 49, 55, 60, 65, 68, 71, 77, 80, 88, 94, 97, 104, 112, 120, 126, 130, 138, 145, 155} | ||
273 | |||
274 | func (i Vendor) String() string { | ||
275 | if i < 0 || i >= Vendor(len(_Vendor_index)-1) { | ||
276 | return "Vendor(" + strconv.FormatInt(int64(i), 10) + ")" | ||
277 | } | ||
278 | return _Vendor_name[_Vendor_index[i]:_Vendor_index[i+1]] | ||
279 | } | ||
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go new file mode 100644 index 0000000..84b1acd --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go | |||
@@ -0,0 +1,121 @@ | |||
1 | // Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file. | ||
2 | |||
3 | package cpuid | ||
4 | |||
5 | import ( | ||
6 | "runtime" | ||
7 | "strings" | ||
8 | |||
9 | "golang.org/x/sys/unix" | ||
10 | ) | ||
11 | |||
12 | func detectOS(c *CPUInfo) bool { | ||
13 | if runtime.GOOS != "ios" { | ||
14 | tryToFillCPUInfoFomSysctl(c) | ||
15 | } | ||
16 | // There are no hw.optional sysctl values for the below features on Mac OS 11.0 | ||
17 | // to detect their supported state dynamically. Assume the CPU features that | ||
18 | // Apple Silicon M1 supports to be available as a minimal set of features | ||
19 | // to all Go programs running on darwin/arm64. | ||
20 | // TODO: Add more if we know them. | ||
21 | c.featureSet.setIf(runtime.GOOS != "ios", AESARM, PMULL, SHA1, SHA2) | ||
22 | |||
23 | return true | ||
24 | } | ||
25 | |||
26 | func sysctlGetBool(name string) bool { | ||
27 | value, err := unix.SysctlUint32(name) | ||
28 | if err != nil { | ||
29 | return false | ||
30 | } | ||
31 | return value != 0 | ||
32 | } | ||
33 | |||
34 | func sysctlGetString(name string) string { | ||
35 | value, err := unix.Sysctl(name) | ||
36 | if err != nil { | ||
37 | return "" | ||
38 | } | ||
39 | return value | ||
40 | } | ||
41 | |||
42 | func sysctlGetInt(unknown int, names ...string) int { | ||
43 | for _, name := range names { | ||
44 | value, err := unix.SysctlUint32(name) | ||
45 | if err != nil { | ||
46 | continue | ||
47 | } | ||
48 | if value != 0 { | ||
49 | return int(value) | ||
50 | } | ||
51 | } | ||
52 | return unknown | ||
53 | } | ||
54 | |||
55 | func sysctlGetInt64(unknown int, names ...string) int { | ||
56 | for _, name := range names { | ||
57 | value64, err := unix.SysctlUint64(name) | ||
58 | if err != nil { | ||
59 | continue | ||
60 | } | ||
61 | if int(value64) != unknown { | ||
62 | return int(value64) | ||
63 | } | ||
64 | } | ||
65 | return unknown | ||
66 | } | ||
67 | |||
68 | func setFeature(c *CPUInfo, name string, feature FeatureID) { | ||
69 | c.featureSet.setIf(sysctlGetBool(name), feature) | ||
70 | } | ||
71 | func tryToFillCPUInfoFomSysctl(c *CPUInfo) { | ||
72 | c.BrandName = sysctlGetString("machdep.cpu.brand_string") | ||
73 | |||
74 | if len(c.BrandName) != 0 { | ||
75 | c.VendorString = strings.Fields(c.BrandName)[0] | ||
76 | } | ||
77 | |||
78 | c.PhysicalCores = sysctlGetInt(runtime.NumCPU(), "hw.physicalcpu") | ||
79 | c.ThreadsPerCore = sysctlGetInt(1, "machdep.cpu.thread_count", "kern.num_threads") / | ||
80 | sysctlGetInt(1, "hw.physicalcpu") | ||
81 | c.LogicalCores = sysctlGetInt(runtime.NumCPU(), "machdep.cpu.core_count") | ||
82 | c.Family = sysctlGetInt(0, "machdep.cpu.family", "hw.cpufamily") | ||
83 | c.Model = sysctlGetInt(0, "machdep.cpu.model") | ||
84 | c.CacheLine = sysctlGetInt64(0, "hw.cachelinesize") | ||
85 | c.Cache.L1I = sysctlGetInt64(-1, "hw.l1icachesize") | ||
86 | c.Cache.L1D = sysctlGetInt64(-1, "hw.l1dcachesize") | ||
87 | c.Cache.L2 = sysctlGetInt64(-1, "hw.l2cachesize") | ||
88 | c.Cache.L3 = sysctlGetInt64(-1, "hw.l3cachesize") | ||
89 | |||
90 | // from https://developer.arm.com/downloads/-/exploration-tools/feature-names-for-a-profile | ||
91 | setFeature(c, "hw.optional.arm.FEAT_AES", AESARM) | ||
92 | setFeature(c, "hw.optional.AdvSIMD", ASIMD) | ||
93 | setFeature(c, "hw.optional.arm.FEAT_DotProd", ASIMDDP) | ||
94 | setFeature(c, "hw.optional.arm.FEAT_RDM", ASIMDRDM) | ||
95 | setFeature(c, "hw.optional.FEAT_CRC32", CRC32) | ||
96 | setFeature(c, "hw.optional.arm.FEAT_DPB", DCPOP) | ||
97 | // setFeature(c, "", EVTSTRM) | ||
98 | setFeature(c, "hw.optional.arm.FEAT_FCMA", FCMA) | ||
99 | setFeature(c, "hw.optional.arm.FEAT_FP", FP) | ||
100 | setFeature(c, "hw.optional.arm.FEAT_FP16", FPHP) | ||
101 | setFeature(c, "hw.optional.arm.FEAT_PAuth", GPA) | ||
102 | setFeature(c, "hw.optional.arm.FEAT_JSCVT", JSCVT) | ||
103 | setFeature(c, "hw.optional.arm.FEAT_LRCPC", LRCPC) | ||
104 | setFeature(c, "hw.optional.arm.FEAT_PMULL", PMULL) | ||
105 | setFeature(c, "hw.optional.arm.FEAT_SHA1", SHA1) | ||
106 | setFeature(c, "hw.optional.arm.FEAT_SHA256", SHA2) | ||
107 | setFeature(c, "hw.optional.arm.FEAT_SHA3", SHA3) | ||
108 | setFeature(c, "hw.optional.arm.FEAT_SHA512", SHA512) | ||
109 | // setFeature(c, "", SM3) | ||
110 | // setFeature(c, "", SM4) | ||
111 | setFeature(c, "hw.optional.arm.FEAT_SVE", SVE) | ||
112 | |||
113 | // from empirical observation | ||
114 | setFeature(c, "hw.optional.AdvSIMD_HPFPCvt", ASIMDHP) | ||
115 | setFeature(c, "hw.optional.armv8_1_atomics", ATOMICS) | ||
116 | setFeature(c, "hw.optional.floatingpoint", FP) | ||
117 | setFeature(c, "hw.optional.armv8_2_sha3", SHA3) | ||
118 | setFeature(c, "hw.optional.armv8_2_sha512", SHA512) | ||
119 | setFeature(c, "hw.optional.armv8_3_compnum", FCMA) | ||
120 | setFeature(c, "hw.optional.armv8_crc32", CRC32) | ||
121 | } | ||
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go new file mode 100644 index 0000000..ee278b9 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go | |||
@@ -0,0 +1,130 @@ | |||
1 | // Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file. | ||
2 | |||
3 | // Copyright 2018 The Go Authors. All rights reserved. | ||
4 | // Use of this source code is governed by a BSD-style | ||
5 | // license that can be found in the LICENSE file located | ||
6 | // here https://github.com/golang/sys/blob/master/LICENSE | ||
7 | |||
8 | package cpuid | ||
9 | |||
10 | import ( | ||
11 | "encoding/binary" | ||
12 | "io/ioutil" | ||
13 | "runtime" | ||
14 | ) | ||
15 | |||
// Bit masks for the arm64 Linux HWCAP word. Each constant selects a single
// bit; the declaration order below IS the bit order (FP is bit 0, ASIMDFHM
// is bit 23), so entries must not be reordered or removed.
const (
	hwcap_FP = 1 << iota
	hwcap_ASIMD
	hwcap_EVTSTRM
	hwcap_AES
	hwcap_PMULL
	hwcap_SHA1
	hwcap_SHA2
	hwcap_CRC32
	hwcap_ATOMICS
	hwcap_FPHP
	hwcap_ASIMDHP
	hwcap_CPUID
	hwcap_ASIMDRDM
	hwcap_JSCVT
	hwcap_FCMA
	hwcap_LRCPC
	hwcap_DCPOP
	hwcap_SHA3
	hwcap_SM3
	hwcap_SM4
	hwcap_ASIMDDP
	hwcap_SHA512
	hwcap_SVE
	hwcap_ASIMDFHM
)
43 | |||
44 | func detectOS(c *CPUInfo) bool { | ||
45 | // For now assuming no hyperthreading is reasonable. | ||
46 | c.LogicalCores = runtime.NumCPU() | ||
47 | c.PhysicalCores = c.LogicalCores | ||
48 | c.ThreadsPerCore = 1 | ||
49 | if hwcap == 0 { | ||
50 | // We did not get values from the runtime. | ||
51 | // Try reading /proc/self/auxv | ||
52 | |||
53 | // From https://github.com/golang/sys | ||
54 | const ( | ||
55 | _AT_HWCAP = 16 | ||
56 | _AT_HWCAP2 = 26 | ||
57 | |||
58 | uintSize = int(32 << (^uint(0) >> 63)) | ||
59 | ) | ||
60 | |||
61 | buf, err := ioutil.ReadFile("/proc/self/auxv") | ||
62 | if err != nil { | ||
63 | // e.g. on android /proc/self/auxv is not accessible, so silently | ||
64 | // ignore the error and leave Initialized = false. On some | ||
65 | // architectures (e.g. arm64) doinit() implements a fallback | ||
66 | // readout and will set Initialized = true again. | ||
67 | return false | ||
68 | } | ||
69 | bo := binary.LittleEndian | ||
70 | for len(buf) >= 2*(uintSize/8) { | ||
71 | var tag, val uint | ||
72 | switch uintSize { | ||
73 | case 32: | ||
74 | tag = uint(bo.Uint32(buf[0:])) | ||
75 | val = uint(bo.Uint32(buf[4:])) | ||
76 | buf = buf[8:] | ||
77 | case 64: | ||
78 | tag = uint(bo.Uint64(buf[0:])) | ||
79 | val = uint(bo.Uint64(buf[8:])) | ||
80 | buf = buf[16:] | ||
81 | } | ||
82 | switch tag { | ||
83 | case _AT_HWCAP: | ||
84 | hwcap = val | ||
85 | case _AT_HWCAP2: | ||
86 | // Not used | ||
87 | } | ||
88 | } | ||
89 | if hwcap == 0 { | ||
90 | return false | ||
91 | } | ||
92 | } | ||
93 | |||
94 | // HWCap was populated by the runtime from the auxiliary vector. | ||
95 | // Use HWCap information since reading aarch64 system registers | ||
96 | // is not supported in user space on older linux kernels. | ||
97 | c.featureSet.setIf(isSet(hwcap, hwcap_AES), AESARM) | ||
98 | c.featureSet.setIf(isSet(hwcap, hwcap_ASIMD), ASIMD) | ||
99 | c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDDP), ASIMDDP) | ||
100 | c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDHP), ASIMDHP) | ||
101 | c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDRDM), ASIMDRDM) | ||
102 | c.featureSet.setIf(isSet(hwcap, hwcap_CPUID), ARMCPUID) | ||
103 | c.featureSet.setIf(isSet(hwcap, hwcap_CRC32), CRC32) | ||
104 | c.featureSet.setIf(isSet(hwcap, hwcap_DCPOP), DCPOP) | ||
105 | c.featureSet.setIf(isSet(hwcap, hwcap_EVTSTRM), EVTSTRM) | ||
106 | c.featureSet.setIf(isSet(hwcap, hwcap_FCMA), FCMA) | ||
107 | c.featureSet.setIf(isSet(hwcap, hwcap_FP), FP) | ||
108 | c.featureSet.setIf(isSet(hwcap, hwcap_FPHP), FPHP) | ||
109 | c.featureSet.setIf(isSet(hwcap, hwcap_JSCVT), JSCVT) | ||
110 | c.featureSet.setIf(isSet(hwcap, hwcap_LRCPC), LRCPC) | ||
111 | c.featureSet.setIf(isSet(hwcap, hwcap_PMULL), PMULL) | ||
112 | c.featureSet.setIf(isSet(hwcap, hwcap_SHA1), SHA1) | ||
113 | c.featureSet.setIf(isSet(hwcap, hwcap_SHA2), SHA2) | ||
114 | c.featureSet.setIf(isSet(hwcap, hwcap_SHA3), SHA3) | ||
115 | c.featureSet.setIf(isSet(hwcap, hwcap_SHA512), SHA512) | ||
116 | c.featureSet.setIf(isSet(hwcap, hwcap_SM3), SM3) | ||
117 | c.featureSet.setIf(isSet(hwcap, hwcap_SM4), SM4) | ||
118 | c.featureSet.setIf(isSet(hwcap, hwcap_SVE), SVE) | ||
119 | |||
120 | // The Samsung S9+ kernel reports support for atomics, but not all cores | ||
121 | // actually support them, resulting in SIGILL. See issue #28431. | ||
122 | // TODO(elias.naur): Only disable the optimization on bad chipsets on android. | ||
123 | c.featureSet.setIf(isSet(hwcap, hwcap_ATOMICS) && runtime.GOOS != "android", ATOMICS) | ||
124 | |||
125 | return true | ||
126 | } | ||
127 | |||
// isSet reports whether hwc and value have at least one set bit in common.
func isSet(hwc uint, value uint) bool {
	common := hwc & value
	return common != 0
}
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go new file mode 100644 index 0000000..8733ba3 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go | |||
@@ -0,0 +1,16 @@ | |||
1 | // Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file. | ||
2 | |||
3 | //go:build arm64 && !linux && !darwin | ||
4 | // +build arm64,!linux,!darwin | ||
5 | |||
6 | package cpuid | ||
7 | |||
8 | import "runtime" | ||
9 | |||
10 | func detectOS(c *CPUInfo) bool { | ||
11 | c.PhysicalCores = runtime.NumCPU() | ||
12 | // For now assuming 1 thread per core... | ||
13 | c.ThreadsPerCore = 1 | ||
14 | c.LogicalCores = c.PhysicalCores | ||
15 | return false | ||
16 | } | ||
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go new file mode 100644 index 0000000..f8f201b --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go | |||
@@ -0,0 +1,8 @@ | |||
1 | // Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file. | ||
2 | |||
3 | //go:build nounsafe | ||
4 | // +build nounsafe | ||
5 | |||
6 | package cpuid | ||
7 | |||
// hwcap is the HWCAP bitmask consulted by detectOS. In this build (tag
// "nounsafe") it is a plain package variable that starts at zero, which
// makes detectOS fall back to reading /proc/self/auxv.
var hwcap uint
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go new file mode 100644 index 0000000..92af622 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go | |||
@@ -0,0 +1,11 @@ | |||
1 | // Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file. | ||
2 | |||
3 | //go:build !nounsafe | ||
4 | // +build !nounsafe | ||
5 | |||
6 | package cpuid | ||
7 | |||
8 | import _ "unsafe" // needed for go:linkname | ||
9 | |||
// hwcap is the HWCAP bitmask consulted by detectOS. In this build (tag
// "nounsafe" not set) it is bound via go:linkname to internal/cpu.HWCap.
// detectOS treats a zero value as "not provided" and then falls back to
// reading /proc/self/auxv.
//
//go:linkname hwcap internal/cpu.HWCap
var hwcap uint
diff --git a/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh b/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh new file mode 100644 index 0000000..471d986 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh | |||
@@ -0,0 +1,15 @@ | |||
#!/bin/sh

# Build the package in the current directory for every GOOS/GOARCH pair
# printed by `go tool dist list`, once per build-tag combination. The build
# output is discarded; only whether each build succeeds matters.

# Stop at the first failing command.
set -e

# try_build LABEL [go build flags...]
# Prints LABEL, then builds for the $os/$arch pair currently being checked.
try_build() {
	label=$1
	shift
	echo " $label"
	GOARCH=$arch GOOS=$os go build "$@" -o /dev/null .
}

go tool dist list | while IFS=/ read os arch; do
	echo "Checking $os/$arch..."
	try_build "normal"
	try_build "noasm" -tags noasm
	try_build "appengine" -tags appengine
	try_build "noasm,appengine" -tags 'appengine noasm'
done