diff options
author | Rutger Broekhoff | 2023-12-29 21:31:53 +0100 |
---|---|---|
committer | Rutger Broekhoff | 2023-12-29 21:31:53 +0100 |
commit | 404aeae4545d2426c089a5f8d5e82dae56f5212b (patch) | |
tree | 2d84e00af272b39fc04f3795ae06bc48970e57b5 /vendor/github.com/klauspost/cpuid/v2/cpuid.go | |
parent | 209d8b0187ed025dec9ac149ebcced3462877bff (diff) | |
download | gitolfs3-404aeae4545d2426c089a5f8d5e82dae56f5212b.tar.gz gitolfs3-404aeae4545d2426c089a5f8d5e82dae56f5212b.zip |
Make Nix builds work
Diffstat (limited to 'vendor/github.com/klauspost/cpuid/v2/cpuid.go')
-rw-r--r-- | vendor/github.com/klauspost/cpuid/v2/cpuid.go | 1473 |
1 files changed, 1473 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go new file mode 100644 index 0000000..15b7603 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go | |||
@@ -0,0 +1,1473 @@ | |||
1 | // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. | ||
2 | |||
3 | // Package cpuid provides information about the CPU running the current program. | ||
4 | // | ||
5 | // CPU features are detected on startup, and kept for fast access through the life of the application. | ||
6 | // Currently x86 / x64 (AMD64) as well as arm64 is supported. | ||
7 | // | ||
8 | // You can access the CPU information by accessing the shared CPU variable of the cpuid library. | ||
9 | // | ||
10 | // Package home: https://github.com/klauspost/cpuid | ||
11 | package cpuid | ||
12 | |||
13 | import ( | ||
14 | "flag" | ||
15 | "fmt" | ||
16 | "math" | ||
17 | "math/bits" | ||
18 | "os" | ||
19 | "runtime" | ||
20 | "strings" | ||
21 | ) | ||
22 | |||
23 | // AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf | ||
24 | // and Processor Programming Reference (PPR) | ||
25 | |||
26 | // Vendor is a representation of a CPU vendor. Values are numbered sequentially by iota, starting with VendorUnknown at 0. | ||
27 | type Vendor int | ||
28 | |||
29 | const ( | ||
30 | VendorUnknown Vendor = iota | ||
31 | Intel | ||
32 | AMD | ||
33 | VIA | ||
34 | Transmeta | ||
35 | NSC | ||
36 | KVM // Kernel-based Virtual Machine | ||
37 | MSVM // Microsoft Hyper-V or Windows Virtual PC | ||
38 | VMware | ||
39 | XenHVM | ||
40 | Bhyve | ||
41 | Hygon | ||
42 | SiS | ||
43 | RDC | ||
44 | |||
45 | Ampere | ||
46 | ARM | ||
47 | Broadcom | ||
48 | Cavium | ||
49 | DEC | ||
50 | Fujitsu | ||
51 | Infineon | ||
52 | Motorola | ||
53 | NVIDIA | ||
54 | AMCC | ||
55 | Qualcomm | ||
56 | Marvell | ||
57 | |||
// NOTE(review): lastVendor looks like an end-of-list sentinel; keep it last and
// confirm against its users, which are not visible here.
58 | lastVendor | ||
59 | ) | ||
60 | |||
61 | //go:generate stringer -type=FeatureID,Vendor | ||
62 | |||
63 | // FeatureID is the ID of a specific cpu feature. It is used as a bit index into flagSet. | ||
64 | type FeatureID int | ||
65 | |||
66 | const ( | ||
67 | // Keep index -1 as unknown. UNKNOWN is an untyped constant and is what ParseFeature returns when no name matches. | ||
68 | UNKNOWN = -1 | ||
69 | |||
70 | // Add features. ADX is the second spec in this const block, so iota (and therefore ADX) is 1 here; ID 0 has no feature. | ||
71 | ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) | ||
72 | AESNI // Advanced Encryption Standard New Instructions | ||
73 | AMD3DNOW // AMD 3DNOW | ||
74 | AMD3DNOWEXT // AMD 3DNowExt | ||
75 | AMXBF16 // Tile computational operations on BFLOAT16 numbers | ||
76 | AMXFP16 // Tile computational operations on FP16 numbers | ||
77 | AMXINT8 // Tile computational operations on 8-bit integers | ||
78 | AMXTILE // Tile architecture | ||
79 | APX_F // Intel APX | ||
80 | AVX // AVX functions | ||
81 | AVX10 // If set the Intel AVX10 Converged Vector ISA is supported | ||
82 | AVX10_128 // If set indicates that AVX10 128-bit vector support is present | ||
83 | AVX10_256 // If set indicates that AVX10 256-bit vector support is present | ||
84 | AVX10_512 // If set indicates that AVX10 512-bit vector support is present | ||
85 | AVX2 // AVX2 functions | ||
86 | AVX512BF16 // AVX-512 BFLOAT16 Instructions | ||
87 | AVX512BITALG // AVX-512 Bit Algorithms | ||
88 | AVX512BW // AVX-512 Byte and Word Instructions | ||
89 | AVX512CD // AVX-512 Conflict Detection Instructions | ||
90 | AVX512DQ // AVX-512 Doubleword and Quadword Instructions | ||
91 | AVX512ER // AVX-512 Exponential and Reciprocal Instructions | ||
92 | AVX512F // AVX-512 Foundation | ||
93 | AVX512FP16 // AVX-512 FP16 Instructions | ||
94 | AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions | ||
95 | AVX512PF // AVX-512 Prefetch Instructions | ||
96 | AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions | ||
97 | AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2 | ||
98 | AVX512VL // AVX-512 Vector Length Extensions | ||
99 | AVX512VNNI // AVX-512 Vector Neural Network Instructions | ||
100 | AVX512VP2INTERSECT // AVX-512 Intersect for D/Q | ||
101 | AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword | ||
102 | AVXIFMA // AVX-IFMA instructions | ||
103 | AVXNECONVERT // AVX-NE-CONVERT instructions | ||
104 | AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one | ||
105 | AVXVNNI // AVX (VEX encoded) VNNI neural network instructions | ||
106 | AVXVNNIINT8 // AVX-VNNI-INT8 instructions | ||
107 | BHI_CTRL // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 | ||
108 | BMI1 // Bit Manipulation Instruction Set 1 | ||
109 | BMI2 // Bit Manipulation Instruction Set 2 | ||
110 | CETIBT // Intel CET Indirect Branch Tracking | ||
111 | CETSS // Intel CET Shadow Stack | ||
112 | CLDEMOTE // Cache Line Demote | ||
113 | CLMUL // Carry-less Multiplication | ||
114 | CLZERO // CLZERO instruction supported | ||
115 | CMOV // i686 CMOV | ||
116 | CMPCCXADD // CMPCCXADD instructions | ||
117 | CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB | ||
118 | CMPXCHG8 // CMPXCHG8 instruction | ||
119 | CPBOOST // Core Performance Boost | ||
120 | CPPC // AMD: Collaborative Processor Performance Control | ||
121 | CX16 // CMPXCHG16B Instruction | ||
122 | EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ | ||
123 | ENQCMD // Enqueue Command | ||
124 | ERMS // Enhanced REP MOVSB/STOSB | ||
125 | F16C // Half-precision floating-point conversion | ||
126 | FLUSH_L1D // Flush L1D cache | ||
127 | FMA3 // Intel FMA 3. Does not imply AVX. | ||
128 | FMA4 // Bulldozer FMA4 functions | ||
129 | FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide | ||
130 | FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide | ||
131 | FSRM // Fast Short Rep Mov | ||
132 | FXSR // FXSAVE, FXRSTOR instructions, CR4 bit 9 | ||
133 | FXSROPT // FXSAVE/FXRSTOR optimizations | ||
134 | GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. | ||
135 | HLE // Hardware Lock Elision | ||
136 | HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR | ||
137 | HTT // Hyperthreading (enabled) | ||
138 | HWA // Hardware assert supported. Indicates support for MSRC001_10 | ||
139 | HYBRID_CPU // This part has CPUs of more than one type. | ||
140 | HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors | ||
141 | IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel) | ||
142 | IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR | ||
143 | IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) | ||
144 | IBRS // AMD: Indirect Branch Restricted Speculation | ||
145 | IBRS_PREFERRED // AMD: IBRS is preferred over software solution | ||
146 | IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection | ||
147 | IBS // Instruction Based Sampling (AMD) | ||
148 | IBSBRNTRGT // Instruction Based Sampling Feature (AMD) | ||
149 | IBSFETCHSAM // Instruction Based Sampling Feature (AMD) | ||
150 | IBSFFV // Instruction Based Sampling Feature (AMD) | ||
151 | IBSOPCNT // Instruction Based Sampling Feature (AMD) | ||
152 | IBSOPCNTEXT // Instruction Based Sampling Feature (AMD) | ||
153 | IBSOPSAM // Instruction Based Sampling Feature (AMD) | ||
154 | IBSRDWROPCNT // Instruction Based Sampling Feature (AMD) | ||
155 | IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD) | ||
156 | IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported | ||
157 | IBS_OPDATA4 // AMD: IBS op data 4 MSR supported | ||
158 | IBS_OPFUSE // AMD: Indicates support for IbsOpFuse | ||
159 | IBS_PREVENTHOST // Disallowing IBS use by the host supported | ||
160 | IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4 | ||
161 | IDPRED_CTRL // IPRED_DIS | ||
162 | INT_WBINVD // WBINVD/WBNOINVD are interruptible. | ||
163 | INVLPGB // INVLPGB and TLBSYNC instructions supported | ||
164 | KEYLOCKER // Key locker | ||
165 | KEYLOCKERW // Key locker wide | ||
166 | LAHF // LAHF/SAHF in long mode | ||
167 | LAM // If set, CPU supports Linear Address Masking | ||
168 | LBRVIRT // LBR virtualization | ||
169 | LZCNT // LZCNT instruction | ||
170 | MCAOVERFLOW // MCA overflow recovery support. | ||
171 | MCDT_NO // Processor does not exhibit MXCSR Configuration Dependent Timing behavior and does not need to mitigate it. | ||
172 | MCOMMIT // MCOMMIT instruction supported | ||
173 | MD_CLEAR // VERW clears CPU buffers | ||
174 | MMX // standard MMX | ||
175 | MMXEXT // SSE integer functions or AMD MMX ext | ||
176 | MOVBE // MOVBE instruction (big-endian) | ||
177 | MOVDIR64B // Move 64 Bytes as Direct Store | ||
178 | MOVDIRI // Move Doubleword as Direct Store | ||
179 | MOVSB_ZL // Fast Zero-Length MOVSB | ||
180 | MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD | ||
181 | MPX // Intel MPX (Memory Protection Extensions) | ||
182 | MSRIRC // Instruction Retired Counter MSR available | ||
183 | MSRLIST // Read/Write List of Model Specific Registers | ||
184 | MSR_PAGEFLUSH // Page Flush MSR available | ||
185 | NRIPS // Indicates support for NRIP save on VMEXIT | ||
186 | NX // NX (No-Execute) bit | ||
187 | OSXSAVE // XSAVE enabled by OS | ||
188 | PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption | ||
189 | POPCNT // POPCNT instruction | ||
190 | PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled | ||
191 | PREFETCHI // PREFETCHIT0/1 instructions | ||
192 | PSFD // Predictive Store Forward Disable | ||
193 | RDPRU // RDPRU instruction supported | ||
194 | RDRAND // RDRAND instruction is available | ||
195 | RDSEED // RDSEED instruction is available | ||
196 | RDTSCP // RDTSCP Instruction | ||
197 | RRSBA_CTRL // Restricted RSB Alternate | ||
198 | RTM // Restricted Transactional Memory | ||
199 | RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort. | ||
200 | SERIALIZE // Serialize Instruction Execution | ||
201 | SEV // AMD Secure Encrypted Virtualization supported | ||
202 | SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host | ||
203 | SEV_ALTERNATIVE // AMD SEV Alternate Injection supported | ||
204 | SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests | ||
205 | SEV_ES // AMD SEV Encrypted State supported | ||
206 | SEV_RESTRICTED // AMD SEV Restricted Injection supported | ||
207 | SEV_SNP // AMD SEV Secure Nested Paging supported | ||
208 | SGX // Software Guard Extensions | ||
209 | SGXLC // Software Guard Extensions Launch Control | ||
210 | SHA // Intel SHA Extensions | ||
211 | SME // AMD Secure Memory Encryption supported | ||
212 | SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced | ||
213 | SPEC_CTRL_SSBD // Speculative Store Bypass Disable | ||
214 | SRBDS_CTRL // SRBDS mitigation MSR available | ||
215 | SSE // SSE functions | ||
216 | SSE2 // P4 SSE functions | ||
217 | SSE3 // Prescott SSE3 functions | ||
218 | SSE4 // Penryn SSE4.1 functions | ||
219 | SSE42 // Nehalem SSE4.2 functions | ||
220 | SSE4A // AMD Barcelona microarchitecture SSE4a instructions | ||
221 | SSSE3 // Conroe SSSE3 functions | ||
222 | STIBP // Single Thread Indirect Branch Predictors | ||
223 | STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On | ||
224 | STOSB_SHORT // Fast short STOSB | ||
225 | SUCCOR // Software uncorrectable error containment and recovery capability. | ||
226 | SVM // AMD Secure Virtual Machine | ||
227 | SVMDA // Indicates support for the SVM decode assists. | ||
228 | SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control | ||
229 | SVML // AMD SVM lock. Indicates support for SVM-Lock. | ||
230 | SVMNP // AMD SVM nested paging | ||
231 | SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter | ||
232 | SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold | ||
233 | SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions. | ||
234 | SYSEE // SYSENTER and SYSEXIT instructions | ||
235 | TBM // AMD Trailing Bit Manipulation | ||
236 | TDX_GUEST // Intel Trust Domain Extensions Guest | ||
237 | TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations | ||
238 | TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. | ||
239 | TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. | ||
240 | TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 | ||
241 | TSXLDTRK // Intel TSX Suspend Load Address Tracking | ||
242 | VAES // Vector AES. AVX(512) versions requires additional checks. | ||
243 | VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits. | ||
244 | VMPL // AMD VM Permission Levels supported | ||
245 | VMSA_REGPROT // AMD VMSA Register Protection supported | ||
246 | VMX // Virtual Machine Extensions | ||
247 | VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions. | ||
248 | VTE // AMD Virtual Transparent Encryption supported | ||
249 | WAITPKG // TPAUSE, UMONITOR, UMWAIT | ||
250 | WBNOINVD // Write Back and Do Not Invalidate Cache | ||
251 | WRMSRNS // Non-Serializing Write to Model Specific Register | ||
252 | X87 // FPU | ||
253 | XGETBV1 // Supports XGETBV with ECX = 1 | ||
254 | XOP // Bulldozer XOP functions | ||
255 | XSAVE // XSAVE, XRSTOR, XSETBV, XGETBV | ||
256 | XSAVEC // Supports XSAVEC and the compacted form of XRSTOR. | ||
257 | XSAVEOPT // XSAVEOPT available | ||
258 | XSAVES // Supports XSAVES/XRSTORS and IA32_XSS | ||
259 | |||
260 | // ARM features: | ||
261 | AESARM // AES instructions | ||
262 | ARMCPUID // Some CPU ID registers readable at user-level | ||
263 | ASIMD // Advanced SIMD | ||
264 | ASIMDDP // SIMD Dot Product | ||
265 | ASIMDHP // Advanced SIMD half-precision floating point | ||
266 | ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) | ||
267 | ATOMICS // Large System Extensions (LSE) | ||
268 | CRC32 // CRC32/CRC32C instructions | ||
269 | DCPOP // Data cache clean to Point of Persistence (DC CVAP) | ||
270 | EVTSTRM // Generic timer | ||
271 | FCMA // Floating point complex number addition and multiplication | ||
272 | FP // Single-precision and double-precision floating point | ||
273 | FPHP // Half-precision floating point | ||
274 | GPA // Generic Pointer Authentication | ||
275 | JSCVT // Javascript-style double->int convert (FJCVTZS) | ||
276 | LRCPC // Weaker release consistency (LDAPR, etc) | ||
277 | PMULL // Polynomial Multiply instructions (PMULL/PMULL2) | ||
278 | SHA1 // SHA-1 instructions (SHA1C, etc) | ||
279 | SHA2 // SHA-2 instructions (SHA256H, etc) | ||
280 | SHA3 // SHA-3 instructions (EOR3, RAX1, XAR, BCAX) | ||
281 | SHA512 // SHA512 instructions | ||
282 | SM3 // SM3 instructions | ||
283 | SM4 // SM4 instructions | ||
284 | SVE // Scalable Vector Extension | ||
285 | // Keep it last. It automatically defines the size of []flagSet | ||
286 | lastID | ||
287 | |||
// firstID is UNKNOWN+1, i.e. 0. Loops over all features in this file run
// from firstID up to, but not including, lastID.
288 | firstID FeatureID = UNKNOWN + 1 | ||
289 | ) | ||
290 | |||
291 | // CPUInfo contains information about the detected system CPU. | ||
292 | type CPUInfo struct { | ||
293 | BrandName string // Brand name reported by the CPU | ||
294 | VendorID Vendor // Comparable CPU vendor ID | ||
295 | VendorString string // Raw vendor string. | ||
296 | featureSet flagSet // Features of the CPU; query it through Supports, Has, AnyOf or HasAll | ||
297 | PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable. | ||
298 | ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable. | ||
299 | LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable. | ||
300 | Family int // CPU family number | ||
301 | Model int // CPU model number | ||
302 | Stepping int // CPU stepping info | ||
303 | CacheLine int // Cache line size in bytes. Will be 0 if undetectable. | ||
304 | Hz int64 // Clock speed in Hz, if known, 0 otherwise. Will attempt to contain base clock speed. | ||
305 | BoostFreq int64 // Max clock speed in Hz, if known, 0 otherwise | ||
306 | Cache struct { | ||
307 | L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected | ||
308 | L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected | ||
309 | L2 int // L2 Cache (per core or shared). Will be -1 if undetected | ||
310 | L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected | ||
311 | } | ||
// NOTE(review): SGXSupport is declared elsewhere; presumably details of Intel SGX support — confirm.
312 | SGX SGXSupport | ||
// NOTE(review): presumably the supported AVX10 version level — confirm against the detection code.
313 | AVX10Level uint8 | ||
// maxFunc is checked by LogicalCPU, which gives up when it is below 1.
// NOTE(review): presumably the highest standard CPUID leaf — confirm.
314 | maxFunc uint32 | ||
// NOTE(review): presumably the highest extended CPUID leaf — confirm.
315 | maxExFunc uint32 | ||
316 | } | ||
317 | |||
// Low-level CPU access hooks. They are declared without a value here.
// NOTE(review): presumably assigned by platform-specific code during initCPU — confirm before calling.
318 | var cpuid func(op uint32) (eax, ebx, ecx, edx uint32) | ||
319 | var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32) | ||
320 | var xgetbv func(index uint32) (eax, edx uint32) | ||
321 | var rdtscpAsm func() (eax, ebx, ecx, edx uint32) | ||
// darwinHasAVX512 returns false unless it is replaced.
// NOTE(review): presumably overridden on darwin builds — confirm.
322 | var darwinHasAVX512 = func() bool { return false } | ||
323 | |||
324 | // CPU contains information about the CPU as detected on startup, | ||
325 | // or when Detect last was called. | ||
326 | // | ||
327 | // Use this as the primary entry point to your data. | ||
328 | var CPU CPUInfo | ||
329 | |||
// init runs at package initialisation: it calls initCPU and then Detect,
// which fills in the exported CPU variable.
330 | func init() { | ||
331 | initCPU() | ||
332 | Detect() | ||
333 | } | ||
334 | |||
335 | // Detect will re-detect current CPU info. | ||
336 | // This will replace the content of the exported CPU variable. | ||
337 | // | ||
338 | // Unless you expect the CPU to change while you are running your program | ||
339 | // you should not need to call this function. | ||
340 | // If you call this, you must ensure that no other goroutine is accessing the | ||
341 | // exported CPU variable. | ||
342 | func Detect() { | ||
343 | // Set defaults | ||
344 | CPU.ThreadsPerCore = 1 | ||
345 | CPU.Cache.L1I = -1 | ||
346 | CPU.Cache.L1D = -1 | ||
347 | CPU.Cache.L2 = -1 | ||
348 | CPU.Cache.L3 = -1 | ||
349 | safe := true | ||
350 | if detectArmFlag != nil { | ||
351 | safe = !*detectArmFlag | ||
352 | } | ||
353 | addInfo(&CPU, safe) | ||
354 | if displayFeats != nil && *displayFeats { | ||
355 | fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ",")) | ||
356 | // Exit with non-zero so tests will print value. | ||
357 | os.Exit(1) | ||
358 | } | ||
359 | if disableFlag != nil { | ||
360 | s := strings.Split(*disableFlag, ",") | ||
361 | for _, feat := range s { | ||
362 | feat := ParseFeature(strings.TrimSpace(feat)) | ||
363 | if feat != UNKNOWN { | ||
364 | CPU.featureSet.unset(feat) | ||
365 | } | ||
366 | } | ||
367 | } | ||
368 | } | ||
369 | |||
370 | // DetectARM will detect ARM64 features. | ||
371 | // This is NOT done automatically since it can potentially crash | ||
372 | // if the OS does not handle the command. | ||
373 | // If in the future this can be done safely this function may not | ||
374 | // do anything. | ||
// It calls addInfo on the exported CPU variable, passing false where Detect
// passes its "safe" value.
375 | func DetectARM() { | ||
376 | addInfo(&CPU, false) | ||
377 | } | ||
378 | |||
// Command line flag values. All three stay nil until Flags has been called;
// Detect checks each for nil before dereferencing it.
379 | var detectArmFlag *bool | ||
380 | var displayFeats *bool | ||
381 | var disableFlag *string | ||
382 | |||
383 | // Flags registers the cpu.disable, cpu.features and cpu.arm command line flags with the flag package. | ||
384 | // This must be called *before* flag.Parse AND | ||
385 | // Detect must be called after the flags have been parsed. | ||
386 | // Note that this means that any detection used in init() functions | ||
387 | // will not contain these flags. | ||
388 | func Flags() { | ||
389 | disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list") | ||
390 | displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits") | ||
391 | detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash") | ||
392 | } | ||
393 | |||
394 | // Supports returns whether the CPU supports all of the requested features. | ||
395 | func (c CPUInfo) Supports(ids ...FeatureID) bool { | ||
396 | for _, id := range ids { | ||
397 | if !c.featureSet.inSet(id) { | ||
398 | return false | ||
399 | } | ||
400 | } | ||
401 | return true | ||
402 | } | ||
403 | |||
404 | // Has reports whether the single feature id is present in the feature set. | ||
405 | // Should be inlined by the compiler. | ||
406 | func (c *CPUInfo) Has(id FeatureID) bool { | ||
407 | return c.featureSet.inSet(id) | ||
408 | } | ||
409 | |||
410 | // AnyOf returns whether the CPU supports one or more of the requested features. | ||
411 | func (c CPUInfo) AnyOf(ids ...FeatureID) bool { | ||
412 | for _, id := range ids { | ||
413 | if c.featureSet.inSet(id) { | ||
414 | return true | ||
415 | } | ||
416 | } | ||
417 | return false | ||
418 | } | ||
419 | |||
420 | // Features contains several features combined for a fast check using | ||
421 | // CPUInfo.HasAll. Values are created with CombineFeatures. | ||
422 | type Features *flagSet | ||
423 | |||
424 | // CombineFeatures allows to combine several features for a close to constant time lookup. | ||
425 | func CombineFeatures(ids ...FeatureID) Features { | ||
426 | var v flagSet | ||
427 | for _, id := range ids { | ||
428 | v.set(id) | ||
429 | } | ||
430 | return &v | ||
431 | } | ||
432 | |||
// HasAll reports whether every feature contained in f is present in the
// feature set. f is a combined set as returned by CombineFeatures.
433 | func (c *CPUInfo) HasAll(f Features) bool { | ||
434 | return c.featureSet.hasSetP(f) | ||
435 | } | ||
436 | |||
437 | // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels | ||
438 | var oneOfLevel = CombineFeatures(SYSEE, SYSCALL) | ||
439 | var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2) | ||
440 | var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3) | ||
441 | var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE) | ||
442 | var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL) | ||
443 | |||
444 | // X64Level returns the microarchitecture level detected on the CPU. | ||
445 | // If features are lacking or non x64 mode, 0 is returned. | ||
446 | // See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels | ||
447 | func (c CPUInfo) X64Level() int { | ||
448 | if !c.featureSet.hasOneOf(oneOfLevel) { | ||
449 | return 0 | ||
450 | } | ||
451 | if c.featureSet.hasSetP(level4Features) { | ||
452 | return 4 | ||
453 | } | ||
454 | if c.featureSet.hasSetP(level3Features) { | ||
455 | return 3 | ||
456 | } | ||
457 | if c.featureSet.hasSetP(level2Features) { | ||
458 | return 2 | ||
459 | } | ||
460 | if c.featureSet.hasSetP(level1Features) { | ||
461 | return 1 | ||
462 | } | ||
463 | return 0 | ||
464 | } | ||
465 | |||
466 | // Disable will disable one or several features. | ||
467 | func (c *CPUInfo) Disable(ids ...FeatureID) bool { | ||
468 | for _, id := range ids { | ||
469 | c.featureSet.unset(id) | ||
470 | } | ||
471 | return true | ||
472 | } | ||
473 | |||
474 | // Enable will disable one or several features even if they were undetected. | ||
475 | // This is of course not recommended for obvious reasons. | ||
476 | func (c *CPUInfo) Enable(ids ...FeatureID) bool { | ||
477 | for _, id := range ids { | ||
478 | c.featureSet.set(id) | ||
479 | } | ||
480 | return true | ||
481 | } | ||
482 | |||
483 | // IsVendor returns true if the detected vendor ID equals v. | ||
484 | func (c CPUInfo) IsVendor(v Vendor) bool { | ||
485 | return c.VendorID == v | ||
486 | } | ||
487 | |||
488 | // FeatureSet returns all available features as strings. | ||
489 | func (c CPUInfo) FeatureSet() []string { | ||
490 | s := make([]string, 0, c.featureSet.nEnabled()) | ||
491 | s = append(s, c.featureSet.Strings()...) | ||
492 | return s | ||
493 | } | ||
494 | |||
495 | // RTCounter returns the 64-bit time-stamp counter | ||
496 | // Uses the RDTSCP instruction. The value 0 is returned | ||
497 | // if the CPU does not support the instruction. | ||
498 | func (c CPUInfo) RTCounter() uint64 { | ||
499 | if !c.Supports(RDTSCP) { | ||
500 | return 0 | ||
501 | } | ||
502 | a, _, _, d := rdtscpAsm() | ||
503 | return uint64(a) | (uint64(d) << 32) | ||
504 | } | ||
505 | |||
506 | // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. | ||
507 | // This variable is OS dependent, but on Linux contains information | ||
508 | // about the current cpu/core the code is running on. | ||
509 | // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. | ||
510 | func (c CPUInfo) Ia32TscAux() uint32 { | ||
511 | if !c.Supports(RDTSCP) { | ||
512 | return 0 | ||
513 | } | ||
514 | _, _, ecx, _ := rdtscpAsm() | ||
515 | return ecx | ||
516 | } | ||
517 | |||
518 | // LogicalCPU will return the Logical CPU the code is currently executing on. | ||
519 | // This is likely to change when the OS re-schedules the running thread | ||
520 | // to another CPU. | ||
521 | // If the current core cannot be detected, -1 will be returned. | ||
522 | func (c CPUInfo) LogicalCPU() int { | ||
523 | if c.maxFunc < 1 { | ||
524 | return -1 | ||
525 | } | ||
526 | _, ebx, _, _ := cpuid(1) | ||
527 | return int(ebx >> 24) | ||
528 | } | ||
529 | |||
530 | // frequencies tries to compute the clock speed of the CPU. If leaf 15 is | ||
531 | // supported, use it, otherwise parse the brand string. Yes, really. | ||
532 | func (c *CPUInfo) frequencies() { | ||
533 | c.Hz, c.BoostFreq = 0, 0 | ||
534 | mfi := maxFunctionID() | ||
535 | if mfi >= 0x15 { | ||
536 | eax, ebx, ecx, _ := cpuid(0x15) | ||
537 | if eax != 0 && ebx != 0 && ecx != 0 { | ||
538 | c.Hz = (int64(ecx) * int64(ebx)) / int64(eax) | ||
539 | } | ||
540 | } | ||
541 | if mfi >= 0x16 { | ||
542 | a, b, _, _ := cpuid(0x16) | ||
543 | // Base... | ||
544 | if a&0xffff > 0 { | ||
545 | c.Hz = int64(a&0xffff) * 1_000_000 | ||
546 | } | ||
547 | // Boost... | ||
548 | if b&0xffff > 0 { | ||
549 | c.BoostFreq = int64(b&0xffff) * 1_000_000 | ||
550 | } | ||
551 | } | ||
552 | if c.Hz > 0 { | ||
553 | return | ||
554 | } | ||
555 | |||
556 | // computeHz determines the official rated speed of a CPU from its brand | ||
557 | // string. This insanity is *actually the official documented way to do | ||
558 | // this according to Intel*, prior to leaf 0x15 existing. The official | ||
559 | // documentation only shows this working for exactly `x.xx` or `xxxx` | ||
560 | // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other | ||
561 | // sizes. | ||
562 | model := c.BrandName | ||
563 | hz := strings.LastIndex(model, "Hz") | ||
564 | if hz < 3 { | ||
565 | return | ||
566 | } | ||
567 | var multiplier int64 | ||
568 | switch model[hz-1] { | ||
569 | case 'M': | ||
570 | multiplier = 1000 * 1000 | ||
571 | case 'G': | ||
572 | multiplier = 1000 * 1000 * 1000 | ||
573 | case 'T': | ||
574 | multiplier = 1000 * 1000 * 1000 * 1000 | ||
575 | } | ||
576 | if multiplier == 0 { | ||
577 | return | ||
578 | } | ||
579 | freq := int64(0) | ||
580 | divisor := int64(0) | ||
581 | decimalShift := int64(1) | ||
582 | var i int | ||
583 | for i = hz - 2; i >= 0 && model[i] != ' '; i-- { | ||
584 | if model[i] >= '0' && model[i] <= '9' { | ||
585 | freq += int64(model[i]-'0') * decimalShift | ||
586 | decimalShift *= 10 | ||
587 | } else if model[i] == '.' { | ||
588 | if divisor != 0 { | ||
589 | return | ||
590 | } | ||
591 | divisor = decimalShift | ||
592 | } else { | ||
593 | return | ||
594 | } | ||
595 | } | ||
596 | // we didn't find a space | ||
597 | if i < 0 { | ||
598 | return | ||
599 | } | ||
600 | if divisor != 0 { | ||
601 | c.Hz = (freq * multiplier) / divisor | ||
602 | return | ||
603 | } | ||
604 | c.Hz = freq * multiplier | ||
605 | } | ||
606 | |||
607 | // VM Will return true if the cpu id indicates we are in | ||
608 | // a virtual machine. | ||
609 | func (c CPUInfo) VM() bool { | ||
610 | return CPU.featureSet.inSet(HYPERVISOR) | ||
611 | } | ||
612 | |||
613 | // flags is one 64-bit word of detected cpu features and characteristics. | ||
614 | type flags uint64 | ||
615 | |||
616 | // flagBitsLog2 is log2(bits_in_uint64); flagBits is the resulting word width (64) and flagMask selects the bit position within one word. | ||
617 | const flagBitsLog2 = 6 | ||
618 | const flagBits = 1 << flagBitsLog2 | ||
619 | const flagMask = flagBits - 1 | ||
620 | |||
621 | // flagSet contains detected cpu features and characteristics in an array of flags. Its length is lastID/flagBits rounded up. | ||
622 | type flagSet [(lastID + flagMask) / flagBits]flags | ||
623 | |||
624 | func (s *flagSet) inSet(feat FeatureID) bool { | ||
625 | return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0 | ||
626 | } | ||
627 | |||
628 | func (s *flagSet) set(feat FeatureID) { | ||
629 | s[feat>>flagBitsLog2] |= 1 << (feat & flagMask) | ||
630 | } | ||
631 | |||
632 | // setIf will set a feature if boolean is true. | ||
633 | func (s *flagSet) setIf(cond bool, features ...FeatureID) { | ||
634 | if cond { | ||
635 | for _, offset := range features { | ||
636 | s[offset>>flagBitsLog2] |= 1 << (offset & flagMask) | ||
637 | } | ||
638 | } | ||
639 | } | ||
640 | |||
641 | func (s *flagSet) unset(offset FeatureID) { | ||
642 | bit := flags(1 << (offset & flagMask)) | ||
643 | s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit | ||
644 | } | ||
645 | |||
646 | // or with another flagset. | ||
647 | func (s *flagSet) or(other flagSet) { | ||
648 | for i, v := range other[:] { | ||
649 | s[i] |= v | ||
650 | } | ||
651 | } | ||
652 | |||
653 | // hasSet returns whether all features are present. | ||
654 | func (s *flagSet) hasSet(other flagSet) bool { | ||
655 | for i, v := range other[:] { | ||
656 | if s[i]&v != v { | ||
657 | return false | ||
658 | } | ||
659 | } | ||
660 | return true | ||
661 | } | ||
662 | |||
663 | // hasSet returns whether all features are present. | ||
664 | func (s *flagSet) hasSetP(other *flagSet) bool { | ||
665 | for i, v := range other[:] { | ||
666 | if s[i]&v != v { | ||
667 | return false | ||
668 | } | ||
669 | } | ||
670 | return true | ||
671 | } | ||
672 | |||
673 | // hasOneOf returns whether one or more features are present. | ||
674 | func (s *flagSet) hasOneOf(other *flagSet) bool { | ||
675 | for i, v := range other[:] { | ||
676 | if s[i]&v != 0 { | ||
677 | return true | ||
678 | } | ||
679 | } | ||
680 | return false | ||
681 | } | ||
682 | |||
683 | // nEnabled will return the number of enabled flags. | ||
684 | func (s *flagSet) nEnabled() (n int) { | ||
685 | for _, v := range s[:] { | ||
686 | n += bits.OnesCount64(uint64(v)) | ||
687 | } | ||
688 | return n | ||
689 | } | ||
690 | |||
691 | func flagSetWith(feat ...FeatureID) flagSet { | ||
692 | var res flagSet | ||
693 | for _, f := range feat { | ||
694 | res.set(f) | ||
695 | } | ||
696 | return res | ||
697 | } | ||
698 | |||
699 | // ParseFeature will parse the string and return the ID of the matching feature. | ||
700 | // Will return UNKNOWN if not found. | ||
701 | func ParseFeature(s string) FeatureID { | ||
702 | s = strings.ToUpper(s) | ||
703 | for i := firstID; i < lastID; i++ { | ||
704 | if i.String() == s { | ||
705 | return i | ||
706 | } | ||
707 | } | ||
708 | return UNKNOWN | ||
709 | } | ||
710 | |||
711 | // Strings returns an array of the detected features for FlagsSet. | ||
712 | func (s flagSet) Strings() []string { | ||
713 | if len(s) == 0 { | ||
714 | return []string{""} | ||
715 | } | ||
716 | r := make([]string, 0) | ||
717 | for i := firstID; i < lastID; i++ { | ||
718 | if s.inSet(i) { | ||
719 | r = append(r, i.String()) | ||
720 | } | ||
721 | } | ||
722 | return r | ||
723 | } | ||
724 | |||
725 | func maxExtendedFunction() uint32 { | ||
726 | eax, _, _, _ := cpuid(0x80000000) | ||
727 | return eax | ||
728 | } | ||
729 | |||
730 | func maxFunctionID() uint32 { | ||
731 | a, _, _, _ := cpuid(0) | ||
732 | return a | ||
733 | } | ||
734 | |||
735 | func brandName() string { | ||
736 | if maxExtendedFunction() >= 0x80000004 { | ||
737 | v := make([]uint32, 0, 48) | ||
738 | for i := uint32(0); i < 3; i++ { | ||
739 | a, b, c, d := cpuid(0x80000002 + i) | ||
740 | v = append(v, a, b, c, d) | ||
741 | } | ||
742 | return strings.Trim(string(valAsString(v...)), " ") | ||
743 | } | ||
744 | return "unknown" | ||
745 | } | ||
746 | |||
747 | func threadsPerCore() int { | ||
748 | mfi := maxFunctionID() | ||
749 | vend, _ := vendorID() | ||
750 | |||
751 | if mfi < 0x4 || (vend != Intel && vend != AMD) { | ||
752 | return 1 | ||
753 | } | ||
754 | |||
755 | if mfi < 0xb { | ||
756 | if vend != Intel { | ||
757 | return 1 | ||
758 | } | ||
759 | _, b, _, d := cpuid(1) | ||
760 | if (d & (1 << 28)) != 0 { | ||
761 | // v will contain logical core count | ||
762 | v := (b >> 16) & 255 | ||
763 | if v > 1 { | ||
764 | a4, _, _, _ := cpuid(4) | ||
765 | // physical cores | ||
766 | v2 := (a4 >> 26) + 1 | ||
767 | if v2 > 0 { | ||
768 | return int(v) / int(v2) | ||
769 | } | ||
770 | } | ||
771 | } | ||
772 | return 1 | ||
773 | } | ||
774 | _, b, _, _ := cpuidex(0xb, 0) | ||
775 | if b&0xffff == 0 { | ||
776 | if vend == AMD { | ||
777 | // Workaround for AMD returning 0, assume 2 if >= Zen 2 | ||
778 | // It will be more correct than not. | ||
779 | fam, _, _ := familyModel() | ||
780 | _, _, _, d := cpuid(1) | ||
781 | if (d&(1<<28)) != 0 && fam >= 23 { | ||
782 | return 2 | ||
783 | } | ||
784 | } | ||
785 | return 1 | ||
786 | } | ||
787 | return int(b & 0xffff) | ||
788 | } | ||
789 | |||
790 | func logicalCores() int { | ||
791 | mfi := maxFunctionID() | ||
792 | v, _ := vendorID() | ||
793 | switch v { | ||
794 | case Intel: | ||
795 | // Use this on old Intel processors | ||
796 | if mfi < 0xb { | ||
797 | if mfi < 1 { | ||
798 | return 0 | ||
799 | } | ||
800 | // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) | ||
801 | // that can be assigned to logical processors in a physical package. | ||
802 | // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. | ||
803 | _, ebx, _, _ := cpuid(1) | ||
804 | logical := (ebx >> 16) & 0xff | ||
805 | return int(logical) | ||
806 | } | ||
807 | _, b, _, _ := cpuidex(0xb, 1) | ||
808 | return int(b & 0xffff) | ||
809 | case AMD, Hygon: | ||
810 | _, b, _, _ := cpuid(1) | ||
811 | return int((b >> 16) & 0xff) | ||
812 | default: | ||
813 | return 0 | ||
814 | } | ||
815 | } | ||
816 | |||
817 | func familyModel() (family, model, stepping int) { | ||
818 | if maxFunctionID() < 0x1 { | ||
819 | return 0, 0, 0 | ||
820 | } | ||
821 | eax, _, _, _ := cpuid(1) | ||
822 | // If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0]. | ||
823 | family = int((eax >> 8) & 0xf) | ||
824 | extFam := family == 0x6 // Intel is 0x6, needs extended model. | ||
825 | if family == 0xf { | ||
826 | // Add ExtFamily | ||
827 | family += int((eax >> 20) & 0xff) | ||
828 | extFam = true | ||
829 | } | ||
830 | // If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0]. | ||
831 | model = int((eax >> 4) & 0xf) | ||
832 | if extFam { | ||
833 | // Add ExtModel | ||
834 | model += int((eax >> 12) & 0xf0) | ||
835 | } | ||
836 | stepping = int(eax & 0xf) | ||
837 | return family, model, stepping | ||
838 | } | ||
839 | |||
840 | func physicalCores() int { | ||
841 | v, _ := vendorID() | ||
842 | switch v { | ||
843 | case Intel: | ||
844 | return logicalCores() / threadsPerCore() | ||
845 | case AMD, Hygon: | ||
846 | lc := logicalCores() | ||
847 | tpc := threadsPerCore() | ||
848 | if lc > 0 && tpc > 0 { | ||
849 | return lc / tpc | ||
850 | } | ||
851 | |||
852 | // The following is inaccurate on AMD EPYC 7742 64-Core Processor | ||
853 | if maxExtendedFunction() >= 0x80000008 { | ||
854 | _, _, c, _ := cpuid(0x80000008) | ||
855 | if c&0xff > 0 { | ||
856 | return int(c&0xff) + 1 | ||
857 | } | ||
858 | } | ||
859 | } | ||
860 | return 0 | ||
861 | } | ||
862 | |||
863 | // Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID | ||
864 | var vendorMapping = map[string]Vendor{ | ||
865 | "AMDisbetter!": AMD, | ||
866 | "AuthenticAMD": AMD, | ||
867 | "CentaurHauls": VIA, | ||
868 | "GenuineIntel": Intel, | ||
869 | "TransmetaCPU": Transmeta, | ||
870 | "GenuineTMx86": Transmeta, | ||
871 | "Geode by NSC": NSC, | ||
872 | "VIA VIA VIA ": VIA, | ||
873 | "KVMKVMKVMKVM": KVM, | ||
874 | "Microsoft Hv": MSVM, | ||
875 | "VMwareVMware": VMware, | ||
876 | "XenVMMXenVMM": XenHVM, | ||
877 | "bhyve bhyve ": Bhyve, | ||
878 | "HygonGenuine": Hygon, | ||
879 | "Vortex86 SoC": SiS, | ||
880 | "SiS SiS SiS ": SiS, | ||
881 | "RiseRiseRise": SiS, | ||
882 | "Genuine RDC": RDC, | ||
883 | } | ||
884 | |||
885 | func vendorID() (Vendor, string) { | ||
886 | _, b, c, d := cpuid(0) | ||
887 | v := string(valAsString(b, d, c)) | ||
888 | vend, ok := vendorMapping[v] | ||
889 | if !ok { | ||
890 | return VendorUnknown, v | ||
891 | } | ||
892 | return vend, v | ||
893 | } | ||
894 | |||
895 | func cacheLine() int { | ||
896 | if maxFunctionID() < 0x1 { | ||
897 | return 0 | ||
898 | } | ||
899 | |||
900 | _, ebx, _, _ := cpuid(1) | ||
901 | cache := (ebx & 0xff00) >> 5 // cflush size | ||
902 | if cache == 0 && maxExtendedFunction() >= 0x80000006 { | ||
903 | _, _, ecx, _ := cpuid(0x80000006) | ||
904 | cache = ecx & 0xff // cacheline size | ||
905 | } | ||
906 | // TODO: Read from Cache and TLB Information | ||
907 | return int(cache) | ||
908 | } | ||
909 | |||
910 | func (c *CPUInfo) cacheSize() { | ||
911 | c.Cache.L1D = -1 | ||
912 | c.Cache.L1I = -1 | ||
913 | c.Cache.L2 = -1 | ||
914 | c.Cache.L3 = -1 | ||
915 | vendor, _ := vendorID() | ||
916 | switch vendor { | ||
917 | case Intel: | ||
918 | if maxFunctionID() < 4 { | ||
919 | return | ||
920 | } | ||
921 | c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0 | ||
922 | for i := uint32(0); ; i++ { | ||
923 | eax, ebx, ecx, _ := cpuidex(4, i) | ||
924 | cacheType := eax & 15 | ||
925 | if cacheType == 0 { | ||
926 | break | ||
927 | } | ||
928 | cacheLevel := (eax >> 5) & 7 | ||
929 | coherency := int(ebx&0xfff) + 1 | ||
930 | partitions := int((ebx>>12)&0x3ff) + 1 | ||
931 | associativity := int((ebx>>22)&0x3ff) + 1 | ||
932 | sets := int(ecx) + 1 | ||
933 | size := associativity * partitions * coherency * sets | ||
934 | switch cacheLevel { | ||
935 | case 1: | ||
936 | if cacheType == 1 { | ||
937 | // 1 = Data Cache | ||
938 | c.Cache.L1D = size | ||
939 | } else if cacheType == 2 { | ||
940 | // 2 = Instruction Cache | ||
941 | c.Cache.L1I = size | ||
942 | } else { | ||
943 | if c.Cache.L1D < 0 { | ||
944 | c.Cache.L1I = size | ||
945 | } | ||
946 | if c.Cache.L1I < 0 { | ||
947 | c.Cache.L1I = size | ||
948 | } | ||
949 | } | ||
950 | case 2: | ||
951 | c.Cache.L2 = size | ||
952 | case 3: | ||
953 | c.Cache.L3 = size | ||
954 | } | ||
955 | } | ||
956 | case AMD, Hygon: | ||
957 | // Untested. | ||
958 | if maxExtendedFunction() < 0x80000005 { | ||
959 | return | ||
960 | } | ||
961 | _, _, ecx, edx := cpuid(0x80000005) | ||
962 | c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) | ||
963 | c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) | ||
964 | |||
965 | if maxExtendedFunction() < 0x80000006 { | ||
966 | return | ||
967 | } | ||
968 | _, _, ecx, _ = cpuid(0x80000006) | ||
969 | c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) | ||
970 | |||
971 | // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties | ||
972 | if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) { | ||
973 | return | ||
974 | } | ||
975 | |||
976 | // Xen Hypervisor is buggy and returns the same entry no matter ECX value. | ||
977 | // Hack: When we encounter the same entry 100 times we break. | ||
978 | nSame := 0 | ||
979 | var last uint32 | ||
980 | for i := uint32(0); i < math.MaxUint32; i++ { | ||
981 | eax, ebx, ecx, _ := cpuidex(0x8000001D, i) | ||
982 | |||
983 | level := (eax >> 5) & 7 | ||
984 | cacheNumSets := ecx + 1 | ||
985 | cacheLineSize := 1 + (ebx & 2047) | ||
986 | cachePhysPartitions := 1 + ((ebx >> 12) & 511) | ||
987 | cacheNumWays := 1 + ((ebx >> 22) & 511) | ||
988 | |||
989 | typ := eax & 15 | ||
990 | size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays) | ||
991 | if typ == 0 { | ||
992 | return | ||
993 | } | ||
994 | |||
995 | // Check for the same value repeated. | ||
996 | comb := eax ^ ebx ^ ecx | ||
997 | if comb == last { | ||
998 | nSame++ | ||
999 | if nSame == 100 { | ||
1000 | return | ||
1001 | } | ||
1002 | } | ||
1003 | last = comb | ||
1004 | |||
1005 | switch level { | ||
1006 | case 1: | ||
1007 | switch typ { | ||
1008 | case 1: | ||
1009 | // Data cache | ||
1010 | c.Cache.L1D = size | ||
1011 | case 2: | ||
1012 | // Inst cache | ||
1013 | c.Cache.L1I = size | ||
1014 | default: | ||
1015 | if c.Cache.L1D < 0 { | ||
1016 | c.Cache.L1I = size | ||
1017 | } | ||
1018 | if c.Cache.L1I < 0 { | ||
1019 | c.Cache.L1I = size | ||
1020 | } | ||
1021 | } | ||
1022 | case 2: | ||
1023 | c.Cache.L2 = size | ||
1024 | case 3: | ||
1025 | c.Cache.L3 = size | ||
1026 | } | ||
1027 | } | ||
1028 | } | ||
1029 | } | ||
1030 | |||
// SGXEPCSection describes one EPC section as enumerated by hasSGX from
// CPUID leaf 0x12 (sub-leaves 2 and up).
type SGXEPCSection struct {
	// BaseAddress is assembled from EAX[31:12] (low part) and EBX[19:0] (bits 32 and up).
	BaseAddress uint64
	// EPCSize is assembled from ECX[31:12] (low part) and EDX[19:0] (bits 32 and up).
	EPCSize uint64
}
1035 | |||
// SGXSupport holds the SGX information collected by hasSGX.
// When Available is false, all other fields keep their zero values.
type SGXSupport struct {
	// Available and LaunchControl are passed in by the caller of hasSGX.
	Available     bool
	LaunchControl bool
	// SGX1Supported and SGX2Supported are bits 0 and 1 of EAX from CPUID leaf 0x12, sub-leaf 0.
	SGX1Supported bool
	SGX2Supported bool
	// MaxEnclaveSizeNot64 is 2 to the power of EDX[7:0] from CPUID leaf 0x12, sub-leaf 0.
	MaxEnclaveSizeNot64 int64
	// MaxEnclaveSize64 is 2 to the power of EDX[15:8] from CPUID leaf 0x12, sub-leaf 0.
	MaxEnclaveSize64 int64
	// EPCSections lists the EPC sections found in sub-leaves 2 and up.
	EPCSections []SGXEPCSection
}
1045 | |||
1046 | func hasSGX(available, lc bool) (rval SGXSupport) { | ||
1047 | rval.Available = available | ||
1048 | |||
1049 | if !available { | ||
1050 | return | ||
1051 | } | ||
1052 | |||
1053 | rval.LaunchControl = lc | ||
1054 | |||
1055 | a, _, _, d := cpuidex(0x12, 0) | ||
1056 | rval.SGX1Supported = a&0x01 != 0 | ||
1057 | rval.SGX2Supported = a&0x02 != 0 | ||
1058 | rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 | ||
1059 | rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 | ||
1060 | rval.EPCSections = make([]SGXEPCSection, 0) | ||
1061 | |||
1062 | for subleaf := uint32(2); subleaf < 2+8; subleaf++ { | ||
1063 | eax, ebx, ecx, edx := cpuidex(0x12, subleaf) | ||
1064 | leafType := eax & 0xf | ||
1065 | |||
1066 | if leafType == 0 { | ||
1067 | // Invalid subleaf, stop iterating | ||
1068 | break | ||
1069 | } else if leafType == 1 { | ||
1070 | // EPC Section subleaf | ||
1071 | baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32) | ||
1072 | size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32) | ||
1073 | |||
1074 | section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size} | ||
1075 | rval.EPCSections = append(rval.EPCSections, section) | ||
1076 | } | ||
1077 | } | ||
1078 | |||
1079 | return | ||
1080 | } | ||
1081 | |||
1082 | func support() flagSet { | ||
1083 | var fs flagSet | ||
1084 | mfi := maxFunctionID() | ||
1085 | vend, _ := vendorID() | ||
1086 | if mfi < 0x1 { | ||
1087 | return fs | ||
1088 | } | ||
1089 | family, model, _ := familyModel() | ||
1090 | |||
1091 | _, _, c, d := cpuid(1) | ||
1092 | fs.setIf((d&(1<<0)) != 0, X87) | ||
1093 | fs.setIf((d&(1<<8)) != 0, CMPXCHG8) | ||
1094 | fs.setIf((d&(1<<11)) != 0, SYSEE) | ||
1095 | fs.setIf((d&(1<<15)) != 0, CMOV) | ||
1096 | fs.setIf((d&(1<<23)) != 0, MMX) | ||
1097 | fs.setIf((d&(1<<24)) != 0, FXSR) | ||
1098 | fs.setIf((d&(1<<25)) != 0, FXSROPT) | ||
1099 | fs.setIf((d&(1<<25)) != 0, SSE) | ||
1100 | fs.setIf((d&(1<<26)) != 0, SSE2) | ||
1101 | fs.setIf((c&1) != 0, SSE3) | ||
1102 | fs.setIf((c&(1<<5)) != 0, VMX) | ||
1103 | fs.setIf((c&(1<<9)) != 0, SSSE3) | ||
1104 | fs.setIf((c&(1<<19)) != 0, SSE4) | ||
1105 | fs.setIf((c&(1<<20)) != 0, SSE42) | ||
1106 | fs.setIf((c&(1<<25)) != 0, AESNI) | ||
1107 | fs.setIf((c&(1<<1)) != 0, CLMUL) | ||
1108 | fs.setIf(c&(1<<22) != 0, MOVBE) | ||
1109 | fs.setIf(c&(1<<23) != 0, POPCNT) | ||
1110 | fs.setIf(c&(1<<30) != 0, RDRAND) | ||
1111 | |||
1112 | // This bit has been reserved by Intel & AMD for use by hypervisors, | ||
1113 | // and indicates the presence of a hypervisor. | ||
1114 | fs.setIf(c&(1<<31) != 0, HYPERVISOR) | ||
1115 | fs.setIf(c&(1<<29) != 0, F16C) | ||
1116 | fs.setIf(c&(1<<13) != 0, CX16) | ||
1117 | |||
1118 | if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { | ||
1119 | fs.setIf(threadsPerCore() > 1, HTT) | ||
1120 | } | ||
1121 | if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 { | ||
1122 | fs.setIf(threadsPerCore() > 1, HTT) | ||
1123 | } | ||
1124 | fs.setIf(c&1<<26 != 0, XSAVE) | ||
1125 | fs.setIf(c&1<<27 != 0, OSXSAVE) | ||
1126 | // Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits | ||
1127 | const avxCheck = 1<<26 | 1<<27 | 1<<28 | ||
1128 | if c&avxCheck == avxCheck { | ||
1129 | // Check for OS support | ||
1130 | eax, _ := xgetbv(0) | ||
1131 | if (eax & 0x6) == 0x6 { | ||
1132 | fs.set(AVX) | ||
1133 | switch vend { | ||
1134 | case Intel: | ||
1135 | // Older than Haswell. | ||
1136 | fs.setIf(family == 6 && model < 60, AVXSLOW) | ||
1137 | case AMD: | ||
1138 | // Older than Zen 2 | ||
1139 | fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW) | ||
1140 | } | ||
1141 | } | ||
1142 | } | ||
1143 | // FMA3 can be used with SSE registers, so no OS support is strictly needed. | ||
1144 | // fma3 and OSXSAVE needed. | ||
1145 | const fma3Check = 1<<12 | 1<<27 | ||
1146 | fs.setIf(c&fma3Check == fma3Check, FMA3) | ||
1147 | |||
1148 | // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. | ||
1149 | if mfi >= 7 { | ||
1150 | _, ebx, ecx, edx := cpuidex(7, 0) | ||
1151 | if fs.inSet(AVX) && (ebx&0x00000020) != 0 { | ||
1152 | fs.set(AVX2) | ||
1153 | } | ||
1154 | // CPUID.(EAX=7, ECX=0).EBX | ||
1155 | if (ebx & 0x00000008) != 0 { | ||
1156 | fs.set(BMI1) | ||
1157 | fs.setIf((ebx&0x00000100) != 0, BMI2) | ||
1158 | } | ||
1159 | fs.setIf(ebx&(1<<2) != 0, SGX) | ||
1160 | fs.setIf(ebx&(1<<4) != 0, HLE) | ||
1161 | fs.setIf(ebx&(1<<9) != 0, ERMS) | ||
1162 | fs.setIf(ebx&(1<<11) != 0, RTM) | ||
1163 | fs.setIf(ebx&(1<<14) != 0, MPX) | ||
1164 | fs.setIf(ebx&(1<<18) != 0, RDSEED) | ||
1165 | fs.setIf(ebx&(1<<19) != 0, ADX) | ||
1166 | fs.setIf(ebx&(1<<29) != 0, SHA) | ||
1167 | |||
1168 | // CPUID.(EAX=7, ECX=0).ECX | ||
1169 | fs.setIf(ecx&(1<<5) != 0, WAITPKG) | ||
1170 | fs.setIf(ecx&(1<<7) != 0, CETSS) | ||
1171 | fs.setIf(ecx&(1<<8) != 0, GFNI) | ||
1172 | fs.setIf(ecx&(1<<9) != 0, VAES) | ||
1173 | fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ) | ||
1174 | fs.setIf(ecx&(1<<13) != 0, TME) | ||
1175 | fs.setIf(ecx&(1<<25) != 0, CLDEMOTE) | ||
1176 | fs.setIf(ecx&(1<<23) != 0, KEYLOCKER) | ||
1177 | fs.setIf(ecx&(1<<27) != 0, MOVDIRI) | ||
1178 | fs.setIf(ecx&(1<<28) != 0, MOVDIR64B) | ||
1179 | fs.setIf(ecx&(1<<29) != 0, ENQCMD) | ||
1180 | fs.setIf(ecx&(1<<30) != 0, SGXLC) | ||
1181 | |||
1182 | // CPUID.(EAX=7, ECX=0).EDX | ||
1183 | fs.setIf(edx&(1<<4) != 0, FSRM) | ||
1184 | fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL) | ||
1185 | fs.setIf(edx&(1<<10) != 0, MD_CLEAR) | ||
1186 | fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT) | ||
1187 | fs.setIf(edx&(1<<14) != 0, SERIALIZE) | ||
1188 | fs.setIf(edx&(1<<15) != 0, HYBRID_CPU) | ||
1189 | fs.setIf(edx&(1<<16) != 0, TSXLDTRK) | ||
1190 | fs.setIf(edx&(1<<18) != 0, PCONFIG) | ||
1191 | fs.setIf(edx&(1<<20) != 0, CETIBT) | ||
1192 | fs.setIf(edx&(1<<26) != 0, IBPB) | ||
1193 | fs.setIf(edx&(1<<27) != 0, STIBP) | ||
1194 | fs.setIf(edx&(1<<28) != 0, FLUSH_L1D) | ||
1195 | fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP) | ||
1196 | fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP) | ||
1197 | fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD) | ||
1198 | |||
1199 | // CPUID.(EAX=7, ECX=1).EAX | ||
1200 | eax1, _, _, edx1 := cpuidex(7, 1) | ||
1201 | fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI) | ||
1202 | fs.setIf(eax1&(1<<7) != 0, CMPCCXADD) | ||
1203 | fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL) | ||
1204 | fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT) | ||
1205 | fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT) | ||
1206 | fs.setIf(eax1&(1<<22) != 0, HRESET) | ||
1207 | fs.setIf(eax1&(1<<23) != 0, AVXIFMA) | ||
1208 | fs.setIf(eax1&(1<<26) != 0, LAM) | ||
1209 | |||
1210 | // CPUID.(EAX=7, ECX=1).EDX | ||
1211 | fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8) | ||
1212 | fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT) | ||
1213 | fs.setIf(edx1&(1<<14) != 0, PREFETCHI) | ||
1214 | fs.setIf(edx1&(1<<19) != 0, AVX10) | ||
1215 | fs.setIf(edx1&(1<<21) != 0, APX_F) | ||
1216 | |||
1217 | // Only detect AVX-512 features if XGETBV is supported | ||
1218 | if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { | ||
1219 | // Check for OS support | ||
1220 | eax, _ := xgetbv(0) | ||
1221 | |||
1222 | // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and | ||
1223 | // ZMM16-ZMM31 state are enabled by OS) | ||
1224 | /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). | ||
1225 | hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3 | ||
1226 | if runtime.GOOS == "darwin" { | ||
1227 | hasAVX512 = fs.inSet(AVX) && darwinHasAVX512() | ||
1228 | } | ||
1229 | if hasAVX512 { | ||
1230 | fs.setIf(ebx&(1<<16) != 0, AVX512F) | ||
1231 | fs.setIf(ebx&(1<<17) != 0, AVX512DQ) | ||
1232 | fs.setIf(ebx&(1<<21) != 0, AVX512IFMA) | ||
1233 | fs.setIf(ebx&(1<<26) != 0, AVX512PF) | ||
1234 | fs.setIf(ebx&(1<<27) != 0, AVX512ER) | ||
1235 | fs.setIf(ebx&(1<<28) != 0, AVX512CD) | ||
1236 | fs.setIf(ebx&(1<<30) != 0, AVX512BW) | ||
1237 | fs.setIf(ebx&(1<<31) != 0, AVX512VL) | ||
1238 | // ecx | ||
1239 | fs.setIf(ecx&(1<<1) != 0, AVX512VBMI) | ||
1240 | fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2) | ||
1241 | fs.setIf(ecx&(1<<11) != 0, AVX512VNNI) | ||
1242 | fs.setIf(ecx&(1<<12) != 0, AVX512BITALG) | ||
1243 | fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ) | ||
1244 | // edx | ||
1245 | fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT) | ||
1246 | fs.setIf(edx&(1<<22) != 0, AMXBF16) | ||
1247 | fs.setIf(edx&(1<<23) != 0, AVX512FP16) | ||
1248 | fs.setIf(edx&(1<<24) != 0, AMXTILE) | ||
1249 | fs.setIf(edx&(1<<25) != 0, AMXINT8) | ||
1250 | // eax1 = CPUID.(EAX=7, ECX=1).EAX | ||
1251 | fs.setIf(eax1&(1<<5) != 0, AVX512BF16) | ||
1252 | fs.setIf(eax1&(1<<19) != 0, WRMSRNS) | ||
1253 | fs.setIf(eax1&(1<<21) != 0, AMXFP16) | ||
1254 | fs.setIf(eax1&(1<<27) != 0, MSRLIST) | ||
1255 | } | ||
1256 | } | ||
1257 | |||
1258 | // CPUID.(EAX=7, ECX=2) | ||
1259 | _, _, _, edx = cpuidex(7, 2) | ||
1260 | fs.setIf(edx&(1<<0) != 0, PSFD) | ||
1261 | fs.setIf(edx&(1<<1) != 0, IDPRED_CTRL) | ||
1262 | fs.setIf(edx&(1<<2) != 0, RRSBA_CTRL) | ||
1263 | fs.setIf(edx&(1<<4) != 0, BHI_CTRL) | ||
1264 | fs.setIf(edx&(1<<5) != 0, MCDT_NO) | ||
1265 | |||
1266 | // Add keylocker features. | ||
1267 | if fs.inSet(KEYLOCKER) && mfi >= 0x19 { | ||
1268 | _, ebx, _, _ := cpuidex(0x19, 0) | ||
1269 | fs.setIf(ebx&5 == 5, KEYLOCKERW) // Bit 0 and 2 (1+4) | ||
1270 | } | ||
1271 | |||
1272 | // Add AVX10 features. | ||
1273 | if fs.inSet(AVX10) && mfi >= 0x24 { | ||
1274 | _, ebx, _, _ := cpuidex(0x24, 0) | ||
1275 | fs.setIf(ebx&(1<<16) != 0, AVX10_128) | ||
1276 | fs.setIf(ebx&(1<<17) != 0, AVX10_256) | ||
1277 | fs.setIf(ebx&(1<<18) != 0, AVX10_512) | ||
1278 | } | ||
1279 | } | ||
1280 | |||
1281 | // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1) | ||
1282 | // EAX | ||
1283 | // Bit 00: XSAVEOPT is available. | ||
1284 | // Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set. | ||
1285 | // Bit 02: Supports XGETBV with ECX = 1 if set. | ||
1286 | // Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set. | ||
1287 | // Bits 31 - 04: Reserved. | ||
1288 | // EBX | ||
1289 | // Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS. | ||
1290 | // ECX | ||
1291 | // Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1. | ||
1292 | // EDX? | ||
1293 | // Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved. | ||
1294 | if mfi >= 0xd { | ||
1295 | if fs.inSet(XSAVE) { | ||
1296 | eax, _, _, _ := cpuidex(0xd, 1) | ||
1297 | fs.setIf(eax&(1<<0) != 0, XSAVEOPT) | ||
1298 | fs.setIf(eax&(1<<1) != 0, XSAVEC) | ||
1299 | fs.setIf(eax&(1<<2) != 0, XGETBV1) | ||
1300 | fs.setIf(eax&(1<<3) != 0, XSAVES) | ||
1301 | } | ||
1302 | } | ||
1303 | if maxExtendedFunction() >= 0x80000001 { | ||
1304 | _, _, c, d := cpuid(0x80000001) | ||
1305 | if (c & (1 << 5)) != 0 { | ||
1306 | fs.set(LZCNT) | ||
1307 | fs.set(POPCNT) | ||
1308 | } | ||
1309 | // ECX | ||
1310 | fs.setIf((c&(1<<0)) != 0, LAHF) | ||
1311 | fs.setIf((c&(1<<2)) != 0, SVM) | ||
1312 | fs.setIf((c&(1<<6)) != 0, SSE4A) | ||
1313 | fs.setIf((c&(1<<10)) != 0, IBS) | ||
1314 | fs.setIf((c&(1<<22)) != 0, TOPEXT) | ||
1315 | |||
1316 | // EDX | ||
1317 | fs.setIf(d&(1<<11) != 0, SYSCALL) | ||
1318 | fs.setIf(d&(1<<20) != 0, NX) | ||
1319 | fs.setIf(d&(1<<22) != 0, MMXEXT) | ||
1320 | fs.setIf(d&(1<<23) != 0, MMX) | ||
1321 | fs.setIf(d&(1<<24) != 0, FXSR) | ||
1322 | fs.setIf(d&(1<<25) != 0, FXSROPT) | ||
1323 | fs.setIf(d&(1<<27) != 0, RDTSCP) | ||
1324 | fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT) | ||
1325 | fs.setIf(d&(1<<31) != 0, AMD3DNOW) | ||
1326 | |||
1327 | /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be | ||
1328 | * used unless the OS has AVX support. */ | ||
1329 | if fs.inSet(AVX) { | ||
1330 | fs.setIf((c&(1<<11)) != 0, XOP) | ||
1331 | fs.setIf((c&(1<<16)) != 0, FMA4) | ||
1332 | } | ||
1333 | |||
1334 | } | ||
1335 | if maxExtendedFunction() >= 0x80000007 { | ||
1336 | _, b, _, d := cpuid(0x80000007) | ||
1337 | fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW) | ||
1338 | fs.setIf((b&(1<<1)) != 0, SUCCOR) | ||
1339 | fs.setIf((b&(1<<2)) != 0, HWA) | ||
1340 | fs.setIf((d&(1<<9)) != 0, CPBOOST) | ||
1341 | } | ||
1342 | |||
1343 | if maxExtendedFunction() >= 0x80000008 { | ||
1344 | _, b, _, _ := cpuid(0x80000008) | ||
1345 | fs.setIf(b&(1<<28) != 0, PSFD) | ||
1346 | fs.setIf(b&(1<<27) != 0, CPPC) | ||
1347 | fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD) | ||
1348 | fs.setIf(b&(1<<23) != 0, PPIN) | ||
1349 | fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED) | ||
1350 | fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS) | ||
1351 | fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP) | ||
1352 | fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED) | ||
1353 | fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON) | ||
1354 | fs.setIf(b&(1<<15) != 0, STIBP) | ||
1355 | fs.setIf(b&(1<<14) != 0, IBRS) | ||
1356 | fs.setIf((b&(1<<13)) != 0, INT_WBINVD) | ||
1357 | fs.setIf(b&(1<<12) != 0, IBPB) | ||
1358 | fs.setIf((b&(1<<9)) != 0, WBNOINVD) | ||
1359 | fs.setIf((b&(1<<8)) != 0, MCOMMIT) | ||
1360 | fs.setIf((b&(1<<4)) != 0, RDPRU) | ||
1361 | fs.setIf((b&(1<<3)) != 0, INVLPGB) | ||
1362 | fs.setIf((b&(1<<1)) != 0, MSRIRC) | ||
1363 | fs.setIf((b&(1<<0)) != 0, CLZERO) | ||
1364 | } | ||
1365 | |||
1366 | if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A { | ||
1367 | _, _, _, edx := cpuid(0x8000000A) | ||
1368 | fs.setIf((edx>>0)&1 == 1, SVMNP) | ||
1369 | fs.setIf((edx>>1)&1 == 1, LBRVIRT) | ||
1370 | fs.setIf((edx>>2)&1 == 1, SVML) | ||
1371 | fs.setIf((edx>>3)&1 == 1, NRIPS) | ||
1372 | fs.setIf((edx>>4)&1 == 1, TSCRATEMSR) | ||
1373 | fs.setIf((edx>>5)&1 == 1, VMCBCLEAN) | ||
1374 | fs.setIf((edx>>6)&1 == 1, SVMFBASID) | ||
1375 | fs.setIf((edx>>7)&1 == 1, SVMDA) | ||
1376 | fs.setIf((edx>>10)&1 == 1, SVMPF) | ||
1377 | fs.setIf((edx>>12)&1 == 1, SVMPFT) | ||
1378 | } | ||
1379 | |||
1380 | if maxExtendedFunction() >= 0x8000001a { | ||
1381 | eax, _, _, _ := cpuid(0x8000001a) | ||
1382 | fs.setIf((eax>>0)&1 == 1, FP128) | ||
1383 | fs.setIf((eax>>1)&1 == 1, MOVU) | ||
1384 | fs.setIf((eax>>2)&1 == 1, FP256) | ||
1385 | } | ||
1386 | |||
1387 | if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) { | ||
1388 | eax, _, _, _ := cpuid(0x8000001b) | ||
1389 | fs.setIf((eax>>0)&1 == 1, IBSFFV) | ||
1390 | fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM) | ||
1391 | fs.setIf((eax>>2)&1 == 1, IBSOPSAM) | ||
1392 | fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT) | ||
1393 | fs.setIf((eax>>4)&1 == 1, IBSOPCNT) | ||
1394 | fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT) | ||
1395 | fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT) | ||
1396 | fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK) | ||
1397 | fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE) | ||
1398 | fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX) | ||
1399 | fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1. | ||
1400 | fs.setIf((eax>>11)&1 == 1, IBS_ZEN4) | ||
1401 | } | ||
1402 | |||
1403 | if maxExtendedFunction() >= 0x8000001f && vend == AMD { | ||
1404 | a, _, _, _ := cpuid(0x8000001f) | ||
1405 | fs.setIf((a>>0)&1 == 1, SME) | ||
1406 | fs.setIf((a>>1)&1 == 1, SEV) | ||
1407 | fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH) | ||
1408 | fs.setIf((a>>3)&1 == 1, SEV_ES) | ||
1409 | fs.setIf((a>>4)&1 == 1, SEV_SNP) | ||
1410 | fs.setIf((a>>5)&1 == 1, VMPL) | ||
1411 | fs.setIf((a>>10)&1 == 1, SME_COHERENT) | ||
1412 | fs.setIf((a>>11)&1 == 1, SEV_64BIT) | ||
1413 | fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED) | ||
1414 | fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE) | ||
1415 | fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP) | ||
1416 | fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST) | ||
1417 | fs.setIf((a>>16)&1 == 1, VTE) | ||
1418 | fs.setIf((a>>24)&1 == 1, VMSA_REGPROT) | ||
1419 | } | ||
1420 | |||
1421 | if mfi >= 0x20 { | ||
1422 | // Microsoft has decided to purposefully hide the information | ||
1423 | // of the guest TEE when VMs are being created using Hyper-V. | ||
1424 | // | ||
1425 | // This leads us to check for the Hyper-V cpuid features | ||
1426 | // (0x4000000C), and then for the `ebx` value set. | ||
1427 | // | ||
1428 | // For Intel TDX, `ebx` is set as `0xbe3`, being 3 the part | ||
1429 | // we're mostly interested about,according to: | ||
1430 | // https://github.com/torvalds/linux/blob/d2f51b3516dade79269ff45eae2a7668ae711b25/arch/x86/include/asm/hyperv-tlfs.h#L169-L174 | ||
1431 | _, ebx, _, _ := cpuid(0x4000000C) | ||
1432 | fs.setIf(ebx == 0xbe3, TDX_GUEST) | ||
1433 | } | ||
1434 | |||
1435 | if mfi >= 0x21 { | ||
1436 | // Intel Trusted Domain Extensions Guests have their own cpuid leaf (0x21). | ||
1437 | _, ebx, ecx, edx := cpuid(0x21) | ||
1438 | identity := string(valAsString(ebx, edx, ecx)) | ||
1439 | fs.setIf(identity == "IntelTDX ", TDX_GUEST) | ||
1440 | } | ||
1441 | |||
1442 | return fs | ||
1443 | } | ||
1444 | |||
1445 | func (c *CPUInfo) supportAVX10() uint8 { | ||
1446 | if c.maxFunc >= 0x24 && c.featureSet.inSet(AVX10) { | ||
1447 | _, ebx, _, _ := cpuidex(0x24, 0) | ||
1448 | return uint8(ebx) | ||
1449 | } | ||
1450 | return 0 | ||
1451 | } | ||
1452 | |||
// valAsString converts register values into the bytes they contain, taking
// each value least-significant byte first. The result ends before the first
// zero byte, so it holds at most 4*len(values) bytes.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 0, 4*len(values))
	for _, word := range values {
		for shift := uint(0); shift < 32; shift += 8 {
			b := byte(word >> shift)
			if b == 0 {
				// A NUL byte terminates the string.
				return out
			}
			out = append(out, b)
		}
	}
	return out
}