aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/klauspost/cpuid/v2/cpuid.go
diff options
context:
space:
mode:
authorLibravatar Rutger Broekhoff2023-12-29 21:31:53 +0100
committerLibravatar Rutger Broekhoff2023-12-29 21:31:53 +0100
commit404aeae4545d2426c089a5f8d5e82dae56f5212b (patch)
tree2d84e00af272b39fc04f3795ae06bc48970e57b5 /vendor/github.com/klauspost/cpuid/v2/cpuid.go
parent209d8b0187ed025dec9ac149ebcced3462877bff (diff)
downloadgitolfs3-404aeae4545d2426c089a5f8d5e82dae56f5212b.tar.gz
gitolfs3-404aeae4545d2426c089a5f8d5e82dae56f5212b.zip
Make Nix builds work
Diffstat (limited to 'vendor/github.com/klauspost/cpuid/v2/cpuid.go')
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/cpuid.go1473
1 files changed, 1473 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
new file mode 100644
index 0000000..15b7603
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
@@ -0,0 +1,1473 @@
1// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
2
3// Package cpuid provides information about the CPU running the current program.
4//
5// CPU features are detected on startup, and kept for fast access through the life of the application.
6// Currently x86 / x64 (AMD64) as well as arm64 is supported.
7//
8// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
9//
10// Package home: https://github.com/klauspost/cpuid
11package cpuid
12
13import (
14 "flag"
15 "fmt"
16 "math"
17 "math/bits"
18 "os"
19 "runtime"
20 "strings"
21)
22
23// AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf
24// and Processor Programming Reference (PPR)
25
// Vendor is a representation of a CPU vendor.
type Vendor int

// Known vendors. Values are assigned sequentially by iota, so VendorUnknown
// is the zero value of Vendor.
const (
	VendorUnknown Vendor = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
	Bhyve
	Hygon
	SiS
	RDC

	Ampere
	ARM
	Broadcom
	Cavium
	DEC
	Fujitsu
	Infineon
	Motorola
	NVIDIA
	AMCC
	Qualcomm
	Marvell

	// lastVendor is a sentinel placed after every real vendor.
	lastVendor
)
60
61//go:generate stringer -type=FeatureID,Vendor
62
// FeatureID is the ID of a specific cpu feature.
type FeatureID int

// Feature identifiers.
//
// UNKNOWN is an untyped constant and occupies the first position of this
// const block, so iota is already 1 when ADX is declared: ADX == 1 and every
// following feature counts up from there. firstID (UNKNOWN + 1) is therefore 0,
// one below ADX.
const (
	// Keep index -1 as unknown
	UNKNOWN = -1

	// Add features
	ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	AESNI // Advanced Encryption Standard New Instructions
	AMD3DNOW // AMD 3DNOW
	AMD3DNOWEXT // AMD 3DNowExt
	AMXBF16 // Tile computational operations on BFLOAT16 numbers
	AMXFP16 // Tile computational operations on FP16 numbers
	AMXINT8 // Tile computational operations on 8-bit integers
	AMXTILE // Tile architecture
	APX_F // Intel APX
	AVX // AVX functions
	AVX10 // If set the Intel AVX10 Converged Vector ISA is supported
	AVX10_128 // If set indicates that AVX10 128-bit vector support is present
	AVX10_256 // If set indicates that AVX10 256-bit vector support is present
	AVX10_512 // If set indicates that AVX10 512-bit vector support is present
	AVX2 // AVX2 functions
	AVX512BF16 // AVX-512 BFLOAT16 Instructions
	AVX512BITALG // AVX-512 Bit Algorithms
	AVX512BW // AVX-512 Byte and Word Instructions
	AVX512CD // AVX-512 Conflict Detection Instructions
	AVX512DQ // AVX-512 Doubleword and Quadword Instructions
	AVX512ER // AVX-512 Exponential and Reciprocal Instructions
	AVX512F // AVX-512 Foundation
	AVX512FP16 // AVX-512 FP16 Instructions
	AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF // AVX-512 Prefetch Instructions
	AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions
	AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2
	AVX512VL // AVX-512 Vector Length Extensions
	AVX512VNNI // AVX-512 Vector Neural Network Instructions
	AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
	AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
	AVXIFMA // AVX-IFMA instructions
	AVXNECONVERT // AVX-NE-CONVERT instructions
	AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one
	AVXVNNI // AVX (VEX encoded) VNNI neural network instructions
	AVXVNNIINT8 // AVX-VNNI-INT8 instructions
	BHI_CTRL // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598
	BMI1 // Bit Manipulation Instruction Set 1
	BMI2 // Bit Manipulation Instruction Set 2
	CETIBT // Intel CET Indirect Branch Tracking
	CETSS // Intel CET Shadow Stack
	CLDEMOTE // Cache Line Demote
	CLMUL // Carry-less Multiplication
	CLZERO // CLZERO instruction supported
	CMOV // i686 CMOV
	CMPCCXADD // CMPCCXADD instructions
	CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB
	CMPXCHG8 // CMPXCHG8 instruction
	CPBOOST // Core Performance Boost
	CPPC // AMD: Collaborative Processor Performance Control
	CX16 // CMPXCHG16B Instruction
	EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
	ENQCMD // Enqueue Command
	ERMS // Enhanced REP MOVSB/STOSB
	F16C // Half-precision floating-point conversion
	FLUSH_L1D // Flush L1D cache
	FMA3 // Intel FMA 3. Does not imply AVX.
	FMA4 // Bulldozer FMA4 functions
	FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
	FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
	FSRM // Fast Short Rep Mov
	FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9
	FXSROPT // FXSAVE/FXRSTOR optimizations
	GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
	HLE // Hardware Lock Elision
	HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
	HTT // Hyperthreading (enabled)
	HWA // Hardware assert supported. Indicates support for MSRC001_10
	HYBRID_CPU // This part has CPUs of more than one type.
	HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors
	IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel)
	IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR
	IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	IBRS // AMD: Indirect Branch Restricted Speculation
	IBRS_PREFERRED // AMD: IBRS is preferred over software solution
	IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection
	IBS // Instruction Based Sampling (AMD)
	IBSBRNTRGT // Instruction Based Sampling Feature (AMD)
	IBSFETCHSAM // Instruction Based Sampling Feature (AMD)
	IBSFFV // Instruction Based Sampling Feature (AMD)
	IBSOPCNT // Instruction Based Sampling Feature (AMD)
	IBSOPCNTEXT // Instruction Based Sampling Feature (AMD)
	IBSOPSAM // Instruction Based Sampling Feature (AMD)
	IBSRDWROPCNT // Instruction Based Sampling Feature (AMD)
	IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD)
	IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported
	IBS_OPDATA4 // AMD: IBS op data 4 MSR supported
	IBS_OPFUSE // AMD: Indicates support for IbsOpFuse
	IBS_PREVENTHOST // Disallowing IBS use by the host supported
	IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4
	IDPRED_CTRL // IPRED_DIS
	INT_WBINVD // WBINVD/WBNOINVD are interruptible.
	INVLPGB // INVLPGB and TLBSYNC instruction supported
	KEYLOCKER // Key locker
	KEYLOCKERW // Key locker wide
	LAHF // LAHF/SAHF in long mode
	LAM // If set, CPU supports Linear Address Masking
	LBRVIRT // LBR virtualization
	LZCNT // LZCNT instruction
	MCAOVERFLOW // MCA overflow recovery support.
	MCDT_NO // Processors do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it.
	MCOMMIT // MCOMMIT instruction supported
	MD_CLEAR // VERW clears CPU buffers
	MMX // standard MMX
	MMXEXT // SSE integer functions or AMD MMX ext
	MOVBE // MOVBE instruction (big-endian)
	MOVDIR64B // Move 64 Bytes as Direct Store
	MOVDIRI // Move Doubleword as Direct Store
	MOVSB_ZL // Fast Zero-Length MOVSB
	MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
	MPX // Intel MPX (Memory Protection Extensions)
	MSRIRC // Instruction Retired Counter MSR available
	MSRLIST // Read/Write List of Model Specific Registers
	MSR_PAGEFLUSH // Page Flush MSR available
	NRIPS // Indicates support for NRIP save on VMEXIT
	NX // NX (No-Execute) bit
	OSXSAVE // XSAVE enabled by OS
	PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption
	POPCNT // POPCNT instruction
	PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
	PREFETCHI // PREFETCHIT0/1 instructions
	PSFD // Predictive Store Forward Disable
	RDPRU // RDPRU instruction supported
	RDRAND // RDRAND instruction is available
	RDSEED // RDSEED instruction is available
	RDTSCP // RDTSCP Instruction
	RRSBA_CTRL // Restricted RSB Alternate
	RTM // Restricted Transactional Memory
	RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort.
	SERIALIZE // Serialize Instruction Execution
	SEV // AMD Secure Encrypted Virtualization supported
	SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host
	SEV_ALTERNATIVE // AMD SEV Alternate Injection supported
	SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests
	SEV_ES // AMD SEV Encrypted State supported
	SEV_RESTRICTED // AMD SEV Restricted Injection supported
	SEV_SNP // AMD SEV Secure Nested Paging supported
	SGX // Software Guard Extensions
	SGXLC // Software Guard Extensions Launch Control
	SHA // Intel SHA Extensions
	SME // AMD Secure Memory Encryption supported
	SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced
	SPEC_CTRL_SSBD // Speculative Store Bypass Disable
	SRBDS_CTRL // SRBDS mitigation MSR available
	SSE // SSE functions
	SSE2 // P4 SSE functions
	SSE3 // Prescott SSE3 functions
	SSE4 // Penryn SSE4.1 functions
	SSE42 // Nehalem SSE4.2 functions
	SSE4A // AMD Barcelona microarchitecture SSE4a instructions
	SSSE3 // Conroe SSSE3 functions
	STIBP // Single Thread Indirect Branch Predictors
	STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
	STOSB_SHORT // Fast short STOSB
	SUCCOR // Software uncorrectable error containment and recovery capability.
	SVM // AMD Secure Virtual Machine
	SVMDA // Indicates support for the SVM decode assists.
	SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
	SVML // AMD SVM lock. Indicates support for SVM-Lock.
	SVMNP // AMD SVM nested paging
	SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter
	SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
	SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
	SYSEE // SYSENTER and SYSEXIT instructions
	TBM // AMD Trailing Bit Manipulation
	TDX_GUEST // Intel Trust Domain Extensions Guest
	TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations
	TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
	TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
	TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
	TSXLDTRK // Intel TSX Suspend Load Address Tracking
	VAES // Vector AES. AVX(512) versions requires additional checks.
	VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits.
	VMPL // AMD VM Permission Levels supported
	VMSA_REGPROT // AMD VMSA Register Protection supported
	VMX // Virtual Machine Extensions
	VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
	VTE // AMD Virtual Transparent Encryption supported
	WAITPKG // TPAUSE, UMONITOR, UMWAIT
	WBNOINVD // Write Back and Do Not Invalidate Cache
	WRMSRNS // Non-Serializing Write to Model Specific Register
	X87 // FPU
	XGETBV1 // Supports XGETBV with ECX = 1
	XOP // Bulldozer XOP functions
	XSAVE // XSAVE, XRESTOR, XSETBV, XGETBV
	XSAVEC // Supports XSAVEC and the compacted form of XRSTOR.
	XSAVEOPT // XSAVEOPT available
	XSAVES // Supports XSAVES/XRSTORS and IA32_XSS

	// ARM features:
	AESARM // AES instructions
	ARMCPUID // Some CPU ID registers readable at user-level
	ASIMD // Advanced SIMD
	ASIMDDP // SIMD Dot Product
	ASIMDHP // Advanced SIMD half-precision floating point
	ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
	ATOMICS // Large System Extensions (LSE)
	CRC32 // CRC32/CRC32C instructions
	DCPOP // Data cache clean to Point of Persistence (DC CVAP)
	EVTSTRM // Generic timer
	FCMA // Floating point complex number addition and multiplication
	FP // Single-precision and double-precision floating point
	FPHP // Half-precision floating point
	GPA // Generic Pointer Authentication
	JSCVT // Javascript-style double->int convert (FJCVTZS)
	LRCPC // Weaker release consistency (LDAPR, etc)
	PMULL // Polynomial Multiply instructions (PMULL/PMULL2)
	SHA1 // SHA-1 instructions (SHA1C, etc)
	SHA2 // SHA-2 instructions (SHA256H, etc)
	SHA3 // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
	SHA512 // SHA512 instructions
	SM3 // SM3 instructions
	SM4 // SM4 instructions
	SVE // Scalable Vector Extension
	// Keep it last. It automatically defines the size of []flagSet
	lastID

	// firstID is the lower bound used when iterating over all feature IDs
	// (see ParseFeature and flagSet.Strings).
	firstID FeatureID = UNKNOWN + 1
)
290
// CPUInfo contains information about the detected system CPU.
//
// Exported fields may be read directly; the feature bits are kept in the
// unexported featureSet and are queried through methods such as Has,
// Supports and AnyOf.
type CPUInfo struct {
	BrandName string // Brand name reported by the CPU
	VendorID Vendor // Comparable CPU vendor ID
	VendorString string // Raw vendor string.
	featureSet flagSet // Features of the CPU
	PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family int // CPU family number
	Model int // CPU model number
	Stepping int // CPU stepping info
	CacheLine int // Cache line size in bytes. Will be 0 if undetectable.
	Hz int64 // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
	BoostFreq int64 // Max clock speed, if known, 0 otherwise
	// Cache holds the detected cache sizes.
	Cache struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2 int // L2 Cache (per core or shared). Will be -1 if undetected
		L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
	}
	SGX SGXSupport // SGX details; SGXSupport is declared outside this view.
	AVX10Level uint8 // NOTE(review): presumably the reported AVX10 version; it is populated outside this view.
	maxFunc uint32 // Checked by LogicalCPU before CPUID leaf 1 is queried; presumably the highest standard CPUID leaf.
	maxExFunc uint32 // NOTE(review): presumably the highest extended CPUID leaf; it is populated outside this view.
}
317
// Low-level hooks used by the detection code. They are declared without a
// value here, so they must be assigned elsewhere before use (init calls
// initCPU before Detect; presumably that is where they are set — confirm).

// cpuid queries the CPUID leaf op and returns the four result registers.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)

// cpuidex queries CPUID leaf op with sub-leaf op2.
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)

// xgetbv reads the extended control register selected by index.
var xgetbv func(index uint32) (eax, edx uint32)

// rdtscpAsm returns the raw register values used by RTCounter (eax/edx) and
// Ia32TscAux (ecx).
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)

// darwinHasAVX512 defaults to reporting false; it is a variable so that it
// can be replaced (presumably by a darwin-specific file — confirm).
var darwinHasAVX512 = func() bool { return false }
323
// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data.
var CPU CPUInfo
329
// init prepares the package and fills in CPU once at program start.
// initCPU runs before Detect; keep that order.
func init() {
	initCPU()
	Detect()
}
334
335// Detect will re-detect current CPU info.
336// This will replace the content of the exported CPU variable.
337//
338// Unless you expect the CPU to change while you are running your program
339// you should not need to call this function.
340// If you call this, you must ensure that no other goroutine is accessing the
341// exported CPU variable.
342func Detect() {
343 // Set defaults
344 CPU.ThreadsPerCore = 1
345 CPU.Cache.L1I = -1
346 CPU.Cache.L1D = -1
347 CPU.Cache.L2 = -1
348 CPU.Cache.L3 = -1
349 safe := true
350 if detectArmFlag != nil {
351 safe = !*detectArmFlag
352 }
353 addInfo(&CPU, safe)
354 if displayFeats != nil && *displayFeats {
355 fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ","))
356 // Exit with non-zero so tests will print value.
357 os.Exit(1)
358 }
359 if disableFlag != nil {
360 s := strings.Split(*disableFlag, ",")
361 for _, feat := range s {
362 feat := ParseFeature(strings.TrimSpace(feat))
363 if feat != UNKNOWN {
364 CPU.featureSet.unset(feat)
365 }
366 }
367 }
368}
369
370// DetectARM will detect ARM64 features.
371// This is NOT done automatically since it can potentially crash
372// if the OS does not handle the command.
373// If in the future this can be done safely this function may not
374// do anything.
375func DetectARM() {
376 addInfo(&CPU, false)
377}
378
// Command-line flag values. All three stay nil until Flags is called, which
// is why Detect nil-checks each one before dereferencing it.

// detectArmFlag, when set, lets Detect run ARM detection in unsafe mode.
var detectArmFlag *bool

// displayFeats, when set, makes Detect print the feature list and exit.
var displayFeats *bool

// disableFlag holds a comma separated list of features for Detect to unset.
var disableFlag *string
382
383// Flags will enable flags.
384// This must be called *before* flag.Parse AND
385// Detect must be called after the flags have been parsed.
386// Note that this means that any detection used in init() functions
387// will not contain these flags.
388func Flags() {
389 disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list")
390 displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits")
391 detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash")
392}
393
394// Supports returns whether the CPU supports all of the requested features.
395func (c CPUInfo) Supports(ids ...FeatureID) bool {
396 for _, id := range ids {
397 if !c.featureSet.inSet(id) {
398 return false
399 }
400 }
401 return true
402}
403
404// Has allows for checking a single feature.
405// Should be inlined by the compiler.
406func (c *CPUInfo) Has(id FeatureID) bool {
407 return c.featureSet.inSet(id)
408}
409
410// AnyOf returns whether the CPU supports one or more of the requested features.
411func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
412 for _, id := range ids {
413 if c.featureSet.inSet(id) {
414 return true
415 }
416 }
417 return false
418}
419
// Features contains several features combined for a fast check using
// CPUInfo.HasAll. Values are created with CombineFeatures.
type Features *flagSet
423
424// CombineFeatures allows to combine several features for a close to constant time lookup.
425func CombineFeatures(ids ...FeatureID) Features {
426 var v flagSet
427 for _, id := range ids {
428 v.set(id)
429 }
430 return &v
431}
432
433func (c *CPUInfo) HasAll(f Features) bool {
434 return c.featureSet.hasSetP(f)
435}
436
// Feature sets used by X64Level, one per x86-64 microarchitecture level.
// Each level lists the features of the level below plus its own additions.
// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels

// oneOfLevel is the precondition for any level: X64Level returns 0 unless
// at least one of these is present.
var oneOfLevel = CombineFeatures(SYSEE, SYSCALL)

// level1Features are the features required for level 1.
var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2)

// level2Features are the features required for level 2.
var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)

// level3Features are the features required for level 3.
var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)

// level4Features are the features required for level 4.
var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
443
444// X64Level returns the microarchitecture level detected on the CPU.
445// If features are lacking or non x64 mode, 0 is returned.
446// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
447func (c CPUInfo) X64Level() int {
448 if !c.featureSet.hasOneOf(oneOfLevel) {
449 return 0
450 }
451 if c.featureSet.hasSetP(level4Features) {
452 return 4
453 }
454 if c.featureSet.hasSetP(level3Features) {
455 return 3
456 }
457 if c.featureSet.hasSetP(level2Features) {
458 return 2
459 }
460 if c.featureSet.hasSetP(level1Features) {
461 return 1
462 }
463 return 0
464}
465
466// Disable will disable one or several features.
467func (c *CPUInfo) Disable(ids ...FeatureID) bool {
468 for _, id := range ids {
469 c.featureSet.unset(id)
470 }
471 return true
472}
473
474// Enable will disable one or several features even if they were undetected.
475// This is of course not recommended for obvious reasons.
476func (c *CPUInfo) Enable(ids ...FeatureID) bool {
477 for _, id := range ids {
478 c.featureSet.set(id)
479 }
480 return true
481}
482
483// IsVendor returns true if vendor is recognized as Intel
484func (c CPUInfo) IsVendor(v Vendor) bool {
485 return c.VendorID == v
486}
487
488// FeatureSet returns all available features as strings.
489func (c CPUInfo) FeatureSet() []string {
490 s := make([]string, 0, c.featureSet.nEnabled())
491 s = append(s, c.featureSet.Strings()...)
492 return s
493}
494
495// RTCounter returns the 64-bit time-stamp counter
496// Uses the RDTSCP instruction. The value 0 is returned
497// if the CPU does not support the instruction.
498func (c CPUInfo) RTCounter() uint64 {
499 if !c.Supports(RDTSCP) {
500 return 0
501 }
502 a, _, _, d := rdtscpAsm()
503 return uint64(a) | (uint64(d) << 32)
504}
505
506// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
507// This variable is OS dependent, but on Linux contains information
508// about the current cpu/core the code is running on.
509// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
510func (c CPUInfo) Ia32TscAux() uint32 {
511 if !c.Supports(RDTSCP) {
512 return 0
513 }
514 _, _, ecx, _ := rdtscpAsm()
515 return ecx
516}
517
518// LogicalCPU will return the Logical CPU the code is currently executing on.
519// This is likely to change when the OS re-schedules the running thread
520// to another CPU.
521// If the current core cannot be detected, -1 will be returned.
522func (c CPUInfo) LogicalCPU() int {
523 if c.maxFunc < 1 {
524 return -1
525 }
526 _, ebx, _, _ := cpuid(1)
527 return int(ebx >> 24)
528}
529
530// frequencies tries to compute the clock speed of the CPU. If leaf 15 is
531// supported, use it, otherwise parse the brand string. Yes, really.
532func (c *CPUInfo) frequencies() {
533 c.Hz, c.BoostFreq = 0, 0
534 mfi := maxFunctionID()
535 if mfi >= 0x15 {
536 eax, ebx, ecx, _ := cpuid(0x15)
537 if eax != 0 && ebx != 0 && ecx != 0 {
538 c.Hz = (int64(ecx) * int64(ebx)) / int64(eax)
539 }
540 }
541 if mfi >= 0x16 {
542 a, b, _, _ := cpuid(0x16)
543 // Base...
544 if a&0xffff > 0 {
545 c.Hz = int64(a&0xffff) * 1_000_000
546 }
547 // Boost...
548 if b&0xffff > 0 {
549 c.BoostFreq = int64(b&0xffff) * 1_000_000
550 }
551 }
552 if c.Hz > 0 {
553 return
554 }
555
556 // computeHz determines the official rated speed of a CPU from its brand
557 // string. This insanity is *actually the official documented way to do
558 // this according to Intel*, prior to leaf 0x15 existing. The official
559 // documentation only shows this working for exactly `x.xx` or `xxxx`
560 // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
561 // sizes.
562 model := c.BrandName
563 hz := strings.LastIndex(model, "Hz")
564 if hz < 3 {
565 return
566 }
567 var multiplier int64
568 switch model[hz-1] {
569 case 'M':
570 multiplier = 1000 * 1000
571 case 'G':
572 multiplier = 1000 * 1000 * 1000
573 case 'T':
574 multiplier = 1000 * 1000 * 1000 * 1000
575 }
576 if multiplier == 0 {
577 return
578 }
579 freq := int64(0)
580 divisor := int64(0)
581 decimalShift := int64(1)
582 var i int
583 for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
584 if model[i] >= '0' && model[i] <= '9' {
585 freq += int64(model[i]-'0') * decimalShift
586 decimalShift *= 10
587 } else if model[i] == '.' {
588 if divisor != 0 {
589 return
590 }
591 divisor = decimalShift
592 } else {
593 return
594 }
595 }
596 // we didn't find a space
597 if i < 0 {
598 return
599 }
600 if divisor != 0 {
601 c.Hz = (freq * multiplier) / divisor
602 return
603 }
604 c.Hz = freq * multiplier
605}
606
607// VM Will return true if the cpu id indicates we are in
608// a virtual machine.
609func (c CPUInfo) VM() bool {
610 return CPU.featureSet.inSet(HYPERVISOR)
611}
612
// flags contains detected cpu features and characteristics.
// Each value is one 64-bit word of a flagSet.
type flags uint64

// log2(bits_in_uint64)
const flagBitsLog2 = 6

// flagBits is the number of feature bits per flags word (64).
const flagBits = 1 << flagBitsLog2

// flagMask extracts the bit position inside a word from a FeatureID.
const flagMask = flagBits - 1

// flagSet contains detected cpu features and characteristics in an array of flags.
// The length is lastID divided by flagBits, rounded up. A feature's word is
// id>>flagBitsLog2 and its bit within that word is id&flagMask.
type flagSet [(lastID + flagMask) / flagBits]flags
623
624func (s *flagSet) inSet(feat FeatureID) bool {
625 return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
626}
627
628func (s *flagSet) set(feat FeatureID) {
629 s[feat>>flagBitsLog2] |= 1 << (feat & flagMask)
630}
631
632// setIf will set a feature if boolean is true.
633func (s *flagSet) setIf(cond bool, features ...FeatureID) {
634 if cond {
635 for _, offset := range features {
636 s[offset>>flagBitsLog2] |= 1 << (offset & flagMask)
637 }
638 }
639}
640
641func (s *flagSet) unset(offset FeatureID) {
642 bit := flags(1 << (offset & flagMask))
643 s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit
644}
645
646// or with another flagset.
647func (s *flagSet) or(other flagSet) {
648 for i, v := range other[:] {
649 s[i] |= v
650 }
651}
652
653// hasSet returns whether all features are present.
654func (s *flagSet) hasSet(other flagSet) bool {
655 for i, v := range other[:] {
656 if s[i]&v != v {
657 return false
658 }
659 }
660 return true
661}
662
663// hasSet returns whether all features are present.
664func (s *flagSet) hasSetP(other *flagSet) bool {
665 for i, v := range other[:] {
666 if s[i]&v != v {
667 return false
668 }
669 }
670 return true
671}
672
673// hasOneOf returns whether one or more features are present.
674func (s *flagSet) hasOneOf(other *flagSet) bool {
675 for i, v := range other[:] {
676 if s[i]&v != 0 {
677 return true
678 }
679 }
680 return false
681}
682
683// nEnabled will return the number of enabled flags.
684func (s *flagSet) nEnabled() (n int) {
685 for _, v := range s[:] {
686 n += bits.OnesCount64(uint64(v))
687 }
688 return n
689}
690
691func flagSetWith(feat ...FeatureID) flagSet {
692 var res flagSet
693 for _, f := range feat {
694 res.set(f)
695 }
696 return res
697}
698
699// ParseFeature will parse the string and return the ID of the matching feature.
700// Will return UNKNOWN if not found.
701func ParseFeature(s string) FeatureID {
702 s = strings.ToUpper(s)
703 for i := firstID; i < lastID; i++ {
704 if i.String() == s {
705 return i
706 }
707 }
708 return UNKNOWN
709}
710
711// Strings returns an array of the detected features for FlagsSet.
712func (s flagSet) Strings() []string {
713 if len(s) == 0 {
714 return []string{""}
715 }
716 r := make([]string, 0)
717 for i := firstID; i < lastID; i++ {
718 if s.inSet(i) {
719 r = append(r, i.String())
720 }
721 }
722 return r
723}
724
725func maxExtendedFunction() uint32 {
726 eax, _, _, _ := cpuid(0x80000000)
727 return eax
728}
729
730func maxFunctionID() uint32 {
731 a, _, _, _ := cpuid(0)
732 return a
733}
734
735func brandName() string {
736 if maxExtendedFunction() >= 0x80000004 {
737 v := make([]uint32, 0, 48)
738 for i := uint32(0); i < 3; i++ {
739 a, b, c, d := cpuid(0x80000002 + i)
740 v = append(v, a, b, c, d)
741 }
742 return strings.Trim(string(valAsString(v...)), " ")
743 }
744 return "unknown"
745}
746
747func threadsPerCore() int {
748 mfi := maxFunctionID()
749 vend, _ := vendorID()
750
751 if mfi < 0x4 || (vend != Intel && vend != AMD) {
752 return 1
753 }
754
755 if mfi < 0xb {
756 if vend != Intel {
757 return 1
758 }
759 _, b, _, d := cpuid(1)
760 if (d & (1 << 28)) != 0 {
761 // v will contain logical core count
762 v := (b >> 16) & 255
763 if v > 1 {
764 a4, _, _, _ := cpuid(4)
765 // physical cores
766 v2 := (a4 >> 26) + 1
767 if v2 > 0 {
768 return int(v) / int(v2)
769 }
770 }
771 }
772 return 1
773 }
774 _, b, _, _ := cpuidex(0xb, 0)
775 if b&0xffff == 0 {
776 if vend == AMD {
777 // Workaround for AMD returning 0, assume 2 if >= Zen 2
778 // It will be more correct than not.
779 fam, _, _ := familyModel()
780 _, _, _, d := cpuid(1)
781 if (d&(1<<28)) != 0 && fam >= 23 {
782 return 2
783 }
784 }
785 return 1
786 }
787 return int(b & 0xffff)
788}
789
790func logicalCores() int {
791 mfi := maxFunctionID()
792 v, _ := vendorID()
793 switch v {
794 case Intel:
795 // Use this on old Intel processors
796 if mfi < 0xb {
797 if mfi < 1 {
798 return 0
799 }
800 // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
801 // that can be assigned to logical processors in a physical package.
802 // The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
803 _, ebx, _, _ := cpuid(1)
804 logical := (ebx >> 16) & 0xff
805 return int(logical)
806 }
807 _, b, _, _ := cpuidex(0xb, 1)
808 return int(b & 0xffff)
809 case AMD, Hygon:
810 _, b, _, _ := cpuid(1)
811 return int((b >> 16) & 0xff)
812 default:
813 return 0
814 }
815}
816
817func familyModel() (family, model, stepping int) {
818 if maxFunctionID() < 0x1 {
819 return 0, 0, 0
820 }
821 eax, _, _, _ := cpuid(1)
822 // If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0].
823 family = int((eax >> 8) & 0xf)
824 extFam := family == 0x6 // Intel is 0x6, needs extended model.
825 if family == 0xf {
826 // Add ExtFamily
827 family += int((eax >> 20) & 0xff)
828 extFam = true
829 }
830 // If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0].
831 model = int((eax >> 4) & 0xf)
832 if extFam {
833 // Add ExtModel
834 model += int((eax >> 12) & 0xf0)
835 }
836 stepping = int(eax & 0xf)
837 return family, model, stepping
838}
839
840func physicalCores() int {
841 v, _ := vendorID()
842 switch v {
843 case Intel:
844 return logicalCores() / threadsPerCore()
845 case AMD, Hygon:
846 lc := logicalCores()
847 tpc := threadsPerCore()
848 if lc > 0 && tpc > 0 {
849 return lc / tpc
850 }
851
852 // The following is inaccurate on AMD EPYC 7742 64-Core Processor
853 if maxExtendedFunction() >= 0x80000008 {
854 _, _, c, _ := cpuid(0x80000008)
855 if c&0xff > 0 {
856 return int(c&0xff) + 1
857 }
858 }
859 }
860 return 0
861}
862
863// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
864var vendorMapping = map[string]Vendor{
865 "AMDisbetter!": AMD,
866 "AuthenticAMD": AMD,
867 "CentaurHauls": VIA,
868 "GenuineIntel": Intel,
869 "TransmetaCPU": Transmeta,
870 "GenuineTMx86": Transmeta,
871 "Geode by NSC": NSC,
872 "VIA VIA VIA ": VIA,
873 "KVMKVMKVMKVM": KVM,
874 "Microsoft Hv": MSVM,
875 "VMwareVMware": VMware,
876 "XenVMMXenVMM": XenHVM,
877 "bhyve bhyve ": Bhyve,
878 "HygonGenuine": Hygon,
879 "Vortex86 SoC": SiS,
880 "SiS SiS SiS ": SiS,
881 "RiseRiseRise": SiS,
882 "Genuine RDC": RDC,
883}
884
885func vendorID() (Vendor, string) {
886 _, b, c, d := cpuid(0)
887 v := string(valAsString(b, d, c))
888 vend, ok := vendorMapping[v]
889 if !ok {
890 return VendorUnknown, v
891 }
892 return vend, v
893}
894
895func cacheLine() int {
896 if maxFunctionID() < 0x1 {
897 return 0
898 }
899
900 _, ebx, _, _ := cpuid(1)
901 cache := (ebx & 0xff00) >> 5 // cflush size
902 if cache == 0 && maxExtendedFunction() >= 0x80000006 {
903 _, _, ecx, _ := cpuid(0x80000006)
904 cache = ecx & 0xff // cacheline size
905 }
906 // TODO: Read from Cache and TLB Information
907 return int(cache)
908}
909
910func (c *CPUInfo) cacheSize() {
911 c.Cache.L1D = -1
912 c.Cache.L1I = -1
913 c.Cache.L2 = -1
914 c.Cache.L3 = -1
915 vendor, _ := vendorID()
916 switch vendor {
917 case Intel:
918 if maxFunctionID() < 4 {
919 return
920 }
921 c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0
922 for i := uint32(0); ; i++ {
923 eax, ebx, ecx, _ := cpuidex(4, i)
924 cacheType := eax & 15
925 if cacheType == 0 {
926 break
927 }
928 cacheLevel := (eax >> 5) & 7
929 coherency := int(ebx&0xfff) + 1
930 partitions := int((ebx>>12)&0x3ff) + 1
931 associativity := int((ebx>>22)&0x3ff) + 1
932 sets := int(ecx) + 1
933 size := associativity * partitions * coherency * sets
934 switch cacheLevel {
935 case 1:
936 if cacheType == 1 {
937 // 1 = Data Cache
938 c.Cache.L1D = size
939 } else if cacheType == 2 {
940 // 2 = Instruction Cache
941 c.Cache.L1I = size
942 } else {
943 if c.Cache.L1D < 0 {
944 c.Cache.L1I = size
945 }
946 if c.Cache.L1I < 0 {
947 c.Cache.L1I = size
948 }
949 }
950 case 2:
951 c.Cache.L2 = size
952 case 3:
953 c.Cache.L3 = size
954 }
955 }
956 case AMD, Hygon:
957 // Untested.
958 if maxExtendedFunction() < 0x80000005 {
959 return
960 }
961 _, _, ecx, edx := cpuid(0x80000005)
962 c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
963 c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
964
965 if maxExtendedFunction() < 0x80000006 {
966 return
967 }
968 _, _, ecx, _ = cpuid(0x80000006)
969 c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
970
971 // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
972 if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) {
973 return
974 }
975
976 // Xen Hypervisor is buggy and returns the same entry no matter ECX value.
977 // Hack: When we encounter the same entry 100 times we break.
978 nSame := 0
979 var last uint32
980 for i := uint32(0); i < math.MaxUint32; i++ {
981 eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
982
983 level := (eax >> 5) & 7
984 cacheNumSets := ecx + 1
985 cacheLineSize := 1 + (ebx & 2047)
986 cachePhysPartitions := 1 + ((ebx >> 12) & 511)
987 cacheNumWays := 1 + ((ebx >> 22) & 511)
988
989 typ := eax & 15
990 size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
991 if typ == 0 {
992 return
993 }
994
995 // Check for the same value repeated.
996 comb := eax ^ ebx ^ ecx
997 if comb == last {
998 nSame++
999 if nSame == 100 {
1000 return
1001 }
1002 }
1003 last = comb
1004
1005 switch level {
1006 case 1:
1007 switch typ {
1008 case 1:
1009 // Data cache
1010 c.Cache.L1D = size
1011 case 2:
1012 // Inst cache
1013 c.Cache.L1I = size
1014 default:
1015 if c.Cache.L1D < 0 {
1016 c.Cache.L1I = size
1017 }
1018 if c.Cache.L1I < 0 {
1019 c.Cache.L1I = size
1020 }
1021 }
1022 case 2:
1023 c.Cache.L2 = size
1024 case 3:
1025 c.Cache.L3 = size
1026 }
1027 }
1028 }
1029}
1030
// SGXEPCSection describes one EPC section as enumerated by hasSGX from the
// sub-leaves (2 and up) of CPUID leaf 0x12.
type SGXEPCSection struct {
	// BaseAddress is assembled from EAX[31:12] (low part) and EBX[19:0]
	// (shifted into bits 32 and up).
	BaseAddress uint64
	// EPCSize is assembled the same way from ECX[31:12] and EDX[19:0].
	EPCSize uint64
}
1035
// SGXSupport holds the SGX information collected by hasSGX. When Available is
// false every other field is left at its zero value.
type SGXSupport struct {
	// Available and LaunchControl mirror the arguments passed to hasSGX.
	Available     bool
	LaunchControl bool
	// SGX1Supported and SGX2Supported are bits 0 and 1 of EAX from
	// CPUID leaf 0x12, sub-leaf 0.
	SGX1Supported bool
	SGX2Supported bool
	// MaxEnclaveSizeNot64 is 1 << EDX[7:0] and MaxEnclaveSize64 is
	// 1 << EDX[15:8] of the same sub-leaf (both powers of two).
	MaxEnclaveSizeNot64 int64
	MaxEnclaveSize64    int64
	// EPCSections lists the EPC section sub-leaves that were found.
	EPCSections []SGXEPCSection
}
1045
1046func hasSGX(available, lc bool) (rval SGXSupport) {
1047 rval.Available = available
1048
1049 if !available {
1050 return
1051 }
1052
1053 rval.LaunchControl = lc
1054
1055 a, _, _, d := cpuidex(0x12, 0)
1056 rval.SGX1Supported = a&0x01 != 0
1057 rval.SGX2Supported = a&0x02 != 0
1058 rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2
1059 rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
1060 rval.EPCSections = make([]SGXEPCSection, 0)
1061
1062 for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
1063 eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
1064 leafType := eax & 0xf
1065
1066 if leafType == 0 {
1067 // Invalid subleaf, stop iterating
1068 break
1069 } else if leafType == 1 {
1070 // EPC Section subleaf
1071 baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
1072 size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
1073
1074 section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size}
1075 rval.EPCSections = append(rval.EPCSections, section)
1076 }
1077 }
1078
1079 return
1080}
1081
1082func support() flagSet {
1083 var fs flagSet
1084 mfi := maxFunctionID()
1085 vend, _ := vendorID()
1086 if mfi < 0x1 {
1087 return fs
1088 }
1089 family, model, _ := familyModel()
1090
1091 _, _, c, d := cpuid(1)
1092 fs.setIf((d&(1<<0)) != 0, X87)
1093 fs.setIf((d&(1<<8)) != 0, CMPXCHG8)
1094 fs.setIf((d&(1<<11)) != 0, SYSEE)
1095 fs.setIf((d&(1<<15)) != 0, CMOV)
1096 fs.setIf((d&(1<<23)) != 0, MMX)
1097 fs.setIf((d&(1<<24)) != 0, FXSR)
1098 fs.setIf((d&(1<<25)) != 0, FXSROPT)
1099 fs.setIf((d&(1<<25)) != 0, SSE)
1100 fs.setIf((d&(1<<26)) != 0, SSE2)
1101 fs.setIf((c&1) != 0, SSE3)
1102 fs.setIf((c&(1<<5)) != 0, VMX)
1103 fs.setIf((c&(1<<9)) != 0, SSSE3)
1104 fs.setIf((c&(1<<19)) != 0, SSE4)
1105 fs.setIf((c&(1<<20)) != 0, SSE42)
1106 fs.setIf((c&(1<<25)) != 0, AESNI)
1107 fs.setIf((c&(1<<1)) != 0, CLMUL)
1108 fs.setIf(c&(1<<22) != 0, MOVBE)
1109 fs.setIf(c&(1<<23) != 0, POPCNT)
1110 fs.setIf(c&(1<<30) != 0, RDRAND)
1111
1112 // This bit has been reserved by Intel & AMD for use by hypervisors,
1113 // and indicates the presence of a hypervisor.
1114 fs.setIf(c&(1<<31) != 0, HYPERVISOR)
1115 fs.setIf(c&(1<<29) != 0, F16C)
1116 fs.setIf(c&(1<<13) != 0, CX16)
1117
1118 if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
1119 fs.setIf(threadsPerCore() > 1, HTT)
1120 }
1121 if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
1122 fs.setIf(threadsPerCore() > 1, HTT)
1123 }
1124 fs.setIf(c&1<<26 != 0, XSAVE)
1125 fs.setIf(c&1<<27 != 0, OSXSAVE)
1126 // Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits
1127 const avxCheck = 1<<26 | 1<<27 | 1<<28
1128 if c&avxCheck == avxCheck {
1129 // Check for OS support
1130 eax, _ := xgetbv(0)
1131 if (eax & 0x6) == 0x6 {
1132 fs.set(AVX)
1133 switch vend {
1134 case Intel:
1135 // Older than Haswell.
1136 fs.setIf(family == 6 && model < 60, AVXSLOW)
1137 case AMD:
1138 // Older than Zen 2
1139 fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW)
1140 }
1141 }
1142 }
1143 // FMA3 can be used with SSE registers, so no OS support is strictly needed.
1144 // fma3 and OSXSAVE needed.
1145 const fma3Check = 1<<12 | 1<<27
1146 fs.setIf(c&fma3Check == fma3Check, FMA3)
1147
1148 // Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
1149 if mfi >= 7 {
1150 _, ebx, ecx, edx := cpuidex(7, 0)
1151 if fs.inSet(AVX) && (ebx&0x00000020) != 0 {
1152 fs.set(AVX2)
1153 }
1154 // CPUID.(EAX=7, ECX=0).EBX
1155 if (ebx & 0x00000008) != 0 {
1156 fs.set(BMI1)
1157 fs.setIf((ebx&0x00000100) != 0, BMI2)
1158 }
1159 fs.setIf(ebx&(1<<2) != 0, SGX)
1160 fs.setIf(ebx&(1<<4) != 0, HLE)
1161 fs.setIf(ebx&(1<<9) != 0, ERMS)
1162 fs.setIf(ebx&(1<<11) != 0, RTM)
1163 fs.setIf(ebx&(1<<14) != 0, MPX)
1164 fs.setIf(ebx&(1<<18) != 0, RDSEED)
1165 fs.setIf(ebx&(1<<19) != 0, ADX)
1166 fs.setIf(ebx&(1<<29) != 0, SHA)
1167
1168 // CPUID.(EAX=7, ECX=0).ECX
1169 fs.setIf(ecx&(1<<5) != 0, WAITPKG)
1170 fs.setIf(ecx&(1<<7) != 0, CETSS)
1171 fs.setIf(ecx&(1<<8) != 0, GFNI)
1172 fs.setIf(ecx&(1<<9) != 0, VAES)
1173 fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
1174 fs.setIf(ecx&(1<<13) != 0, TME)
1175 fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
1176 fs.setIf(ecx&(1<<23) != 0, KEYLOCKER)
1177 fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
1178 fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
1179 fs.setIf(ecx&(1<<29) != 0, ENQCMD)
1180 fs.setIf(ecx&(1<<30) != 0, SGXLC)
1181
1182 // CPUID.(EAX=7, ECX=0).EDX
1183 fs.setIf(edx&(1<<4) != 0, FSRM)
1184 fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL)
1185 fs.setIf(edx&(1<<10) != 0, MD_CLEAR)
1186 fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
1187 fs.setIf(edx&(1<<14) != 0, SERIALIZE)
1188 fs.setIf(edx&(1<<15) != 0, HYBRID_CPU)
1189 fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
1190 fs.setIf(edx&(1<<18) != 0, PCONFIG)
1191 fs.setIf(edx&(1<<20) != 0, CETIBT)
1192 fs.setIf(edx&(1<<26) != 0, IBPB)
1193 fs.setIf(edx&(1<<27) != 0, STIBP)
1194 fs.setIf(edx&(1<<28) != 0, FLUSH_L1D)
1195 fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP)
1196 fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP)
1197 fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD)
1198
1199 // CPUID.(EAX=7, ECX=1).EAX
1200 eax1, _, _, edx1 := cpuidex(7, 1)
1201 fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
1202 fs.setIf(eax1&(1<<7) != 0, CMPCCXADD)
1203 fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
1204 fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
1205 fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
1206 fs.setIf(eax1&(1<<22) != 0, HRESET)
1207 fs.setIf(eax1&(1<<23) != 0, AVXIFMA)
1208 fs.setIf(eax1&(1<<26) != 0, LAM)
1209
1210 // CPUID.(EAX=7, ECX=1).EDX
1211 fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8)
1212 fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT)
1213 fs.setIf(edx1&(1<<14) != 0, PREFETCHI)
1214 fs.setIf(edx1&(1<<19) != 0, AVX10)
1215 fs.setIf(edx1&(1<<21) != 0, APX_F)
1216
1217 // Only detect AVX-512 features if XGETBV is supported
1218 if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
1219 // Check for OS support
1220 eax, _ := xgetbv(0)
1221
1222 // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
1223 // ZMM16-ZMM31 state are enabled by OS)
1224 /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
1225 hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3
1226 if runtime.GOOS == "darwin" {
1227 hasAVX512 = fs.inSet(AVX) && darwinHasAVX512()
1228 }
1229 if hasAVX512 {
1230 fs.setIf(ebx&(1<<16) != 0, AVX512F)
1231 fs.setIf(ebx&(1<<17) != 0, AVX512DQ)
1232 fs.setIf(ebx&(1<<21) != 0, AVX512IFMA)
1233 fs.setIf(ebx&(1<<26) != 0, AVX512PF)
1234 fs.setIf(ebx&(1<<27) != 0, AVX512ER)
1235 fs.setIf(ebx&(1<<28) != 0, AVX512CD)
1236 fs.setIf(ebx&(1<<30) != 0, AVX512BW)
1237 fs.setIf(ebx&(1<<31) != 0, AVX512VL)
1238 // ecx
1239 fs.setIf(ecx&(1<<1) != 0, AVX512VBMI)
1240 fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2)
1241 fs.setIf(ecx&(1<<11) != 0, AVX512VNNI)
1242 fs.setIf(ecx&(1<<12) != 0, AVX512BITALG)
1243 fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ)
1244 // edx
1245 fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT)
1246 fs.setIf(edx&(1<<22) != 0, AMXBF16)
1247 fs.setIf(edx&(1<<23) != 0, AVX512FP16)
1248 fs.setIf(edx&(1<<24) != 0, AMXTILE)
1249 fs.setIf(edx&(1<<25) != 0, AMXINT8)
1250 // eax1 = CPUID.(EAX=7, ECX=1).EAX
1251 fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
1252 fs.setIf(eax1&(1<<19) != 0, WRMSRNS)
1253 fs.setIf(eax1&(1<<21) != 0, AMXFP16)
1254 fs.setIf(eax1&(1<<27) != 0, MSRLIST)
1255 }
1256 }
1257
1258 // CPUID.(EAX=7, ECX=2)
1259 _, _, _, edx = cpuidex(7, 2)
1260 fs.setIf(edx&(1<<0) != 0, PSFD)
1261 fs.setIf(edx&(1<<1) != 0, IDPRED_CTRL)
1262 fs.setIf(edx&(1<<2) != 0, RRSBA_CTRL)
1263 fs.setIf(edx&(1<<4) != 0, BHI_CTRL)
1264 fs.setIf(edx&(1<<5) != 0, MCDT_NO)
1265
1266 // Add keylocker features.
1267 if fs.inSet(KEYLOCKER) && mfi >= 0x19 {
1268 _, ebx, _, _ := cpuidex(0x19, 0)
1269 fs.setIf(ebx&5 == 5, KEYLOCKERW) // Bit 0 and 2 (1+4)
1270 }
1271
1272 // Add AVX10 features.
1273 if fs.inSet(AVX10) && mfi >= 0x24 {
1274 _, ebx, _, _ := cpuidex(0x24, 0)
1275 fs.setIf(ebx&(1<<16) != 0, AVX10_128)
1276 fs.setIf(ebx&(1<<17) != 0, AVX10_256)
1277 fs.setIf(ebx&(1<<18) != 0, AVX10_512)
1278 }
1279 }
1280
1281 // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
1282 // EAX
1283 // Bit 00: XSAVEOPT is available.
1284 // Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set.
1285 // Bit 02: Supports XGETBV with ECX = 1 if set.
1286 // Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set.
1287 // Bits 31 - 04: Reserved.
1288 // EBX
1289 // Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS.
1290 // ECX
1291 // Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1.
1292 // EDX?
1293 // Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved.
1294 if mfi >= 0xd {
1295 if fs.inSet(XSAVE) {
1296 eax, _, _, _ := cpuidex(0xd, 1)
1297 fs.setIf(eax&(1<<0) != 0, XSAVEOPT)
1298 fs.setIf(eax&(1<<1) != 0, XSAVEC)
1299 fs.setIf(eax&(1<<2) != 0, XGETBV1)
1300 fs.setIf(eax&(1<<3) != 0, XSAVES)
1301 }
1302 }
1303 if maxExtendedFunction() >= 0x80000001 {
1304 _, _, c, d := cpuid(0x80000001)
1305 if (c & (1 << 5)) != 0 {
1306 fs.set(LZCNT)
1307 fs.set(POPCNT)
1308 }
1309 // ECX
1310 fs.setIf((c&(1<<0)) != 0, LAHF)
1311 fs.setIf((c&(1<<2)) != 0, SVM)
1312 fs.setIf((c&(1<<6)) != 0, SSE4A)
1313 fs.setIf((c&(1<<10)) != 0, IBS)
1314 fs.setIf((c&(1<<22)) != 0, TOPEXT)
1315
1316 // EDX
1317 fs.setIf(d&(1<<11) != 0, SYSCALL)
1318 fs.setIf(d&(1<<20) != 0, NX)
1319 fs.setIf(d&(1<<22) != 0, MMXEXT)
1320 fs.setIf(d&(1<<23) != 0, MMX)
1321 fs.setIf(d&(1<<24) != 0, FXSR)
1322 fs.setIf(d&(1<<25) != 0, FXSROPT)
1323 fs.setIf(d&(1<<27) != 0, RDTSCP)
1324 fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT)
1325 fs.setIf(d&(1<<31) != 0, AMD3DNOW)
1326
1327 /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
1328 * used unless the OS has AVX support. */
1329 if fs.inSet(AVX) {
1330 fs.setIf((c&(1<<11)) != 0, XOP)
1331 fs.setIf((c&(1<<16)) != 0, FMA4)
1332 }
1333
1334 }
1335 if maxExtendedFunction() >= 0x80000007 {
1336 _, b, _, d := cpuid(0x80000007)
1337 fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW)
1338 fs.setIf((b&(1<<1)) != 0, SUCCOR)
1339 fs.setIf((b&(1<<2)) != 0, HWA)
1340 fs.setIf((d&(1<<9)) != 0, CPBOOST)
1341 }
1342
1343 if maxExtendedFunction() >= 0x80000008 {
1344 _, b, _, _ := cpuid(0x80000008)
1345 fs.setIf(b&(1<<28) != 0, PSFD)
1346 fs.setIf(b&(1<<27) != 0, CPPC)
1347 fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD)
1348 fs.setIf(b&(1<<23) != 0, PPIN)
1349 fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED)
1350 fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS)
1351 fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP)
1352 fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED)
1353 fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON)
1354 fs.setIf(b&(1<<15) != 0, STIBP)
1355 fs.setIf(b&(1<<14) != 0, IBRS)
1356 fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
1357 fs.setIf(b&(1<<12) != 0, IBPB)
1358 fs.setIf((b&(1<<9)) != 0, WBNOINVD)
1359 fs.setIf((b&(1<<8)) != 0, MCOMMIT)
1360 fs.setIf((b&(1<<4)) != 0, RDPRU)
1361 fs.setIf((b&(1<<3)) != 0, INVLPGB)
1362 fs.setIf((b&(1<<1)) != 0, MSRIRC)
1363 fs.setIf((b&(1<<0)) != 0, CLZERO)
1364 }
1365
1366 if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A {
1367 _, _, _, edx := cpuid(0x8000000A)
1368 fs.setIf((edx>>0)&1 == 1, SVMNP)
1369 fs.setIf((edx>>1)&1 == 1, LBRVIRT)
1370 fs.setIf((edx>>2)&1 == 1, SVML)
1371 fs.setIf((edx>>3)&1 == 1, NRIPS)
1372 fs.setIf((edx>>4)&1 == 1, TSCRATEMSR)
1373 fs.setIf((edx>>5)&1 == 1, VMCBCLEAN)
1374 fs.setIf((edx>>6)&1 == 1, SVMFBASID)
1375 fs.setIf((edx>>7)&1 == 1, SVMDA)
1376 fs.setIf((edx>>10)&1 == 1, SVMPF)
1377 fs.setIf((edx>>12)&1 == 1, SVMPFT)
1378 }
1379
1380 if maxExtendedFunction() >= 0x8000001a {
1381 eax, _, _, _ := cpuid(0x8000001a)
1382 fs.setIf((eax>>0)&1 == 1, FP128)
1383 fs.setIf((eax>>1)&1 == 1, MOVU)
1384 fs.setIf((eax>>2)&1 == 1, FP256)
1385 }
1386
1387 if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
1388 eax, _, _, _ := cpuid(0x8000001b)
1389 fs.setIf((eax>>0)&1 == 1, IBSFFV)
1390 fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM)
1391 fs.setIf((eax>>2)&1 == 1, IBSOPSAM)
1392 fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT)
1393 fs.setIf((eax>>4)&1 == 1, IBSOPCNT)
1394 fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
1395 fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
1396 fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
1397 fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE)
1398 fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX)
1399 fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1.
1400 fs.setIf((eax>>11)&1 == 1, IBS_ZEN4)
1401 }
1402
1403 if maxExtendedFunction() >= 0x8000001f && vend == AMD {
1404 a, _, _, _ := cpuid(0x8000001f)
1405 fs.setIf((a>>0)&1 == 1, SME)
1406 fs.setIf((a>>1)&1 == 1, SEV)
1407 fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH)
1408 fs.setIf((a>>3)&1 == 1, SEV_ES)
1409 fs.setIf((a>>4)&1 == 1, SEV_SNP)
1410 fs.setIf((a>>5)&1 == 1, VMPL)
1411 fs.setIf((a>>10)&1 == 1, SME_COHERENT)
1412 fs.setIf((a>>11)&1 == 1, SEV_64BIT)
1413 fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED)
1414 fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE)
1415 fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP)
1416 fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST)
1417 fs.setIf((a>>16)&1 == 1, VTE)
1418 fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
1419 }
1420
1421 if mfi >= 0x20 {
1422 // Microsoft has decided to purposefully hide the information
1423 // of the guest TEE when VMs are being created using Hyper-V.
1424 //
1425 // This leads us to check for the Hyper-V cpuid features
1426 // (0x4000000C), and then for the `ebx` value set.
1427 //
1428 // For Intel TDX, `ebx` is set as `0xbe3`, being 3 the part
1429 // we're mostly interested about,according to:
1430 // https://github.com/torvalds/linux/blob/d2f51b3516dade79269ff45eae2a7668ae711b25/arch/x86/include/asm/hyperv-tlfs.h#L169-L174
1431 _, ebx, _, _ := cpuid(0x4000000C)
1432 fs.setIf(ebx == 0xbe3, TDX_GUEST)
1433 }
1434
1435 if mfi >= 0x21 {
1436 // Intel Trusted Domain Extensions Guests have their own cpuid leaf (0x21).
1437 _, ebx, ecx, edx := cpuid(0x21)
1438 identity := string(valAsString(ebx, edx, ecx))
1439 fs.setIf(identity == "IntelTDX ", TDX_GUEST)
1440 }
1441
1442 return fs
1443}
1444
1445func (c *CPUInfo) supportAVX10() uint8 {
1446 if c.maxFunc >= 0x24 && c.featureSet.inSet(AVX10) {
1447 _, ebx, _, _ := cpuidex(0x24, 0)
1448 return uint8(ebx)
1449 }
1450 return 0
1451}
1452
// valAsString unpacks each 32-bit value into four bytes, least significant
// byte first, and concatenates them in argument order. The result is cut off
// just before the first zero byte, if there is one.
func valAsString(values ...uint32) []byte {
	buf := make([]byte, 4*len(values))
	for idx, word := range values {
		base := idx * 4
		// Store the whole word before looking for a terminator.
		for k := 0; k < 4; k++ {
			buf[base+k] = byte(word >> uint(8*k))
		}
		for k := 0; k < 4; k++ {
			if buf[base+k] == 0 {
				return buf[:base+k]
			}
		}
	}
	return buf
}