diff options
Diffstat (limited to 'vendor/github.com/klauspost/cpuid/v2/cpuid.go')
| -rw-r--r-- | vendor/github.com/klauspost/cpuid/v2/cpuid.go | 1473 |
1 files changed, 1473 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go new file mode 100644 index 0000000..15b7603 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go | |||
| @@ -0,0 +1,1473 @@ | |||
| 1 | // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. | ||
| 2 | |||
| 3 | // Package cpuid provides information about the CPU running the current program. | ||
| 4 | // | ||
| 5 | // CPU features are detected on startup, and kept for fast access through the life of the application. | ||
| 6 | // Currently x86 / x64 (AMD64) as well as arm64 is supported. | ||
| 7 | // | ||
| 8 | // You can access the CPU information by accessing the shared CPU variable of the cpuid library. | ||
| 9 | // | ||
| 10 | // Package home: https://github.com/klauspost/cpuid | ||
| 11 | package cpuid | ||
| 12 | |||
| 13 | import ( | ||
| 14 | "flag" | ||
| 15 | "fmt" | ||
| 16 | "math" | ||
| 17 | "math/bits" | ||
| 18 | "os" | ||
| 19 | "runtime" | ||
| 20 | "strings" | ||
| 21 | ) | ||
| 22 | |||
| 23 | // AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf | ||
| 24 | // and Processor Programming Reference (PPR) | ||
| 25 | |||
// Vendor is a representation of a CPU vendor.
// Values are assigned sequentially by iota, so the numeric value of a
// vendor depends on its position in the list below.
type Vendor int

const (
	// VendorUnknown is the zero value of Vendor.
	VendorUnknown Vendor = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
	Bhyve
	Hygon
	SiS
	RDC

	Ampere
	ARM
	Broadcom
	Cavium
	DEC
	Fujitsu
	Infineon
	Motorola
	NVIDIA
	AMCC
	Qualcomm
	Marvell

	// lastVendor is one past the last vendor constant. Keep it last.
	lastVendor
)
| 60 | |||
| 61 | //go:generate stringer -type=FeatureID,Vendor | ||
| 62 | |||
// FeatureID is the ID of a specific cpu feature.
type FeatureID int

const (
	// Keep index -1 as unknown
	UNKNOWN = -1

	// Add features
	//
	// ADX is the second constant in this block, so iota is 1 here:
	// feature IDs start at 1, and 0 (firstID) is not assigned to a feature.
	ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	AESNI // Advanced Encryption Standard New Instructions
	AMD3DNOW // AMD 3DNOW
	AMD3DNOWEXT // AMD 3DNowExt
	AMXBF16 // Tile computational operations on BFLOAT16 numbers
	AMXFP16 // Tile computational operations on FP16 numbers
	AMXINT8 // Tile computational operations on 8-bit integers
	AMXTILE // Tile architecture
	APX_F // Intel APX
	AVX // AVX functions
	AVX10 // If set the Intel AVX10 Converged Vector ISA is supported
	AVX10_128 // If set indicates that AVX10 128-bit vector support is present
	AVX10_256 // If set indicates that AVX10 256-bit vector support is present
	AVX10_512 // If set indicates that AVX10 512-bit vector support is present
	AVX2 // AVX2 functions
	AVX512BF16 // AVX-512 BFLOAT16 Instructions
	AVX512BITALG // AVX-512 Bit Algorithms
	AVX512BW // AVX-512 Byte and Word Instructions
	AVX512CD // AVX-512 Conflict Detection Instructions
	AVX512DQ // AVX-512 Doubleword and Quadword Instructions
	AVX512ER // AVX-512 Exponential and Reciprocal Instructions
	AVX512F // AVX-512 Foundation
	AVX512FP16 // AVX-512 FP16 Instructions
	AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF // AVX-512 Prefetch Instructions
	AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions
	AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2
	AVX512VL // AVX-512 Vector Length Extensions
	AVX512VNNI // AVX-512 Vector Neural Network Instructions
	AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
	AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
	AVXIFMA // AVX-IFMA instructions
	AVXNECONVERT // AVX-NE-CONVERT instructions
	AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one
	AVXVNNI // AVX (VEX encoded) VNNI neural network instructions
	AVXVNNIINT8 // AVX-VNNI-INT8 instructions
	BHI_CTRL // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598
	BMI1 // Bit Manipulation Instruction Set 1
	BMI2 // Bit Manipulation Instruction Set 2
	CETIBT // Intel CET Indirect Branch Tracking
	CETSS // Intel CET Shadow Stack
	CLDEMOTE // Cache Line Demote
	CLMUL // Carry-less Multiplication
	CLZERO // CLZERO instruction supported
	CMOV // i686 CMOV
	CMPCCXADD // CMPCCXADD instructions
	CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB
	CMPXCHG8 // CMPXCHG8 instruction
	CPBOOST // Core Performance Boost
	CPPC // AMD: Collaborative Processor Performance Control
	CX16 // CMPXCHG16B Instruction
	EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
	ENQCMD // Enqueue Command
	ERMS // Enhanced REP MOVSB/STOSB
	F16C // Half-precision floating-point conversion
	FLUSH_L1D // Flush L1D cache
	FMA3 // Intel FMA 3. Does not imply AVX.
	FMA4 // Bulldozer FMA4 functions
	FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
	FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
	FSRM // Fast Short Rep Mov
	FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9
	FXSROPT // FXSAVE/FXRSTOR optimizations
	GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
	HLE // Hardware Lock Elision
	HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
	HTT // Hyperthreading (enabled)
	HWA // Hardware assert supported. Indicates support for MSRC001_10
	HYBRID_CPU // This part has CPUs of more than one type.
	HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors
	IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel)
	IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR
	IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	IBRS // AMD: Indirect Branch Restricted Speculation
	IBRS_PREFERRED // AMD: IBRS is preferred over software solution
	IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection
	IBS // Instruction Based Sampling (AMD)
	IBSBRNTRGT // Instruction Based Sampling Feature (AMD)
	IBSFETCHSAM // Instruction Based Sampling Feature (AMD)
	IBSFFV // Instruction Based Sampling Feature (AMD)
	IBSOPCNT // Instruction Based Sampling Feature (AMD)
	IBSOPCNTEXT // Instruction Based Sampling Feature (AMD)
	IBSOPSAM // Instruction Based Sampling Feature (AMD)
	IBSRDWROPCNT // Instruction Based Sampling Feature (AMD)
	IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD)
	IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported
	IBS_OPDATA4 // AMD: IBS op data 4 MSR supported
	IBS_OPFUSE // AMD: Indicates support for IbsOpFuse
	IBS_PREVENTHOST // Disallowing IBS use by the host supported
	IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4
	IDPRED_CTRL // IPRED_DIS
	INT_WBINVD // WBINVD/WBNOINVD are interruptible.
	INVLPGB // INVLPGB and TLBSYNC instruction supported
	KEYLOCKER // Key locker
	KEYLOCKERW // Key locker wide
	LAHF // LAHF/SAHF in long mode
	LAM // If set, CPU supports Linear Address Masking
	LBRVIRT // LBR virtualization
	LZCNT // LZCNT instruction
	MCAOVERFLOW // MCA overflow recovery support.
	MCDT_NO // Processor does not exhibit MXCSR Configuration Dependent Timing behavior and does not need to mitigate it.
	MCOMMIT // MCOMMIT instruction supported
	MD_CLEAR // VERW clears CPU buffers
	MMX // standard MMX
	MMXEXT // SSE integer functions or AMD MMX ext
	MOVBE // MOVBE instruction (big-endian)
	MOVDIR64B // Move 64 Bytes as Direct Store
	MOVDIRI // Move Doubleword as Direct Store
	MOVSB_ZL // Fast Zero-Length MOVSB
	MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
	MPX // Intel MPX (Memory Protection Extensions)
	MSRIRC // Instruction Retired Counter MSR available
	MSRLIST // Read/Write List of Model Specific Registers
	MSR_PAGEFLUSH // Page Flush MSR available
	NRIPS // Indicates support for NRIP save on VMEXIT
	NX // NX (No-Execute) bit
	OSXSAVE // XSAVE enabled by OS
	PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption
	POPCNT // POPCNT instruction
	PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
	PREFETCHI // PREFETCHIT0/1 instructions
	PSFD // Predictive Store Forward Disable
	RDPRU // RDPRU instruction supported
	RDRAND // RDRAND instruction is available
	RDSEED // RDSEED instruction is available
	RDTSCP // RDTSCP Instruction
	RRSBA_CTRL // Restricted RSB Alternate
	RTM // Restricted Transactional Memory
	RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort.
	SERIALIZE // Serialize Instruction Execution
	SEV // AMD Secure Encrypted Virtualization supported
	SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host
	SEV_ALTERNATIVE // AMD SEV Alternate Injection supported
	SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests
	SEV_ES // AMD SEV Encrypted State supported
	SEV_RESTRICTED // AMD SEV Restricted Injection supported
	SEV_SNP // AMD SEV Secure Nested Paging supported
	SGX // Software Guard Extensions
	SGXLC // Software Guard Extensions Launch Control
	SHA // Intel SHA Extensions
	SME // AMD Secure Memory Encryption supported
	SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced
	SPEC_CTRL_SSBD // Speculative Store Bypass Disable
	SRBDS_CTRL // SRBDS mitigation MSR available
	SSE // SSE functions
	SSE2 // P4 SSE functions
	SSE3 // Prescott SSE3 functions
	SSE4 // Penryn SSE4.1 functions
	SSE42 // Nehalem SSE4.2 functions
	SSE4A // AMD Barcelona microarchitecture SSE4a instructions
	SSSE3 // Conroe SSSE3 functions
	STIBP // Single Thread Indirect Branch Predictors
	STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
	STOSB_SHORT // Fast short STOSB
	SUCCOR // Software uncorrectable error containment and recovery capability.
	SVM // AMD Secure Virtual Machine
	SVMDA // Indicates support for the SVM decode assists.
	SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
	SVML // AMD SVM lock. Indicates support for SVM-Lock.
	SVMNP // AMD SVM nested paging
	SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter
	SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
	SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
	SYSEE // SYSENTER and SYSEXIT instructions
	TBM // AMD Trailing Bit Manipulation
	TDX_GUEST // Intel Trust Domain Extensions Guest
	TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations
	TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
	TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
	TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
	TSXLDTRK // Intel TSX Suspend Load Address Tracking
	VAES // Vector AES. AVX(512) versions requires additional checks.
	VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits.
	VMPL // AMD VM Permission Levels supported
	VMSA_REGPROT // AMD VMSA Register Protection supported
	VMX // Virtual Machine Extensions
	VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
	VTE // AMD Virtual Transparent Encryption supported
	WAITPKG // TPAUSE, UMONITOR, UMWAIT
	WBNOINVD // Write Back and Do Not Invalidate Cache
	WRMSRNS // Non-Serializing Write to Model Specific Register
	X87 // FPU
	XGETBV1 // Supports XGETBV with ECX = 1
	XOP // Bulldozer XOP functions
	XSAVE // XSAVE, XRESTOR, XSETBV, XGETBV
	XSAVEC // Supports XSAVEC and the compacted form of XRSTOR.
	XSAVEOPT // XSAVEOPT available
	XSAVES // Supports XSAVES/XRSTORS and IA32_XSS

	// ARM features:
	AESARM // AES instructions
	ARMCPUID // Some CPU ID registers readable at user-level
	ASIMD // Advanced SIMD
	ASIMDDP // SIMD Dot Product
	ASIMDHP // Advanced SIMD half-precision floating point
	ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
	ATOMICS // Large System Extensions (LSE)
	CRC32 // CRC32/CRC32C instructions
	DCPOP // Data cache clean to Point of Persistence (DC CVAP)
	EVTSTRM // Generic timer
	FCMA // Floating point complex number addition and multiplication
	FP // Single-precision and double-precision floating point
	FPHP // Half-precision floating point
	GPA // Generic Pointer Authentication
	JSCVT // Javascript-style double->int convert (FJCVTZS)
	LRCPC // Weaker release consistency (LDAPR, etc)
	PMULL // Polynomial Multiply instructions (PMULL/PMULL2)
	SHA1 // SHA-1 instructions (SHA1C, etc)
	SHA2 // SHA-2 instructions (SHA256H, etc)
	SHA3 // SHA-3 instructions (EOR3, RAX1, XAR, BCAX)
	SHA512 // SHA512 instructions
	SM3 // SM3 instructions
	SM4 // SM4 instructions
	SVE // Scalable Vector Extension
	// Keep it last. It automatically defines the size of the flagSet array
	// and is the exclusive upper bound when iterating over feature IDs.
	lastID

	// firstID is UNKNOWN + 1, i.e. 0. It is the inclusive lower bound used
	// when iterating over feature IDs (see ParseFeature and flagSet.Strings).
	firstID FeatureID = UNKNOWN + 1
)
| 290 | |||
// CPUInfo contains information about the detected system CPU.
// Feature flags are kept in the unexported featureSet and are queried
// through methods such as Supports, Has, AnyOf and HasAll.
type CPUInfo struct {
	BrandName string // Brand name reported by the CPU
	VendorID Vendor // Comparable CPU vendor ID
	VendorString string // Raw vendor string.
	featureSet flagSet // Features of the CPU
	PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family int // CPU family number
	Model int // CPU model number
	Stepping int // CPU stepping info
	CacheLine int // Cache line size in bytes. Will be 0 if undetectable.
	Hz int64 // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
	BoostFreq int64 // Max clock speed, if known, 0 otherwise
	Cache struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2 int // L2 Cache (per core or shared). Will be -1 if undetected
		L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
	}
	SGX SGXSupport // SGX details; SGXSupport is declared elsewhere in the package.
	AVX10Level uint8 // NOTE(review): presumably the AVX10 version level; confirm where it is populated.
	maxFunc uint32 // NOTE(review): presumably the highest standard CPUID function; LogicalCPU requires it to be >= 1.
	maxExFunc uint32 // NOTE(review): presumably the highest extended CPUID function; confirm where it is populated.
}
| 317 | |||
// Low-level probe hooks. The first four are declared without a value here;
// NOTE(review): presumably they are assigned by platform-specific code
// (initCPU) before Detect runs - confirm against the per-arch files.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
var xgetbv func(index uint32) (eax, edx uint32)
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)

// darwinHasAVX512 defaults to a function that always reports false.
var darwinHasAVX512 = func() bool { return false }
| 323 | |||
// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data.
var CPU CPUInfo
| 329 | |||
// init runs initCPU and then Detect, so the exported CPU variable is
// populated as soon as the package is loaded.
func init() {
	initCPU()
	Detect()
}
| 334 | |||
| 335 | // Detect will re-detect current CPU info. | ||
| 336 | // This will replace the content of the exported CPU variable. | ||
| 337 | // | ||
| 338 | // Unless you expect the CPU to change while you are running your program | ||
| 339 | // you should not need to call this function. | ||
| 340 | // If you call this, you must ensure that no other goroutine is accessing the | ||
| 341 | // exported CPU variable. | ||
| 342 | func Detect() { | ||
| 343 | // Set defaults | ||
| 344 | CPU.ThreadsPerCore = 1 | ||
| 345 | CPU.Cache.L1I = -1 | ||
| 346 | CPU.Cache.L1D = -1 | ||
| 347 | CPU.Cache.L2 = -1 | ||
| 348 | CPU.Cache.L3 = -1 | ||
| 349 | safe := true | ||
| 350 | if detectArmFlag != nil { | ||
| 351 | safe = !*detectArmFlag | ||
| 352 | } | ||
| 353 | addInfo(&CPU, safe) | ||
| 354 | if displayFeats != nil && *displayFeats { | ||
| 355 | fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ",")) | ||
| 356 | // Exit with non-zero so tests will print value. | ||
| 357 | os.Exit(1) | ||
| 358 | } | ||
| 359 | if disableFlag != nil { | ||
| 360 | s := strings.Split(*disableFlag, ",") | ||
| 361 | for _, feat := range s { | ||
| 362 | feat := ParseFeature(strings.TrimSpace(feat)) | ||
| 363 | if feat != UNKNOWN { | ||
| 364 | CPU.featureSet.unset(feat) | ||
| 365 | } | ||
| 366 | } | ||
| 367 | } | ||
| 368 | } | ||
| 369 | |||
// DetectARM will detect ARM64 features.
// This is NOT done automatically since it can potentially crash
// if the OS does not handle the command.
// If in the future this can be done safely this function may not
// do anything.
//
// It calls addInfo on the exported CPU variable with the safe argument
// set to false.
func DetectARM() {
	addInfo(&CPU, false)
}
| 378 | |||
// Command line flag values. All three are nil until Flags has been called;
// Detect checks each for nil before reading it.
var detectArmFlag *bool
var displayFeats *bool
var disableFlag *string
| 382 | |||
// Flags registers the cpu.disable, cpu.features and cpu.arm command line
// flags on the default flag set.
// This must be called *before* flag.Parse AND
// Detect must be called after the flags have been parsed.
// Note that this means that any detection used in init() functions
// will not contain these flags.
func Flags() {
	disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list")
	displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits")
	detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash")
}
| 393 | |||
| 394 | // Supports returns whether the CPU supports all of the requested features. | ||
| 395 | func (c CPUInfo) Supports(ids ...FeatureID) bool { | ||
| 396 | for _, id := range ids { | ||
| 397 | if !c.featureSet.inSet(id) { | ||
| 398 | return false | ||
| 399 | } | ||
| 400 | } | ||
| 401 | return true | ||
| 402 | } | ||
| 403 | |||
// Has allows for checking a single feature.
// It reports whether id is set in the feature set of c.
// Should be inlined by the compiler.
func (c *CPUInfo) Has(id FeatureID) bool {
	return c.featureSet.inSet(id)
}
| 409 | |||
| 410 | // AnyOf returns whether the CPU supports one or more of the requested features. | ||
| 411 | func (c CPUInfo) AnyOf(ids ...FeatureID) bool { | ||
| 412 | for _, id := range ids { | ||
| 413 | if c.featureSet.inSet(id) { | ||
| 414 | return true | ||
| 415 | } | ||
| 416 | } | ||
| 417 | return false | ||
| 418 | } | ||
| 419 | |||
// Features contains several features combined for a fast check using
// CPUInfo.HasAll. Values are created with CombineFeatures.
type Features *flagSet
| 423 | |||
| 424 | // CombineFeatures allows to combine several features for a close to constant time lookup. | ||
| 425 | func CombineFeatures(ids ...FeatureID) Features { | ||
| 426 | var v flagSet | ||
| 427 | for _, id := range ids { | ||
| 428 | v.set(id) | ||
| 429 | } | ||
| 430 | return &v | ||
| 431 | } | ||
| 432 | |||
// HasAll reports whether every feature contained in f is present on c.
// f is typically built once with CombineFeatures and reused.
func (c *CPUInfo) HasAll(f Features) bool {
	return c.featureSet.hasSetP(f)
}
| 436 | |||
// Feature sets used by X64Level.
// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels

// oneOfLevel: X64Level returns 0 unless at least one of these is present.
var oneOfLevel = CombineFeatures(SYSEE, SYSCALL)

// levelNFeatures: every feature in the set must be present for X64Level to
// report level N. Each level lists the previous level's features plus its own.
var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2)
var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
| 443 | |||
| 444 | // X64Level returns the microarchitecture level detected on the CPU. | ||
| 445 | // If features are lacking or non x64 mode, 0 is returned. | ||
| 446 | // See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels | ||
| 447 | func (c CPUInfo) X64Level() int { | ||
| 448 | if !c.featureSet.hasOneOf(oneOfLevel) { | ||
| 449 | return 0 | ||
| 450 | } | ||
| 451 | if c.featureSet.hasSetP(level4Features) { | ||
| 452 | return 4 | ||
| 453 | } | ||
| 454 | if c.featureSet.hasSetP(level3Features) { | ||
| 455 | return 3 | ||
| 456 | } | ||
| 457 | if c.featureSet.hasSetP(level2Features) { | ||
| 458 | return 2 | ||
| 459 | } | ||
| 460 | if c.featureSet.hasSetP(level1Features) { | ||
| 461 | return 1 | ||
| 462 | } | ||
| 463 | return 0 | ||
| 464 | } | ||
| 465 | |||
| 466 | // Disable will disable one or several features. | ||
| 467 | func (c *CPUInfo) Disable(ids ...FeatureID) bool { | ||
| 468 | for _, id := range ids { | ||
| 469 | c.featureSet.unset(id) | ||
| 470 | } | ||
| 471 | return true | ||
| 472 | } | ||
| 473 | |||
| 474 | // Enable will disable one or several features even if they were undetected. | ||
| 475 | // This is of course not recommended for obvious reasons. | ||
| 476 | func (c *CPUInfo) Enable(ids ...FeatureID) bool { | ||
| 477 | for _, id := range ids { | ||
| 478 | c.featureSet.set(id) | ||
| 479 | } | ||
| 480 | return true | ||
| 481 | } | ||
| 482 | |||
// IsVendor returns true if the detected vendor ID of c equals v.
func (c CPUInfo) IsVendor(v Vendor) bool {
	return c.VendorID == v
}
| 487 | |||
| 488 | // FeatureSet returns all available features as strings. | ||
| 489 | func (c CPUInfo) FeatureSet() []string { | ||
| 490 | s := make([]string, 0, c.featureSet.nEnabled()) | ||
| 491 | s = append(s, c.featureSet.Strings()...) | ||
| 492 | return s | ||
| 493 | } | ||
| 494 | |||
| 495 | // RTCounter returns the 64-bit time-stamp counter | ||
| 496 | // Uses the RDTSCP instruction. The value 0 is returned | ||
| 497 | // if the CPU does not support the instruction. | ||
| 498 | func (c CPUInfo) RTCounter() uint64 { | ||
| 499 | if !c.Supports(RDTSCP) { | ||
| 500 | return 0 | ||
| 501 | } | ||
| 502 | a, _, _, d := rdtscpAsm() | ||
| 503 | return uint64(a) | (uint64(d) << 32) | ||
| 504 | } | ||
| 505 | |||
| 506 | // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. | ||
| 507 | // This variable is OS dependent, but on Linux contains information | ||
| 508 | // about the current cpu/core the code is running on. | ||
| 509 | // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. | ||
| 510 | func (c CPUInfo) Ia32TscAux() uint32 { | ||
| 511 | if !c.Supports(RDTSCP) { | ||
| 512 | return 0 | ||
| 513 | } | ||
| 514 | _, _, ecx, _ := rdtscpAsm() | ||
| 515 | return ecx | ||
| 516 | } | ||
| 517 | |||
| 518 | // LogicalCPU will return the Logical CPU the code is currently executing on. | ||
| 519 | // This is likely to change when the OS re-schedules the running thread | ||
| 520 | // to another CPU. | ||
| 521 | // If the current core cannot be detected, -1 will be returned. | ||
| 522 | func (c CPUInfo) LogicalCPU() int { | ||
| 523 | if c.maxFunc < 1 { | ||
| 524 | return -1 | ||
| 525 | } | ||
| 526 | _, ebx, _, _ := cpuid(1) | ||
| 527 | return int(ebx >> 24) | ||
| 528 | } | ||
| 529 | |||
| 530 | // frequencies tries to compute the clock speed of the CPU. If leaf 15 is | ||
| 531 | // supported, use it, otherwise parse the brand string. Yes, really. | ||
| 532 | func (c *CPUInfo) frequencies() { | ||
| 533 | c.Hz, c.BoostFreq = 0, 0 | ||
| 534 | mfi := maxFunctionID() | ||
| 535 | if mfi >= 0x15 { | ||
| 536 | eax, ebx, ecx, _ := cpuid(0x15) | ||
| 537 | if eax != 0 && ebx != 0 && ecx != 0 { | ||
| 538 | c.Hz = (int64(ecx) * int64(ebx)) / int64(eax) | ||
| 539 | } | ||
| 540 | } | ||
| 541 | if mfi >= 0x16 { | ||
| 542 | a, b, _, _ := cpuid(0x16) | ||
| 543 | // Base... | ||
| 544 | if a&0xffff > 0 { | ||
| 545 | c.Hz = int64(a&0xffff) * 1_000_000 | ||
| 546 | } | ||
| 547 | // Boost... | ||
| 548 | if b&0xffff > 0 { | ||
| 549 | c.BoostFreq = int64(b&0xffff) * 1_000_000 | ||
| 550 | } | ||
| 551 | } | ||
| 552 | if c.Hz > 0 { | ||
| 553 | return | ||
| 554 | } | ||
| 555 | |||
| 556 | // computeHz determines the official rated speed of a CPU from its brand | ||
| 557 | // string. This insanity is *actually the official documented way to do | ||
| 558 | // this according to Intel*, prior to leaf 0x15 existing. The official | ||
| 559 | // documentation only shows this working for exactly `x.xx` or `xxxx` | ||
| 560 | // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other | ||
| 561 | // sizes. | ||
| 562 | model := c.BrandName | ||
| 563 | hz := strings.LastIndex(model, "Hz") | ||
| 564 | if hz < 3 { | ||
| 565 | return | ||
| 566 | } | ||
| 567 | var multiplier int64 | ||
| 568 | switch model[hz-1] { | ||
| 569 | case 'M': | ||
| 570 | multiplier = 1000 * 1000 | ||
| 571 | case 'G': | ||
| 572 | multiplier = 1000 * 1000 * 1000 | ||
| 573 | case 'T': | ||
| 574 | multiplier = 1000 * 1000 * 1000 * 1000 | ||
| 575 | } | ||
| 576 | if multiplier == 0 { | ||
| 577 | return | ||
| 578 | } | ||
| 579 | freq := int64(0) | ||
| 580 | divisor := int64(0) | ||
| 581 | decimalShift := int64(1) | ||
| 582 | var i int | ||
| 583 | for i = hz - 2; i >= 0 && model[i] != ' '; i-- { | ||
| 584 | if model[i] >= '0' && model[i] <= '9' { | ||
| 585 | freq += int64(model[i]-'0') * decimalShift | ||
| 586 | decimalShift *= 10 | ||
| 587 | } else if model[i] == '.' { | ||
| 588 | if divisor != 0 { | ||
| 589 | return | ||
| 590 | } | ||
| 591 | divisor = decimalShift | ||
| 592 | } else { | ||
| 593 | return | ||
| 594 | } | ||
| 595 | } | ||
| 596 | // we didn't find a space | ||
| 597 | if i < 0 { | ||
| 598 | return | ||
| 599 | } | ||
| 600 | if divisor != 0 { | ||
| 601 | c.Hz = (freq * multiplier) / divisor | ||
| 602 | return | ||
| 603 | } | ||
| 604 | c.Hz = freq * multiplier | ||
| 605 | } | ||
| 606 | |||
| 607 | // VM Will return true if the cpu id indicates we are in | ||
| 608 | // a virtual machine. | ||
| 609 | func (c CPUInfo) VM() bool { | ||
| 610 | return CPU.featureSet.inSet(HYPERVISOR) | ||
| 611 | } | ||
| 612 | |||
// flags is one 64-bit word of detected cpu features and characteristics.
type flags uint64

const (
	// flagBitsLog2 is log2 of the number of bits in one flags word.
	flagBitsLog2 = 6
	// flagBits is the number of bits in one flags word.
	flagBits = 1 << flagBitsLog2
	// flagMask extracts the bit position within a word from a feature ID.
	flagMask = flagBits - 1
)
| 620 | |||
// flagSet contains detected cpu features and characteristics in an array of flags.
// The array length is lastID divided by flagBits, rounded up, so there is
// one bit for every FeatureID below lastID. A feature lives in word
// id>>flagBitsLog2 at bit id&flagMask.
type flagSet [(lastID + flagMask) / flagBits]flags
| 623 | |||
| 624 | func (s *flagSet) inSet(feat FeatureID) bool { | ||
| 625 | return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0 | ||
| 626 | } | ||
| 627 | |||
| 628 | func (s *flagSet) set(feat FeatureID) { | ||
| 629 | s[feat>>flagBitsLog2] |= 1 << (feat & flagMask) | ||
| 630 | } | ||
| 631 | |||
| 632 | // setIf will set a feature if boolean is true. | ||
| 633 | func (s *flagSet) setIf(cond bool, features ...FeatureID) { | ||
| 634 | if cond { | ||
| 635 | for _, offset := range features { | ||
| 636 | s[offset>>flagBitsLog2] |= 1 << (offset & flagMask) | ||
| 637 | } | ||
| 638 | } | ||
| 639 | } | ||
| 640 | |||
| 641 | func (s *flagSet) unset(offset FeatureID) { | ||
| 642 | bit := flags(1 << (offset & flagMask)) | ||
| 643 | s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit | ||
| 644 | } | ||
| 645 | |||
| 646 | // or with another flagset. | ||
| 647 | func (s *flagSet) or(other flagSet) { | ||
| 648 | for i, v := range other[:] { | ||
| 649 | s[i] |= v | ||
| 650 | } | ||
| 651 | } | ||
| 652 | |||
| 653 | // hasSet returns whether all features are present. | ||
| 654 | func (s *flagSet) hasSet(other flagSet) bool { | ||
| 655 | for i, v := range other[:] { | ||
| 656 | if s[i]&v != v { | ||
| 657 | return false | ||
| 658 | } | ||
| 659 | } | ||
| 660 | return true | ||
| 661 | } | ||
| 662 | |||
| 663 | // hasSet returns whether all features are present. | ||
| 664 | func (s *flagSet) hasSetP(other *flagSet) bool { | ||
| 665 | for i, v := range other[:] { | ||
| 666 | if s[i]&v != v { | ||
| 667 | return false | ||
| 668 | } | ||
| 669 | } | ||
| 670 | return true | ||
| 671 | } | ||
| 672 | |||
| 673 | // hasOneOf returns whether one or more features are present. | ||
| 674 | func (s *flagSet) hasOneOf(other *flagSet) bool { | ||
| 675 | for i, v := range other[:] { | ||
| 676 | if s[i]&v != 0 { | ||
| 677 | return true | ||
| 678 | } | ||
| 679 | } | ||
| 680 | return false | ||
| 681 | } | ||
| 682 | |||
| 683 | // nEnabled will return the number of enabled flags. | ||
| 684 | func (s *flagSet) nEnabled() (n int) { | ||
| 685 | for _, v := range s[:] { | ||
| 686 | n += bits.OnesCount64(uint64(v)) | ||
| 687 | } | ||
| 688 | return n | ||
| 689 | } | ||
| 690 | |||
| 691 | func flagSetWith(feat ...FeatureID) flagSet { | ||
| 692 | var res flagSet | ||
| 693 | for _, f := range feat { | ||
| 694 | res.set(f) | ||
| 695 | } | ||
| 696 | return res | ||
| 697 | } | ||
| 698 | |||
| 699 | // ParseFeature will parse the string and return the ID of the matching feature. | ||
| 700 | // Will return UNKNOWN if not found. | ||
| 701 | func ParseFeature(s string) FeatureID { | ||
| 702 | s = strings.ToUpper(s) | ||
| 703 | for i := firstID; i < lastID; i++ { | ||
| 704 | if i.String() == s { | ||
| 705 | return i | ||
| 706 | } | ||
| 707 | } | ||
| 708 | return UNKNOWN | ||
| 709 | } | ||
| 710 | |||
| 711 | // Strings returns an array of the detected features for FlagsSet. | ||
| 712 | func (s flagSet) Strings() []string { | ||
| 713 | if len(s) == 0 { | ||
| 714 | return []string{""} | ||
| 715 | } | ||
| 716 | r := make([]string, 0) | ||
| 717 | for i := firstID; i < lastID; i++ { | ||
| 718 | if s.inSet(i) { | ||
| 719 | r = append(r, i.String()) | ||
| 720 | } | ||
| 721 | } | ||
| 722 | return r | ||
| 723 | } | ||
| 724 | |||
| 725 | func maxExtendedFunction() uint32 { | ||
| 726 | eax, _, _, _ := cpuid(0x80000000) | ||
| 727 | return eax | ||
| 728 | } | ||
| 729 | |||
| 730 | func maxFunctionID() uint32 { | ||
| 731 | a, _, _, _ := cpuid(0) | ||
| 732 | return a | ||
| 733 | } | ||
| 734 | |||
| 735 | func brandName() string { | ||
| 736 | if maxExtendedFunction() >= 0x80000004 { | ||
| 737 | v := make([]uint32, 0, 48) | ||
| 738 | for i := uint32(0); i < 3; i++ { | ||
| 739 | a, b, c, d := cpuid(0x80000002 + i) | ||
| 740 | v = append(v, a, b, c, d) | ||
| 741 | } | ||
| 742 | return strings.Trim(string(valAsString(v...)), " ") | ||
| 743 | } | ||
| 744 | return "unknown" | ||
| 745 | } | ||
| 746 | |||
| 747 | func threadsPerCore() int { | ||
| 748 | mfi := maxFunctionID() | ||
| 749 | vend, _ := vendorID() | ||
| 750 | |||
| 751 | if mfi < 0x4 || (vend != Intel && vend != AMD) { | ||
| 752 | return 1 | ||
| 753 | } | ||
| 754 | |||
| 755 | if mfi < 0xb { | ||
| 756 | if vend != Intel { | ||
| 757 | return 1 | ||
| 758 | } | ||
| 759 | _, b, _, d := cpuid(1) | ||
| 760 | if (d & (1 << 28)) != 0 { | ||
| 761 | // v will contain logical core count | ||
| 762 | v := (b >> 16) & 255 | ||
| 763 | if v > 1 { | ||
| 764 | a4, _, _, _ := cpuid(4) | ||
| 765 | // physical cores | ||
| 766 | v2 := (a4 >> 26) + 1 | ||
| 767 | if v2 > 0 { | ||
| 768 | return int(v) / int(v2) | ||
| 769 | } | ||
| 770 | } | ||
| 771 | } | ||
| 772 | return 1 | ||
| 773 | } | ||
| 774 | _, b, _, _ := cpuidex(0xb, 0) | ||
| 775 | if b&0xffff == 0 { | ||
| 776 | if vend == AMD { | ||
| 777 | // Workaround for AMD returning 0, assume 2 if >= Zen 2 | ||
| 778 | // It will be more correct than not. | ||
| 779 | fam, _, _ := familyModel() | ||
| 780 | _, _, _, d := cpuid(1) | ||
| 781 | if (d&(1<<28)) != 0 && fam >= 23 { | ||
| 782 | return 2 | ||
| 783 | } | ||
| 784 | } | ||
| 785 | return 1 | ||
| 786 | } | ||
| 787 | return int(b & 0xffff) | ||
| 788 | } | ||
| 789 | |||
| 790 | func logicalCores() int { | ||
| 791 | mfi := maxFunctionID() | ||
| 792 | v, _ := vendorID() | ||
| 793 | switch v { | ||
| 794 | case Intel: | ||
| 795 | // Use this on old Intel processors | ||
| 796 | if mfi < 0xb { | ||
| 797 | if mfi < 1 { | ||
| 798 | return 0 | ||
| 799 | } | ||
| 800 | // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) | ||
| 801 | // that can be assigned to logical processors in a physical package. | ||
| 802 | // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. | ||
| 803 | _, ebx, _, _ := cpuid(1) | ||
| 804 | logical := (ebx >> 16) & 0xff | ||
| 805 | return int(logical) | ||
| 806 | } | ||
| 807 | _, b, _, _ := cpuidex(0xb, 1) | ||
| 808 | return int(b & 0xffff) | ||
| 809 | case AMD, Hygon: | ||
| 810 | _, b, _, _ := cpuid(1) | ||
| 811 | return int((b >> 16) & 0xff) | ||
| 812 | default: | ||
| 813 | return 0 | ||
| 814 | } | ||
| 815 | } | ||
| 816 | |||
| 817 | func familyModel() (family, model, stepping int) { | ||
| 818 | if maxFunctionID() < 0x1 { | ||
| 819 | return 0, 0, 0 | ||
| 820 | } | ||
| 821 | eax, _, _, _ := cpuid(1) | ||
| 822 | // If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0]. | ||
| 823 | family = int((eax >> 8) & 0xf) | ||
| 824 | extFam := family == 0x6 // Intel is 0x6, needs extended model. | ||
| 825 | if family == 0xf { | ||
| 826 | // Add ExtFamily | ||
| 827 | family += int((eax >> 20) & 0xff) | ||
| 828 | extFam = true | ||
| 829 | } | ||
| 830 | // If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0]. | ||
| 831 | model = int((eax >> 4) & 0xf) | ||
| 832 | if extFam { | ||
| 833 | // Add ExtModel | ||
| 834 | model += int((eax >> 12) & 0xf0) | ||
| 835 | } | ||
| 836 | stepping = int(eax & 0xf) | ||
| 837 | return family, model, stepping | ||
| 838 | } | ||
| 839 | |||
| 840 | func physicalCores() int { | ||
| 841 | v, _ := vendorID() | ||
| 842 | switch v { | ||
| 843 | case Intel: | ||
| 844 | return logicalCores() / threadsPerCore() | ||
| 845 | case AMD, Hygon: | ||
| 846 | lc := logicalCores() | ||
| 847 | tpc := threadsPerCore() | ||
| 848 | if lc > 0 && tpc > 0 { | ||
| 849 | return lc / tpc | ||
| 850 | } | ||
| 851 | |||
| 852 | // The following is inaccurate on AMD EPYC 7742 64-Core Processor | ||
| 853 | if maxExtendedFunction() >= 0x80000008 { | ||
| 854 | _, _, c, _ := cpuid(0x80000008) | ||
| 855 | if c&0xff > 0 { | ||
| 856 | return int(c&0xff) + 1 | ||
| 857 | } | ||
| 858 | } | ||
| 859 | } | ||
| 860 | return 0 | ||
| 861 | } | ||
| 862 | |||
| 863 | // Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID | ||
| 864 | var vendorMapping = map[string]Vendor{ | ||
| 865 | "AMDisbetter!": AMD, | ||
| 866 | "AuthenticAMD": AMD, | ||
| 867 | "CentaurHauls": VIA, | ||
| 868 | "GenuineIntel": Intel, | ||
| 869 | "TransmetaCPU": Transmeta, | ||
| 870 | "GenuineTMx86": Transmeta, | ||
| 871 | "Geode by NSC": NSC, | ||
| 872 | "VIA VIA VIA ": VIA, | ||
| 873 | "KVMKVMKVMKVM": KVM, | ||
| 874 | "Microsoft Hv": MSVM, | ||
| 875 | "VMwareVMware": VMware, | ||
| 876 | "XenVMMXenVMM": XenHVM, | ||
| 877 | "bhyve bhyve ": Bhyve, | ||
| 878 | "HygonGenuine": Hygon, | ||
| 879 | "Vortex86 SoC": SiS, | ||
| 880 | "SiS SiS SiS ": SiS, | ||
| 881 | "RiseRiseRise": SiS, | ||
| 882 | "Genuine RDC": RDC, | ||
| 883 | } | ||
| 884 | |||
| 885 | func vendorID() (Vendor, string) { | ||
| 886 | _, b, c, d := cpuid(0) | ||
| 887 | v := string(valAsString(b, d, c)) | ||
| 888 | vend, ok := vendorMapping[v] | ||
| 889 | if !ok { | ||
| 890 | return VendorUnknown, v | ||
| 891 | } | ||
| 892 | return vend, v | ||
| 893 | } | ||
| 894 | |||
| 895 | func cacheLine() int { | ||
| 896 | if maxFunctionID() < 0x1 { | ||
| 897 | return 0 | ||
| 898 | } | ||
| 899 | |||
| 900 | _, ebx, _, _ := cpuid(1) | ||
| 901 | cache := (ebx & 0xff00) >> 5 // cflush size | ||
| 902 | if cache == 0 && maxExtendedFunction() >= 0x80000006 { | ||
| 903 | _, _, ecx, _ := cpuid(0x80000006) | ||
| 904 | cache = ecx & 0xff // cacheline size | ||
| 905 | } | ||
| 906 | // TODO: Read from Cache and TLB Information | ||
| 907 | return int(cache) | ||
| 908 | } | ||
| 909 | |||
| 910 | func (c *CPUInfo) cacheSize() { | ||
| 911 | c.Cache.L1D = -1 | ||
| 912 | c.Cache.L1I = -1 | ||
| 913 | c.Cache.L2 = -1 | ||
| 914 | c.Cache.L3 = -1 | ||
| 915 | vendor, _ := vendorID() | ||
| 916 | switch vendor { | ||
| 917 | case Intel: | ||
| 918 | if maxFunctionID() < 4 { | ||
| 919 | return | ||
| 920 | } | ||
| 921 | c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0 | ||
| 922 | for i := uint32(0); ; i++ { | ||
| 923 | eax, ebx, ecx, _ := cpuidex(4, i) | ||
| 924 | cacheType := eax & 15 | ||
| 925 | if cacheType == 0 { | ||
| 926 | break | ||
| 927 | } | ||
| 928 | cacheLevel := (eax >> 5) & 7 | ||
| 929 | coherency := int(ebx&0xfff) + 1 | ||
| 930 | partitions := int((ebx>>12)&0x3ff) + 1 | ||
| 931 | associativity := int((ebx>>22)&0x3ff) + 1 | ||
| 932 | sets := int(ecx) + 1 | ||
| 933 | size := associativity * partitions * coherency * sets | ||
| 934 | switch cacheLevel { | ||
| 935 | case 1: | ||
| 936 | if cacheType == 1 { | ||
| 937 | // 1 = Data Cache | ||
| 938 | c.Cache.L1D = size | ||
| 939 | } else if cacheType == 2 { | ||
| 940 | // 2 = Instruction Cache | ||
| 941 | c.Cache.L1I = size | ||
| 942 | } else { | ||
| 943 | if c.Cache.L1D < 0 { | ||
| 944 | c.Cache.L1I = size | ||
| 945 | } | ||
| 946 | if c.Cache.L1I < 0 { | ||
| 947 | c.Cache.L1I = size | ||
| 948 | } | ||
| 949 | } | ||
| 950 | case 2: | ||
| 951 | c.Cache.L2 = size | ||
| 952 | case 3: | ||
| 953 | c.Cache.L3 = size | ||
| 954 | } | ||
| 955 | } | ||
| 956 | case AMD, Hygon: | ||
| 957 | // Untested. | ||
| 958 | if maxExtendedFunction() < 0x80000005 { | ||
| 959 | return | ||
| 960 | } | ||
| 961 | _, _, ecx, edx := cpuid(0x80000005) | ||
| 962 | c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) | ||
| 963 | c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) | ||
| 964 | |||
| 965 | if maxExtendedFunction() < 0x80000006 { | ||
| 966 | return | ||
| 967 | } | ||
| 968 | _, _, ecx, _ = cpuid(0x80000006) | ||
| 969 | c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) | ||
| 970 | |||
| 971 | // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties | ||
| 972 | if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) { | ||
| 973 | return | ||
| 974 | } | ||
| 975 | |||
| 976 | // Xen Hypervisor is buggy and returns the same entry no matter ECX value. | ||
| 977 | // Hack: When we encounter the same entry 100 times we break. | ||
| 978 | nSame := 0 | ||
| 979 | var last uint32 | ||
| 980 | for i := uint32(0); i < math.MaxUint32; i++ { | ||
| 981 | eax, ebx, ecx, _ := cpuidex(0x8000001D, i) | ||
| 982 | |||
| 983 | level := (eax >> 5) & 7 | ||
| 984 | cacheNumSets := ecx + 1 | ||
| 985 | cacheLineSize := 1 + (ebx & 2047) | ||
| 986 | cachePhysPartitions := 1 + ((ebx >> 12) & 511) | ||
| 987 | cacheNumWays := 1 + ((ebx >> 22) & 511) | ||
| 988 | |||
| 989 | typ := eax & 15 | ||
| 990 | size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays) | ||
| 991 | if typ == 0 { | ||
| 992 | return | ||
| 993 | } | ||
| 994 | |||
| 995 | // Check for the same value repeated. | ||
| 996 | comb := eax ^ ebx ^ ecx | ||
| 997 | if comb == last { | ||
| 998 | nSame++ | ||
| 999 | if nSame == 100 { | ||
| 1000 | return | ||
| 1001 | } | ||
| 1002 | } | ||
| 1003 | last = comb | ||
| 1004 | |||
| 1005 | switch level { | ||
| 1006 | case 1: | ||
| 1007 | switch typ { | ||
| 1008 | case 1: | ||
| 1009 | // Data cache | ||
| 1010 | c.Cache.L1D = size | ||
| 1011 | case 2: | ||
| 1012 | // Inst cache | ||
| 1013 | c.Cache.L1I = size | ||
| 1014 | default: | ||
| 1015 | if c.Cache.L1D < 0 { | ||
| 1016 | c.Cache.L1I = size | ||
| 1017 | } | ||
| 1018 | if c.Cache.L1I < 0 { | ||
| 1019 | c.Cache.L1I = size | ||
| 1020 | } | ||
| 1021 | } | ||
| 1022 | case 2: | ||
| 1023 | c.Cache.L2 = size | ||
| 1024 | case 3: | ||
| 1025 | c.Cache.L3 = size | ||
| 1026 | } | ||
| 1027 | } | ||
| 1028 | } | ||
| 1029 | } | ||
| 1030 | |||
// SGXEPCSection describes one SGX Enclave Page Cache (EPC) section as
// enumerated through the sub-leaves of CPUID leaf 0x12.
type SGXEPCSection struct {
	// BaseAddress is the section's base address as reported by CPUID.
	BaseAddress uint64
	// EPCSize is the section's size as reported by CPUID.
	EPCSize uint64
}
| 1035 | |||
| 1036 | type SGXSupport struct { | ||
| 1037 | Available bool | ||
| 1038 | LaunchControl bool | ||
| 1039 | SGX1Supported bool | ||
| 1040 | SGX2Supported bool | ||
| 1041 | MaxEnclaveSizeNot64 int64 | ||
| 1042 | MaxEnclaveSize64 int64 | ||
| 1043 | EPCSections []SGXEPCSection | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | func hasSGX(available, lc bool) (rval SGXSupport) { | ||
| 1047 | rval.Available = available | ||
| 1048 | |||
| 1049 | if !available { | ||
| 1050 | return | ||
| 1051 | } | ||
| 1052 | |||
| 1053 | rval.LaunchControl = lc | ||
| 1054 | |||
| 1055 | a, _, _, d := cpuidex(0x12, 0) | ||
| 1056 | rval.SGX1Supported = a&0x01 != 0 | ||
| 1057 | rval.SGX2Supported = a&0x02 != 0 | ||
| 1058 | rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 | ||
| 1059 | rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 | ||
| 1060 | rval.EPCSections = make([]SGXEPCSection, 0) | ||
| 1061 | |||
| 1062 | for subleaf := uint32(2); subleaf < 2+8; subleaf++ { | ||
| 1063 | eax, ebx, ecx, edx := cpuidex(0x12, subleaf) | ||
| 1064 | leafType := eax & 0xf | ||
| 1065 | |||
| 1066 | if leafType == 0 { | ||
| 1067 | // Invalid subleaf, stop iterating | ||
| 1068 | break | ||
| 1069 | } else if leafType == 1 { | ||
| 1070 | // EPC Section subleaf | ||
| 1071 | baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32) | ||
| 1072 | size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32) | ||
| 1073 | |||
| 1074 | section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size} | ||
| 1075 | rval.EPCSections = append(rval.EPCSections, section) | ||
| 1076 | } | ||
| 1077 | } | ||
| 1078 | |||
| 1079 | return | ||
| 1080 | } | ||
| 1081 | |||
| 1082 | func support() flagSet { | ||
| 1083 | var fs flagSet | ||
| 1084 | mfi := maxFunctionID() | ||
| 1085 | vend, _ := vendorID() | ||
| 1086 | if mfi < 0x1 { | ||
| 1087 | return fs | ||
| 1088 | } | ||
| 1089 | family, model, _ := familyModel() | ||
| 1090 | |||
| 1091 | _, _, c, d := cpuid(1) | ||
| 1092 | fs.setIf((d&(1<<0)) != 0, X87) | ||
| 1093 | fs.setIf((d&(1<<8)) != 0, CMPXCHG8) | ||
| 1094 | fs.setIf((d&(1<<11)) != 0, SYSEE) | ||
| 1095 | fs.setIf((d&(1<<15)) != 0, CMOV) | ||
| 1096 | fs.setIf((d&(1<<23)) != 0, MMX) | ||
| 1097 | fs.setIf((d&(1<<24)) != 0, FXSR) | ||
| 1098 | fs.setIf((d&(1<<25)) != 0, FXSROPT) | ||
| 1099 | fs.setIf((d&(1<<25)) != 0, SSE) | ||
| 1100 | fs.setIf((d&(1<<26)) != 0, SSE2) | ||
| 1101 | fs.setIf((c&1) != 0, SSE3) | ||
| 1102 | fs.setIf((c&(1<<5)) != 0, VMX) | ||
| 1103 | fs.setIf((c&(1<<9)) != 0, SSSE3) | ||
| 1104 | fs.setIf((c&(1<<19)) != 0, SSE4) | ||
| 1105 | fs.setIf((c&(1<<20)) != 0, SSE42) | ||
| 1106 | fs.setIf((c&(1<<25)) != 0, AESNI) | ||
| 1107 | fs.setIf((c&(1<<1)) != 0, CLMUL) | ||
| 1108 | fs.setIf(c&(1<<22) != 0, MOVBE) | ||
| 1109 | fs.setIf(c&(1<<23) != 0, POPCNT) | ||
| 1110 | fs.setIf(c&(1<<30) != 0, RDRAND) | ||
| 1111 | |||
| 1112 | // This bit has been reserved by Intel & AMD for use by hypervisors, | ||
| 1113 | // and indicates the presence of a hypervisor. | ||
| 1114 | fs.setIf(c&(1<<31) != 0, HYPERVISOR) | ||
| 1115 | fs.setIf(c&(1<<29) != 0, F16C) | ||
| 1116 | fs.setIf(c&(1<<13) != 0, CX16) | ||
| 1117 | |||
| 1118 | if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { | ||
| 1119 | fs.setIf(threadsPerCore() > 1, HTT) | ||
| 1120 | } | ||
| 1121 | if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 { | ||
| 1122 | fs.setIf(threadsPerCore() > 1, HTT) | ||
| 1123 | } | ||
| 1124 | fs.setIf(c&1<<26 != 0, XSAVE) | ||
| 1125 | fs.setIf(c&1<<27 != 0, OSXSAVE) | ||
| 1126 | // Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits | ||
| 1127 | const avxCheck = 1<<26 | 1<<27 | 1<<28 | ||
| 1128 | if c&avxCheck == avxCheck { | ||
| 1129 | // Check for OS support | ||
| 1130 | eax, _ := xgetbv(0) | ||
| 1131 | if (eax & 0x6) == 0x6 { | ||
| 1132 | fs.set(AVX) | ||
| 1133 | switch vend { | ||
| 1134 | case Intel: | ||
| 1135 | // Older than Haswell. | ||
| 1136 | fs.setIf(family == 6 && model < 60, AVXSLOW) | ||
| 1137 | case AMD: | ||
| 1138 | // Older than Zen 2 | ||
| 1139 | fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW) | ||
| 1140 | } | ||
| 1141 | } | ||
| 1142 | } | ||
| 1143 | // FMA3 can be used with SSE registers, so no OS support is strictly needed. | ||
| 1144 | // fma3 and OSXSAVE needed. | ||
| 1145 | const fma3Check = 1<<12 | 1<<27 | ||
| 1146 | fs.setIf(c&fma3Check == fma3Check, FMA3) | ||
| 1147 | |||
| 1148 | // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. | ||
| 1149 | if mfi >= 7 { | ||
| 1150 | _, ebx, ecx, edx := cpuidex(7, 0) | ||
| 1151 | if fs.inSet(AVX) && (ebx&0x00000020) != 0 { | ||
| 1152 | fs.set(AVX2) | ||
| 1153 | } | ||
| 1154 | // CPUID.(EAX=7, ECX=0).EBX | ||
| 1155 | if (ebx & 0x00000008) != 0 { | ||
| 1156 | fs.set(BMI1) | ||
| 1157 | fs.setIf((ebx&0x00000100) != 0, BMI2) | ||
| 1158 | } | ||
| 1159 | fs.setIf(ebx&(1<<2) != 0, SGX) | ||
| 1160 | fs.setIf(ebx&(1<<4) != 0, HLE) | ||
| 1161 | fs.setIf(ebx&(1<<9) != 0, ERMS) | ||
| 1162 | fs.setIf(ebx&(1<<11) != 0, RTM) | ||
| 1163 | fs.setIf(ebx&(1<<14) != 0, MPX) | ||
| 1164 | fs.setIf(ebx&(1<<18) != 0, RDSEED) | ||
| 1165 | fs.setIf(ebx&(1<<19) != 0, ADX) | ||
| 1166 | fs.setIf(ebx&(1<<29) != 0, SHA) | ||
| 1167 | |||
| 1168 | // CPUID.(EAX=7, ECX=0).ECX | ||
| 1169 | fs.setIf(ecx&(1<<5) != 0, WAITPKG) | ||
| 1170 | fs.setIf(ecx&(1<<7) != 0, CETSS) | ||
| 1171 | fs.setIf(ecx&(1<<8) != 0, GFNI) | ||
| 1172 | fs.setIf(ecx&(1<<9) != 0, VAES) | ||
| 1173 | fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ) | ||
| 1174 | fs.setIf(ecx&(1<<13) != 0, TME) | ||
| 1175 | fs.setIf(ecx&(1<<25) != 0, CLDEMOTE) | ||
| 1176 | fs.setIf(ecx&(1<<23) != 0, KEYLOCKER) | ||
| 1177 | fs.setIf(ecx&(1<<27) != 0, MOVDIRI) | ||
| 1178 | fs.setIf(ecx&(1<<28) != 0, MOVDIR64B) | ||
| 1179 | fs.setIf(ecx&(1<<29) != 0, ENQCMD) | ||
| 1180 | fs.setIf(ecx&(1<<30) != 0, SGXLC) | ||
| 1181 | |||
| 1182 | // CPUID.(EAX=7, ECX=0).EDX | ||
| 1183 | fs.setIf(edx&(1<<4) != 0, FSRM) | ||
| 1184 | fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL) | ||
| 1185 | fs.setIf(edx&(1<<10) != 0, MD_CLEAR) | ||
| 1186 | fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT) | ||
| 1187 | fs.setIf(edx&(1<<14) != 0, SERIALIZE) | ||
| 1188 | fs.setIf(edx&(1<<15) != 0, HYBRID_CPU) | ||
| 1189 | fs.setIf(edx&(1<<16) != 0, TSXLDTRK) | ||
| 1190 | fs.setIf(edx&(1<<18) != 0, PCONFIG) | ||
| 1191 | fs.setIf(edx&(1<<20) != 0, CETIBT) | ||
| 1192 | fs.setIf(edx&(1<<26) != 0, IBPB) | ||
| 1193 | fs.setIf(edx&(1<<27) != 0, STIBP) | ||
| 1194 | fs.setIf(edx&(1<<28) != 0, FLUSH_L1D) | ||
| 1195 | fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP) | ||
| 1196 | fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP) | ||
| 1197 | fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD) | ||
| 1198 | |||
| 1199 | // CPUID.(EAX=7, ECX=1).EAX | ||
| 1200 | eax1, _, _, edx1 := cpuidex(7, 1) | ||
| 1201 | fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI) | ||
| 1202 | fs.setIf(eax1&(1<<7) != 0, CMPCCXADD) | ||
| 1203 | fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL) | ||
| 1204 | fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT) | ||
| 1205 | fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT) | ||
| 1206 | fs.setIf(eax1&(1<<22) != 0, HRESET) | ||
| 1207 | fs.setIf(eax1&(1<<23) != 0, AVXIFMA) | ||
| 1208 | fs.setIf(eax1&(1<<26) != 0, LAM) | ||
| 1209 | |||
| 1210 | // CPUID.(EAX=7, ECX=1).EDX | ||
| 1211 | fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8) | ||
| 1212 | fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT) | ||
| 1213 | fs.setIf(edx1&(1<<14) != 0, PREFETCHI) | ||
| 1214 | fs.setIf(edx1&(1<<19) != 0, AVX10) | ||
| 1215 | fs.setIf(edx1&(1<<21) != 0, APX_F) | ||
| 1216 | |||
| 1217 | // Only detect AVX-512 features if XGETBV is supported | ||
| 1218 | if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { | ||
| 1219 | // Check for OS support | ||
| 1220 | eax, _ := xgetbv(0) | ||
| 1221 | |||
| 1222 | // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and | ||
| 1223 | // ZMM16-ZMM31 state are enabled by OS) | ||
| 1224 | /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). | ||
| 1225 | hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3 | ||
| 1226 | if runtime.GOOS == "darwin" { | ||
| 1227 | hasAVX512 = fs.inSet(AVX) && darwinHasAVX512() | ||
| 1228 | } | ||
| 1229 | if hasAVX512 { | ||
| 1230 | fs.setIf(ebx&(1<<16) != 0, AVX512F) | ||
| 1231 | fs.setIf(ebx&(1<<17) != 0, AVX512DQ) | ||
| 1232 | fs.setIf(ebx&(1<<21) != 0, AVX512IFMA) | ||
| 1233 | fs.setIf(ebx&(1<<26) != 0, AVX512PF) | ||
| 1234 | fs.setIf(ebx&(1<<27) != 0, AVX512ER) | ||
| 1235 | fs.setIf(ebx&(1<<28) != 0, AVX512CD) | ||
| 1236 | fs.setIf(ebx&(1<<30) != 0, AVX512BW) | ||
| 1237 | fs.setIf(ebx&(1<<31) != 0, AVX512VL) | ||
| 1238 | // ecx | ||
| 1239 | fs.setIf(ecx&(1<<1) != 0, AVX512VBMI) | ||
| 1240 | fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2) | ||
| 1241 | fs.setIf(ecx&(1<<11) != 0, AVX512VNNI) | ||
| 1242 | fs.setIf(ecx&(1<<12) != 0, AVX512BITALG) | ||
| 1243 | fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ) | ||
| 1244 | // edx | ||
| 1245 | fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT) | ||
| 1246 | fs.setIf(edx&(1<<22) != 0, AMXBF16) | ||
| 1247 | fs.setIf(edx&(1<<23) != 0, AVX512FP16) | ||
| 1248 | fs.setIf(edx&(1<<24) != 0, AMXTILE) | ||
| 1249 | fs.setIf(edx&(1<<25) != 0, AMXINT8) | ||
| 1250 | // eax1 = CPUID.(EAX=7, ECX=1).EAX | ||
| 1251 | fs.setIf(eax1&(1<<5) != 0, AVX512BF16) | ||
| 1252 | fs.setIf(eax1&(1<<19) != 0, WRMSRNS) | ||
| 1253 | fs.setIf(eax1&(1<<21) != 0, AMXFP16) | ||
| 1254 | fs.setIf(eax1&(1<<27) != 0, MSRLIST) | ||
| 1255 | } | ||
| 1256 | } | ||
| 1257 | |||
| 1258 | // CPUID.(EAX=7, ECX=2) | ||
| 1259 | _, _, _, edx = cpuidex(7, 2) | ||
| 1260 | fs.setIf(edx&(1<<0) != 0, PSFD) | ||
| 1261 | fs.setIf(edx&(1<<1) != 0, IDPRED_CTRL) | ||
| 1262 | fs.setIf(edx&(1<<2) != 0, RRSBA_CTRL) | ||
| 1263 | fs.setIf(edx&(1<<4) != 0, BHI_CTRL) | ||
| 1264 | fs.setIf(edx&(1<<5) != 0, MCDT_NO) | ||
| 1265 | |||
| 1266 | // Add keylocker features. | ||
| 1267 | if fs.inSet(KEYLOCKER) && mfi >= 0x19 { | ||
| 1268 | _, ebx, _, _ := cpuidex(0x19, 0) | ||
| 1269 | fs.setIf(ebx&5 == 5, KEYLOCKERW) // Bit 0 and 2 (1+4) | ||
| 1270 | } | ||
| 1271 | |||
| 1272 | // Add AVX10 features. | ||
| 1273 | if fs.inSet(AVX10) && mfi >= 0x24 { | ||
| 1274 | _, ebx, _, _ := cpuidex(0x24, 0) | ||
| 1275 | fs.setIf(ebx&(1<<16) != 0, AVX10_128) | ||
| 1276 | fs.setIf(ebx&(1<<17) != 0, AVX10_256) | ||
| 1277 | fs.setIf(ebx&(1<<18) != 0, AVX10_512) | ||
| 1278 | } | ||
| 1279 | } | ||
| 1280 | |||
| 1281 | // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1) | ||
| 1282 | // EAX | ||
| 1283 | // Bit 00: XSAVEOPT is available. | ||
| 1284 | // Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set. | ||
| 1285 | // Bit 02: Supports XGETBV with ECX = 1 if set. | ||
| 1286 | // Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set. | ||
| 1287 | // Bits 31 - 04: Reserved. | ||
| 1288 | // EBX | ||
| 1289 | // Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS. | ||
| 1290 | // ECX | ||
| 1291 | // Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1. | ||
| 1292 | // EDX? | ||
| 1293 | // Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved. | ||
| 1294 | if mfi >= 0xd { | ||
| 1295 | if fs.inSet(XSAVE) { | ||
| 1296 | eax, _, _, _ := cpuidex(0xd, 1) | ||
| 1297 | fs.setIf(eax&(1<<0) != 0, XSAVEOPT) | ||
| 1298 | fs.setIf(eax&(1<<1) != 0, XSAVEC) | ||
| 1299 | fs.setIf(eax&(1<<2) != 0, XGETBV1) | ||
| 1300 | fs.setIf(eax&(1<<3) != 0, XSAVES) | ||
| 1301 | } | ||
| 1302 | } | ||
| 1303 | if maxExtendedFunction() >= 0x80000001 { | ||
| 1304 | _, _, c, d := cpuid(0x80000001) | ||
| 1305 | if (c & (1 << 5)) != 0 { | ||
| 1306 | fs.set(LZCNT) | ||
| 1307 | fs.set(POPCNT) | ||
| 1308 | } | ||
| 1309 | // ECX | ||
| 1310 | fs.setIf((c&(1<<0)) != 0, LAHF) | ||
| 1311 | fs.setIf((c&(1<<2)) != 0, SVM) | ||
| 1312 | fs.setIf((c&(1<<6)) != 0, SSE4A) | ||
| 1313 | fs.setIf((c&(1<<10)) != 0, IBS) | ||
| 1314 | fs.setIf((c&(1<<22)) != 0, TOPEXT) | ||
| 1315 | |||
| 1316 | // EDX | ||
| 1317 | fs.setIf(d&(1<<11) != 0, SYSCALL) | ||
| 1318 | fs.setIf(d&(1<<20) != 0, NX) | ||
| 1319 | fs.setIf(d&(1<<22) != 0, MMXEXT) | ||
| 1320 | fs.setIf(d&(1<<23) != 0, MMX) | ||
| 1321 | fs.setIf(d&(1<<24) != 0, FXSR) | ||
| 1322 | fs.setIf(d&(1<<25) != 0, FXSROPT) | ||
| 1323 | fs.setIf(d&(1<<27) != 0, RDTSCP) | ||
| 1324 | fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT) | ||
| 1325 | fs.setIf(d&(1<<31) != 0, AMD3DNOW) | ||
| 1326 | |||
| 1327 | /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be | ||
| 1328 | * used unless the OS has AVX support. */ | ||
| 1329 | if fs.inSet(AVX) { | ||
| 1330 | fs.setIf((c&(1<<11)) != 0, XOP) | ||
| 1331 | fs.setIf((c&(1<<16)) != 0, FMA4) | ||
| 1332 | } | ||
| 1333 | |||
| 1334 | } | ||
| 1335 | if maxExtendedFunction() >= 0x80000007 { | ||
| 1336 | _, b, _, d := cpuid(0x80000007) | ||
| 1337 | fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW) | ||
| 1338 | fs.setIf((b&(1<<1)) != 0, SUCCOR) | ||
| 1339 | fs.setIf((b&(1<<2)) != 0, HWA) | ||
| 1340 | fs.setIf((d&(1<<9)) != 0, CPBOOST) | ||
| 1341 | } | ||
| 1342 | |||
| 1343 | if maxExtendedFunction() >= 0x80000008 { | ||
| 1344 | _, b, _, _ := cpuid(0x80000008) | ||
| 1345 | fs.setIf(b&(1<<28) != 0, PSFD) | ||
| 1346 | fs.setIf(b&(1<<27) != 0, CPPC) | ||
| 1347 | fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD) | ||
| 1348 | fs.setIf(b&(1<<23) != 0, PPIN) | ||
| 1349 | fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED) | ||
| 1350 | fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS) | ||
| 1351 | fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP) | ||
| 1352 | fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED) | ||
| 1353 | fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON) | ||
| 1354 | fs.setIf(b&(1<<15) != 0, STIBP) | ||
| 1355 | fs.setIf(b&(1<<14) != 0, IBRS) | ||
| 1356 | fs.setIf((b&(1<<13)) != 0, INT_WBINVD) | ||
| 1357 | fs.setIf(b&(1<<12) != 0, IBPB) | ||
| 1358 | fs.setIf((b&(1<<9)) != 0, WBNOINVD) | ||
| 1359 | fs.setIf((b&(1<<8)) != 0, MCOMMIT) | ||
| 1360 | fs.setIf((b&(1<<4)) != 0, RDPRU) | ||
| 1361 | fs.setIf((b&(1<<3)) != 0, INVLPGB) | ||
| 1362 | fs.setIf((b&(1<<1)) != 0, MSRIRC) | ||
| 1363 | fs.setIf((b&(1<<0)) != 0, CLZERO) | ||
| 1364 | } | ||
| 1365 | |||
| 1366 | if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A { | ||
| 1367 | _, _, _, edx := cpuid(0x8000000A) | ||
| 1368 | fs.setIf((edx>>0)&1 == 1, SVMNP) | ||
| 1369 | fs.setIf((edx>>1)&1 == 1, LBRVIRT) | ||
| 1370 | fs.setIf((edx>>2)&1 == 1, SVML) | ||
| 1371 | fs.setIf((edx>>3)&1 == 1, NRIPS) | ||
| 1372 | fs.setIf((edx>>4)&1 == 1, TSCRATEMSR) | ||
| 1373 | fs.setIf((edx>>5)&1 == 1, VMCBCLEAN) | ||
| 1374 | fs.setIf((edx>>6)&1 == 1, SVMFBASID) | ||
| 1375 | fs.setIf((edx>>7)&1 == 1, SVMDA) | ||
| 1376 | fs.setIf((edx>>10)&1 == 1, SVMPF) | ||
| 1377 | fs.setIf((edx>>12)&1 == 1, SVMPFT) | ||
| 1378 | } | ||
| 1379 | |||
| 1380 | if maxExtendedFunction() >= 0x8000001a { | ||
| 1381 | eax, _, _, _ := cpuid(0x8000001a) | ||
| 1382 | fs.setIf((eax>>0)&1 == 1, FP128) | ||
| 1383 | fs.setIf((eax>>1)&1 == 1, MOVU) | ||
| 1384 | fs.setIf((eax>>2)&1 == 1, FP256) | ||
| 1385 | } | ||
| 1386 | |||
| 1387 | if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) { | ||
| 1388 | eax, _, _, _ := cpuid(0x8000001b) | ||
| 1389 | fs.setIf((eax>>0)&1 == 1, IBSFFV) | ||
| 1390 | fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM) | ||
| 1391 | fs.setIf((eax>>2)&1 == 1, IBSOPSAM) | ||
| 1392 | fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT) | ||
| 1393 | fs.setIf((eax>>4)&1 == 1, IBSOPCNT) | ||
| 1394 | fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT) | ||
| 1395 | fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT) | ||
| 1396 | fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK) | ||
| 1397 | fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE) | ||
| 1398 | fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX) | ||
| 1399 | fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1. | ||
| 1400 | fs.setIf((eax>>11)&1 == 1, IBS_ZEN4) | ||
| 1401 | } | ||
| 1402 | |||
| 1403 | if maxExtendedFunction() >= 0x8000001f && vend == AMD { | ||
| 1404 | a, _, _, _ := cpuid(0x8000001f) | ||
| 1405 | fs.setIf((a>>0)&1 == 1, SME) | ||
| 1406 | fs.setIf((a>>1)&1 == 1, SEV) | ||
| 1407 | fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH) | ||
| 1408 | fs.setIf((a>>3)&1 == 1, SEV_ES) | ||
| 1409 | fs.setIf((a>>4)&1 == 1, SEV_SNP) | ||
| 1410 | fs.setIf((a>>5)&1 == 1, VMPL) | ||
| 1411 | fs.setIf((a>>10)&1 == 1, SME_COHERENT) | ||
| 1412 | fs.setIf((a>>11)&1 == 1, SEV_64BIT) | ||
| 1413 | fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED) | ||
| 1414 | fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE) | ||
| 1415 | fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP) | ||
| 1416 | fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST) | ||
| 1417 | fs.setIf((a>>16)&1 == 1, VTE) | ||
| 1418 | fs.setIf((a>>24)&1 == 1, VMSA_REGPROT) | ||
| 1419 | } | ||
| 1420 | |||
| 1421 | if mfi >= 0x20 { | ||
| 1422 | // Microsoft has decided to purposefully hide the information | ||
| 1423 | // of the guest TEE when VMs are being created using Hyper-V. | ||
| 1424 | // | ||
| 1425 | // This leads us to check for the Hyper-V cpuid features | ||
| 1426 | // (0x4000000C), and then for the `ebx` value set. | ||
| 1427 | // | ||
| 1428 | // For Intel TDX, `ebx` is set as `0xbe3`, being 3 the part | ||
| 1429 | // we're mostly interested about,according to: | ||
| 1430 | // https://github.com/torvalds/linux/blob/d2f51b3516dade79269ff45eae2a7668ae711b25/arch/x86/include/asm/hyperv-tlfs.h#L169-L174 | ||
| 1431 | _, ebx, _, _ := cpuid(0x4000000C) | ||
| 1432 | fs.setIf(ebx == 0xbe3, TDX_GUEST) | ||
| 1433 | } | ||
| 1434 | |||
| 1435 | if mfi >= 0x21 { | ||
| 1436 | // Intel Trusted Domain Extensions Guests have their own cpuid leaf (0x21). | ||
| 1437 | _, ebx, ecx, edx := cpuid(0x21) | ||
| 1438 | identity := string(valAsString(ebx, edx, ecx)) | ||
| 1439 | fs.setIf(identity == "IntelTDX ", TDX_GUEST) | ||
| 1440 | } | ||
| 1441 | |||
| 1442 | return fs | ||
| 1443 | } | ||
| 1444 | |||
| 1445 | func (c *CPUInfo) supportAVX10() uint8 { | ||
| 1446 | if c.maxFunc >= 0x24 && c.featureSet.inSet(AVX10) { | ||
| 1447 | _, ebx, _, _ := cpuidex(0x24, 0) | ||
| 1448 | return uint8(ebx) | ||
| 1449 | } | ||
| 1450 | return 0 | ||
| 1451 | } | ||
| 1452 | |||
// valAsString unpacks each 32-bit value into four bytes, least significant
// byte first, and returns the concatenation. The result is cut off just
// before the first zero byte, if there is one.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 4*len(values))
	for idx, word := range values {
		base := idx * 4
		chunk := out[base : base+4]
		// Store the whole word before looking for a terminator.
		for k := range chunk {
			chunk[k] = byte(word >> (8 * uint(k)))
		}
		for k, b := range chunk {
			if b == 0 {
				return out[:base+k]
			}
		}
	}
	return out
}