From 404aeae4545d2426c089a5f8d5e82dae56f5212b Mon Sep 17 00:00:00 2001 From: Rutger Broekhoff Date: Fri, 29 Dec 2023 21:31:53 +0100 Subject: Make Nix builds work --- vendor/github.com/klauspost/cpuid/v2/cpuid.go | 1473 +++++++++++++++++++++++++ 1 file changed, 1473 insertions(+) create mode 100644 vendor/github.com/klauspost/cpuid/v2/cpuid.go (limited to 'vendor/github.com/klauspost/cpuid/v2/cpuid.go') diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go new file mode 100644 index 0000000..15b7603 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go @@ -0,0 +1,1473 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +// Package cpuid provides information about the CPU running the current program. +// +// CPU features are detected on startup, and kept for fast access through the life of the application. +// Currently x86 / x64 (AMD64) as well as arm64 is supported. +// +// You can access the CPU information by accessing the shared CPU variable of the cpuid library. +// +// Package home: https://github.com/klauspost/cpuid +package cpuid + +import ( + "flag" + "fmt" + "math" + "math/bits" + "os" + "runtime" + "strings" +) + +// AMD refererence: https://www.amd.com/system/files/TechDocs/25481.pdf +// and Processor Programming Reference (PPR) + +// Vendor is a representation of a CPU vendor. +type Vendor int + +const ( + VendorUnknown Vendor = iota + Intel + AMD + VIA + Transmeta + NSC + KVM // Kernel-based Virtual Machine + MSVM // Microsoft Hyper-V or Windows Virtual PC + VMware + XenHVM + Bhyve + Hygon + SiS + RDC + + Ampere + ARM + Broadcom + Cavium + DEC + Fujitsu + Infineon + Motorola + NVIDIA + AMCC + Qualcomm + Marvell + + lastVendor +) + +//go:generate stringer -type=FeatureID,Vendor + +// FeatureID is the ID of a specific cpu feature. +type FeatureID int + +const ( + // Keep index -1 as unknown + UNKNOWN = -1 + + // Add features + ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) + AESNI // Advanced Encryption Standard New Instructions + AMD3DNOW // AMD 3DNOW + AMD3DNOWEXT // AMD 3DNowExt + AMXBF16 // Tile computational operations on BFLOAT16 numbers + AMXFP16 // Tile computational operations on FP16 numbers + AMXINT8 // Tile computational operations on 8-bit integers + AMXTILE // Tile architecture + APX_F // Intel APX + AVX // AVX functions + AVX10 // If set the Intel AVX10 Converged Vector ISA is supported + AVX10_128 // If set indicates that AVX10 128-bit vector support is present + AVX10_256 // If set indicates that AVX10 256-bit vector support is present + AVX10_512 // If set indicates that AVX10 512-bit vector support is present + AVX2 // AVX2 functions + AVX512BF16 // AVX-512 BFLOAT16 Instructions + AVX512BITALG // AVX-512 Bit Algorithms + AVX512BW // AVX-512 Byte and Word Instructions + AVX512CD // AVX-512 Conflict Detection Instructions + AVX512DQ // AVX-512 Doubleword and Quadword Instructions + AVX512ER // AVX-512 Exponential and Reciprocal Instructions + AVX512F // AVX-512 Foundation + AVX512FP16 // AVX-512 FP16 Instructions + AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions + AVX512PF // AVX-512 Prefetch Instructions + AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions + AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2 + AVX512VL // AVX-512 Vector Length Extensions + AVX512VNNI // AVX-512 Vector Neural Network Instructions + AVX512VP2INTERSECT // AVX-512 Intersect for D/Q + AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword + AVXIFMA // AVX-IFMA instructions + AVXNECONVERT // AVX-NE-CONVERT instructions + AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one + AVXVNNI // AVX (VEX encoded) VNNI neural network instructions + AVXVNNIINT8 // AVX-VNNI-INT8 instructions + BHI_CTRL // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 + BMI1 // Bit Manipulation Instruction Set 1 + BMI2 // Bit Manipulation Instruction Set 2 + CETIBT // Intel CET Indirect Branch Tracking + CETSS // Intel CET Shadow Stack + CLDEMOTE // Cache Line Demote + CLMUL // Carry-less Multiplication + CLZERO // CLZERO instruction supported + CMOV // i686 CMOV + CMPCCXADD // CMPCCXADD instructions + CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB + CMPXCHG8 // CMPXCHG8 instruction + CPBOOST // Core Performance Boost + CPPC // AMD: Collaborative Processor Performance Control + CX16 // CMPXCHG16B Instruction + EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ + ENQCMD // Enqueue Command + ERMS // Enhanced REP MOVSB/STOSB + F16C // Half-precision floating-point conversion + FLUSH_L1D // Flush L1D cache + FMA3 // Intel FMA 3. Does not imply AVX. + FMA4 // Bulldozer FMA4 functions + FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide + FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide + FSRM // Fast Short Rep Mov + FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9 + FXSROPT // FXSAVE/FXRSTOR optimizations + GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. + HLE // Hardware Lock Elision + HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR + HTT // Hyperthreading (enabled) + HWA // Hardware assert supported. Indicates support for MSRC001_10 + HYBRID_CPU // This part has CPUs of more than one type. + HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors + IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel) + IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR + IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) + IBRS // AMD: Indirect Branch Restricted Speculation + IBRS_PREFERRED // AMD: IBRS is preferred over software solution + IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection + IBS // Instruction Based Sampling (AMD) + IBSBRNTRGT // Instruction Based Sampling Feature (AMD) + IBSFETCHSAM // Instruction Based Sampling Feature (AMD) + IBSFFV // Instruction Based Sampling Feature (AMD) + IBSOPCNT // Instruction Based Sampling Feature (AMD) + IBSOPCNTEXT // Instruction Based Sampling Feature (AMD) + IBSOPSAM // Instruction Based Sampling Feature (AMD) + IBSRDWROPCNT // Instruction Based Sampling Feature (AMD) + IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD) + IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported + IBS_OPDATA4 // AMD: IBS op data 4 MSR supported + IBS_OPFUSE // AMD: Indicates support for IbsOpFuse + IBS_PREVENTHOST // Disallowing IBS use by the host supported + IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4 + IDPRED_CTRL // IPRED_DIS + INT_WBINVD // WBINVD/WBNOINVD are interruptible. + INVLPGB // NVLPGB and TLBSYNC instruction supported + KEYLOCKER // Key locker + KEYLOCKERW // Key locker wide + LAHF // LAHF/SAHF in long mode + LAM // If set, CPU supports Linear Address Masking + LBRVIRT // LBR virtualization + LZCNT // LZCNT instruction + MCAOVERFLOW // MCA overflow recovery support. + MCDT_NO // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it. + MCOMMIT // MCOMMIT instruction supported + MD_CLEAR // VERW clears CPU buffers + MMX // standard MMX + MMXEXT // SSE integer functions or AMD MMX ext + MOVBE // MOVBE instruction (big-endian) + MOVDIR64B // Move 64 Bytes as Direct Store + MOVDIRI // Move Doubleword as Direct Store + MOVSB_ZL // Fast Zero-Length MOVSB + MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD + MPX // Intel MPX (Memory Protection Extensions) + MSRIRC // Instruction Retired Counter MSR available + MSRLIST // Read/Write List of Model Specific Registers + MSR_PAGEFLUSH // Page Flush MSR available + NRIPS // Indicates support for NRIP save on VMEXIT + NX // NX (No-Execute) bit + OSXSAVE // XSAVE enabled by OS + PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption + POPCNT // POPCNT instruction + PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled + PREFETCHI // PREFETCHIT0/1 instructions + PSFD // Predictive Store Forward Disable + RDPRU // RDPRU instruction supported + RDRAND // RDRAND instruction is available + RDSEED // RDSEED instruction is available + RDTSCP // RDTSCP Instruction + RRSBA_CTRL // Restricted RSB Alternate + RTM // Restricted Transactional Memory + RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort. + SERIALIZE // Serialize Instruction Execution + SEV // AMD Secure Encrypted Virtualization supported + SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host + SEV_ALTERNATIVE // AMD SEV Alternate Injection supported + SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests + SEV_ES // AMD SEV Encrypted State supported + SEV_RESTRICTED // AMD SEV Restricted Injection supported + SEV_SNP // AMD SEV Secure Nested Paging supported + SGX // Software Guard Extensions + SGXLC // Software Guard Extensions Launch Control + SHA // Intel SHA Extensions + SME // AMD Secure Memory Encryption supported + SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced + SPEC_CTRL_SSBD // Speculative Store Bypass Disable + SRBDS_CTRL // SRBDS mitigation MSR available + SSE // SSE functions + SSE2 // P4 SSE functions + SSE3 // Prescott SSE3 functions + SSE4 // Penryn SSE4.1 functions + SSE42 // Nehalem SSE4.2 functions + SSE4A // AMD Barcelona microarchitecture SSE4a instructions + SSSE3 // Conroe SSSE3 functions + STIBP // Single Thread Indirect Branch Predictors + STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On + STOSB_SHORT // Fast short STOSB + SUCCOR // Software uncorrectable error containment and recovery capability. + SVM // AMD Secure Virtual Machine + SVMDA // Indicates support for the SVM decode assists. + SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control + SVML // AMD SVM lock. Indicates support for SVM-Lock. + SVMNP // AMD SVM nested paging + SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter + SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold + SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions. + SYSEE // SYSENTER and SYSEXIT instructions + TBM // AMD Trailing Bit Manipulation + TDX_GUEST // Intel Trust Domain Extensions Guest + TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations + TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. + TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. + TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 + TSXLDTRK // Intel TSX Suspend Load Address Tracking + VAES // Vector AES. AVX(512) versions requires additional checks. + VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits. + VMPL // AMD VM Permission Levels supported + VMSA_REGPROT // AMD VMSA Register Protection supported + VMX // Virtual Machine Extensions + VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions. + VTE // AMD Virtual Transparent Encryption supported + WAITPKG // TPAUSE, UMONITOR, UMWAIT + WBNOINVD // Write Back and Do Not Invalidate Cache + WRMSRNS // Non-Serializing Write to Model Specific Register + X87 // FPU + XGETBV1 // Supports XGETBV with ECX = 1 + XOP // Bulldozer XOP functions + XSAVE // XSAVE, XRESTOR, XSETBV, XGETBV + XSAVEC // Supports XSAVEC and the compacted form of XRSTOR. + XSAVEOPT // XSAVEOPT available + XSAVES // Supports XSAVES/XRSTORS and IA32_XSS + + // ARM features: + AESARM // AES instructions + ARMCPUID // Some CPU ID registers readable at user-level + ASIMD // Advanced SIMD + ASIMDDP // SIMD Dot Product + ASIMDHP // Advanced SIMD half-precision floating point + ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) + ATOMICS // Large System Extensions (LSE) + CRC32 // CRC32/CRC32C instructions + DCPOP // Data cache clean to Point of Persistence (DC CVAP) + EVTSTRM // Generic timer + FCMA // Floatin point complex number addition and multiplication + FP // Single-precision and double-precision floating point + FPHP // Half-precision floating point + GPA // Generic Pointer Authentication + JSCVT // Javascript-style double->int convert (FJCVTZS) + LRCPC // Weaker release consistency (LDAPR, etc) + PMULL // Polynomial Multiply instructions (PMULL/PMULL2) + SHA1 // SHA-1 instructions (SHA1C, etc) + SHA2 // SHA-2 instructions (SHA256H, etc) + SHA3 // SHA-3 instructions (EOR3, RAXI, XAR, BCAX) + SHA512 // SHA512 instructions + SM3 // SM3 instructions + SM4 // SM4 instructions + SVE // Scalable Vector Extension + // Keep it last. It automatically defines the size of []flagSet + lastID + + firstID FeatureID = UNKNOWN + 1 +) + +// CPUInfo contains information about the detected system CPU. +type CPUInfo struct { + BrandName string // Brand name reported by the CPU + VendorID Vendor // Comparable CPU vendor ID + VendorString string // Raw vendor string. + featureSet flagSet // Features of the CPU + PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable. + ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable. + LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable. + Family int // CPU family number + Model int // CPU model number + Stepping int // CPU stepping info + CacheLine int // Cache line size in bytes. Will be 0 if undetectable. + Hz int64 // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed. + BoostFreq int64 // Max clock speed, if known, 0 otherwise + Cache struct { + L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected + L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected + L2 int // L2 Cache (per core or shared). Will be -1 if undetected + L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected + } + SGX SGXSupport + AVX10Level uint8 + maxFunc uint32 + maxExFunc uint32 +} + +var cpuid func(op uint32) (eax, ebx, ecx, edx uint32) +var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32) +var xgetbv func(index uint32) (eax, edx uint32) +var rdtscpAsm func() (eax, ebx, ecx, edx uint32) +var darwinHasAVX512 = func() bool { return false } + +// CPU contains information about the CPU as detected on startup, +// or when Detect last was called. +// +// Use this as the primary entry point to you data. +var CPU CPUInfo + +func init() { + initCPU() + Detect() +} + +// Detect will re-detect current CPU info. +// This will replace the content of the exported CPU variable. +// +// Unless you expect the CPU to change while you are running your program +// you should not need to call this function. +// If you call this, you must ensure that no other goroutine is accessing the +// exported CPU variable. +func Detect() { + // Set defaults + CPU.ThreadsPerCore = 1 + CPU.Cache.L1I = -1 + CPU.Cache.L1D = -1 + CPU.Cache.L2 = -1 + CPU.Cache.L3 = -1 + safe := true + if detectArmFlag != nil { + safe = !*detectArmFlag + } + addInfo(&CPU, safe) + if displayFeats != nil && *displayFeats { + fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ",")) + // Exit with non-zero so tests will print value. + os.Exit(1) + } + if disableFlag != nil { + s := strings.Split(*disableFlag, ",") + for _, feat := range s { + feat := ParseFeature(strings.TrimSpace(feat)) + if feat != UNKNOWN { + CPU.featureSet.unset(feat) + } + } + } +} + +// DetectARM will detect ARM64 features. +// This is NOT done automatically since it can potentially crash +// if the OS does not handle the command. +// If in the future this can be done safely this function may not +// do anything. +func DetectARM() { + addInfo(&CPU, false) +} + +var detectArmFlag *bool +var displayFeats *bool +var disableFlag *string + +// Flags will enable flags. +// This must be called *before* flag.Parse AND +// Detect must be called after the flags have been parsed. +// Note that this means that any detection used in init() functions +// will not contain these flags. +func Flags() { + disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list") + displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits") + detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash") +} + +// Supports returns whether the CPU supports all of the requested features. +func (c CPUInfo) Supports(ids ...FeatureID) bool { + for _, id := range ids { + if !c.featureSet.inSet(id) { + return false + } + } + return true +} + +// Has allows for checking a single feature. +// Should be inlined by the compiler. +func (c *CPUInfo) Has(id FeatureID) bool { + return c.featureSet.inSet(id) +} + +// AnyOf returns whether the CPU supports one or more of the requested features. +func (c CPUInfo) AnyOf(ids ...FeatureID) bool { + for _, id := range ids { + if c.featureSet.inSet(id) { + return true + } + } + return false +} + +// Features contains several features combined for a fast check using +// CpuInfo.HasAll +type Features *flagSet + +// CombineFeatures allows to combine several features for a close to constant time lookup. +func CombineFeatures(ids ...FeatureID) Features { + var v flagSet + for _, id := range ids { + v.set(id) + } + return &v +} + +func (c *CPUInfo) HasAll(f Features) bool { + return c.featureSet.hasSetP(f) +} + +// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels +var oneOfLevel = CombineFeatures(SYSEE, SYSCALL) +var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2) +var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3) +var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE) +var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL) + +// X64Level returns the microarchitecture level detected on the CPU. +// If features are lacking or non x64 mode, 0 is returned. +// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels +func (c CPUInfo) X64Level() int { + if !c.featureSet.hasOneOf(oneOfLevel) { + return 0 + } + if c.featureSet.hasSetP(level4Features) { + return 4 + } + if c.featureSet.hasSetP(level3Features) { + return 3 + } + if c.featureSet.hasSetP(level2Features) { + return 2 + } + if c.featureSet.hasSetP(level1Features) { + return 1 + } + return 0 +} + +// Disable will disable one or several features. +func (c *CPUInfo) Disable(ids ...FeatureID) bool { + for _, id := range ids { + c.featureSet.unset(id) + } + return true +} + +// Enable will disable one or several features even if they were undetected. +// This is of course not recommended for obvious reasons. +func (c *CPUInfo) Enable(ids ...FeatureID) bool { + for _, id := range ids { + c.featureSet.set(id) + } + return true +} + +// IsVendor returns true if vendor is recognized as Intel +func (c CPUInfo) IsVendor(v Vendor) bool { + return c.VendorID == v +} + +// FeatureSet returns all available features as strings. +func (c CPUInfo) FeatureSet() []string { + s := make([]string, 0, c.featureSet.nEnabled()) + s = append(s, c.featureSet.Strings()...) + return s +} + +// RTCounter returns the 64-bit time-stamp counter +// Uses the RDTSCP instruction. The value 0 is returned +// if the CPU does not support the instruction. +func (c CPUInfo) RTCounter() uint64 { + if !c.Supports(RDTSCP) { + return 0 + } + a, _, _, d := rdtscpAsm() + return uint64(a) | (uint64(d) << 32) +} + +// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. +// This variable is OS dependent, but on Linux contains information +// about the current cpu/core the code is running on. +// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. +func (c CPUInfo) Ia32TscAux() uint32 { + if !c.Supports(RDTSCP) { + return 0 + } + _, _, ecx, _ := rdtscpAsm() + return ecx +} + +// LogicalCPU will return the Logical CPU the code is currently executing on. +// This is likely to change when the OS re-schedules the running thread +// to another CPU. +// If the current core cannot be detected, -1 will be returned. +func (c CPUInfo) LogicalCPU() int { + if c.maxFunc < 1 { + return -1 + } + _, ebx, _, _ := cpuid(1) + return int(ebx >> 24) +} + +// frequencies tries to compute the clock speed of the CPU. If leaf 15 is +// supported, use it, otherwise parse the brand string. Yes, really. +func (c *CPUInfo) frequencies() { + c.Hz, c.BoostFreq = 0, 0 + mfi := maxFunctionID() + if mfi >= 0x15 { + eax, ebx, ecx, _ := cpuid(0x15) + if eax != 0 && ebx != 0 && ecx != 0 { + c.Hz = (int64(ecx) * int64(ebx)) / int64(eax) + } + } + if mfi >= 0x16 { + a, b, _, _ := cpuid(0x16) + // Base... + if a&0xffff > 0 { + c.Hz = int64(a&0xffff) * 1_000_000 + } + // Boost... + if b&0xffff > 0 { + c.BoostFreq = int64(b&0xffff) * 1_000_000 + } + } + if c.Hz > 0 { + return + } + + // computeHz determines the official rated speed of a CPU from its brand + // string. This insanity is *actually the official documented way to do + // this according to Intel*, prior to leaf 0x15 existing. The official + // documentation only shows this working for exactly `x.xx` or `xxxx` + // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other + // sizes. + model := c.BrandName + hz := strings.LastIndex(model, "Hz") + if hz < 3 { + return + } + var multiplier int64 + switch model[hz-1] { + case 'M': + multiplier = 1000 * 1000 + case 'G': + multiplier = 1000 * 1000 * 1000 + case 'T': + multiplier = 1000 * 1000 * 1000 * 1000 + } + if multiplier == 0 { + return + } + freq := int64(0) + divisor := int64(0) + decimalShift := int64(1) + var i int + for i = hz - 2; i >= 0 && model[i] != ' '; i-- { + if model[i] >= '0' && model[i] <= '9' { + freq += int64(model[i]-'0') * decimalShift + decimalShift *= 10 + } else if model[i] == '.' { + if divisor != 0 { + return + } + divisor = decimalShift + } else { + return + } + } + // we didn't find a space + if i < 0 { + return + } + if divisor != 0 { + c.Hz = (freq * multiplier) / divisor + return + } + c.Hz = freq * multiplier +} + +// VM Will return true if the cpu id indicates we are in +// a virtual machine. +func (c CPUInfo) VM() bool { + return CPU.featureSet.inSet(HYPERVISOR) +} + +// flags contains detected cpu features and characteristics +type flags uint64 + +// log2(bits_in_uint64) +const flagBitsLog2 = 6 +const flagBits = 1 << flagBitsLog2 +const flagMask = flagBits - 1 + +// flagSet contains detected cpu features and characteristics in an array of flags +type flagSet [(lastID + flagMask) / flagBits]flags + +func (s *flagSet) inSet(feat FeatureID) bool { + return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0 +} + +func (s *flagSet) set(feat FeatureID) { + s[feat>>flagBitsLog2] |= 1 << (feat & flagMask) +} + +// setIf will set a feature if boolean is true. +func (s *flagSet) setIf(cond bool, features ...FeatureID) { + if cond { + for _, offset := range features { + s[offset>>flagBitsLog2] |= 1 << (offset & flagMask) + } + } +} + +func (s *flagSet) unset(offset FeatureID) { + bit := flags(1 << (offset & flagMask)) + s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit +} + +// or with another flagset. +func (s *flagSet) or(other flagSet) { + for i, v := range other[:] { + s[i] |= v + } +} + +// hasSet returns whether all features are present. +func (s *flagSet) hasSet(other flagSet) bool { + for i, v := range other[:] { + if s[i]&v != v { + return false + } + } + return true +} + +// hasSet returns whether all features are present. +func (s *flagSet) hasSetP(other *flagSet) bool { + for i, v := range other[:] { + if s[i]&v != v { + return false + } + } + return true +} + +// hasOneOf returns whether one or more features are present. +func (s *flagSet) hasOneOf(other *flagSet) bool { + for i, v := range other[:] { + if s[i]&v != 0 { + return true + } + } + return false +} + +// nEnabled will return the number of enabled flags. +func (s *flagSet) nEnabled() (n int) { + for _, v := range s[:] { + n += bits.OnesCount64(uint64(v)) + } + return n +} + +func flagSetWith(feat ...FeatureID) flagSet { + var res flagSet + for _, f := range feat { + res.set(f) + } + return res +} + +// ParseFeature will parse the string and return the ID of the matching feature. +// Will return UNKNOWN if not found. +func ParseFeature(s string) FeatureID { + s = strings.ToUpper(s) + for i := firstID; i < lastID; i++ { + if i.String() == s { + return i + } + } + return UNKNOWN +} + +// Strings returns an array of the detected features for FlagsSet. +func (s flagSet) Strings() []string { + if len(s) == 0 { + return []string{""} + } + r := make([]string, 0) + for i := firstID; i < lastID; i++ { + if s.inSet(i) { + r = append(r, i.String()) + } + } + return r +} + +func maxExtendedFunction() uint32 { + eax, _, _, _ := cpuid(0x80000000) + return eax +} + +func maxFunctionID() uint32 { + a, _, _, _ := cpuid(0) + return a +} + +func brandName() string { + if maxExtendedFunction() >= 0x80000004 { + v := make([]uint32, 0, 48) + for i := uint32(0); i < 3; i++ { + a, b, c, d := cpuid(0x80000002 + i) + v = append(v, a, b, c, d) + } + return strings.Trim(string(valAsString(v...)), " ") + } + return "unknown" +} + +func threadsPerCore() int { + mfi := maxFunctionID() + vend, _ := vendorID() + + if mfi < 0x4 || (vend != Intel && vend != AMD) { + return 1 + } + + if mfi < 0xb { + if vend != Intel { + return 1 + } + _, b, _, d := cpuid(1) + if (d & (1 << 28)) != 0 { + // v will contain logical core count + v := (b >> 16) & 255 + if v > 1 { + a4, _, _, _ := cpuid(4) + // physical cores + v2 := (a4 >> 26) + 1 + if v2 > 0 { + return int(v) / int(v2) + } + } + } + return 1 + } + _, b, _, _ := cpuidex(0xb, 0) + if b&0xffff == 0 { + if vend == AMD { + // Workaround for AMD returning 0, assume 2 if >= Zen 2 + // It will be more correct than not. + fam, _, _ := familyModel() + _, _, _, d := cpuid(1) + if (d&(1<<28)) != 0 && fam >= 23 { + return 2 + } + } + return 1 + } + return int(b & 0xffff) +} + +func logicalCores() int { + mfi := maxFunctionID() + v, _ := vendorID() + switch v { + case Intel: + // Use this on old Intel processors + if mfi < 0xb { + if mfi < 1 { + return 0 + } + // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) + // that can be assigned to logical processors in a physical package. + // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. + _, ebx, _, _ := cpuid(1) + logical := (ebx >> 16) & 0xff + return int(logical) + } + _, b, _, _ := cpuidex(0xb, 1) + return int(b & 0xffff) + case AMD, Hygon: + _, b, _, _ := cpuid(1) + return int((b >> 16) & 0xff) + default: + return 0 + } +} + +func familyModel() (family, model, stepping int) { + if maxFunctionID() < 0x1 { + return 0, 0, 0 + } + eax, _, _, _ := cpuid(1) + // If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0]. + family = int((eax >> 8) & 0xf) + extFam := family == 0x6 // Intel is 0x6, needs extended model. + if family == 0xf { + // Add ExtFamily + family += int((eax >> 20) & 0xff) + extFam = true + } + // If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0]. + model = int((eax >> 4) & 0xf) + if extFam { + // Add ExtModel + model += int((eax >> 12) & 0xf0) + } + stepping = int(eax & 0xf) + return family, model, stepping +} + +func physicalCores() int { + v, _ := vendorID() + switch v { + case Intel: + return logicalCores() / threadsPerCore() + case AMD, Hygon: + lc := logicalCores() + tpc := threadsPerCore() + if lc > 0 && tpc > 0 { + return lc / tpc + } + + // The following is inaccurate on AMD EPYC 7742 64-Core Processor + if maxExtendedFunction() >= 0x80000008 { + _, _, c, _ := cpuid(0x80000008) + if c&0xff > 0 { + return int(c&0xff) + 1 + } + } + } + return 0 +} + +// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID +var vendorMapping = map[string]Vendor{ + "AMDisbetter!": AMD, + "AuthenticAMD": AMD, + "CentaurHauls": VIA, + "GenuineIntel": Intel, + "TransmetaCPU": Transmeta, + "GenuineTMx86": Transmeta, + "Geode by NSC": NSC, + "VIA VIA VIA ": VIA, + "KVMKVMKVMKVM": KVM, + "Microsoft Hv": MSVM, + "VMwareVMware": VMware, + "XenVMMXenVMM": XenHVM, + "bhyve bhyve ": Bhyve, + "HygonGenuine": Hygon, + "Vortex86 SoC": SiS, + "SiS SiS SiS ": SiS, + "RiseRiseRise": SiS, + "Genuine RDC": RDC, +} + +func vendorID() (Vendor, string) { + _, b, c, d := cpuid(0) + v := string(valAsString(b, d, c)) + vend, ok := vendorMapping[v] + if !ok { + return VendorUnknown, v + } + return vend, v +} + +func cacheLine() int { + if maxFunctionID() < 0x1 { + return 0 + } + + _, ebx, _, _ := cpuid(1) + cache := (ebx & 0xff00) >> 5 // cflush size + if cache == 0 && maxExtendedFunction() >= 0x80000006 { + _, _, ecx, _ := cpuid(0x80000006) + cache = ecx & 0xff // cacheline size + } + // TODO: Read from Cache and TLB Information + return int(cache) +} + +func (c *CPUInfo) cacheSize() { + c.Cache.L1D = -1 + c.Cache.L1I = -1 + c.Cache.L2 = -1 + c.Cache.L3 = -1 + vendor, _ := vendorID() + switch vendor { + case Intel: + if maxFunctionID() < 4 { + return + } + c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0 + for i := uint32(0); ; i++ { + eax, ebx, ecx, _ := cpuidex(4, i) + cacheType := eax & 15 + if cacheType == 0 { + break + } + cacheLevel := (eax >> 5) & 7 + coherency := int(ebx&0xfff) + 1 + partitions := int((ebx>>12)&0x3ff) + 1 + associativity := int((ebx>>22)&0x3ff) + 1 + sets := int(ecx) + 1 + size := associativity * partitions * coherency * sets + switch cacheLevel { + case 1: + if cacheType == 1 { + // 1 = Data Cache + c.Cache.L1D = size + } else if cacheType == 2 { + // 2 = Instruction Cache + c.Cache.L1I = size + } else { + if c.Cache.L1D < 0 { + c.Cache.L1I = size + } + if c.Cache.L1I < 0 { + c.Cache.L1I = size + } + } + case 2: + c.Cache.L2 = size + case 3: + c.Cache.L3 = size + } + } + case AMD, Hygon: + // Untested. + if maxExtendedFunction() < 0x80000005 { + return + } + _, _, ecx, edx := cpuid(0x80000005) + c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) + c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) + + if maxExtendedFunction() < 0x80000006 { + return + } + _, _, ecx, _ = cpuid(0x80000006) + c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) + + // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties + if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) { + return + } + + // Xen Hypervisor is buggy and returns the same entry no matter ECX value. + // Hack: When we encounter the same entry 100 times we break. + nSame := 0 + var last uint32 + for i := uint32(0); i < math.MaxUint32; i++ { + eax, ebx, ecx, _ := cpuidex(0x8000001D, i) + + level := (eax >> 5) & 7 + cacheNumSets := ecx + 1 + cacheLineSize := 1 + (ebx & 2047) + cachePhysPartitions := 1 + ((ebx >> 12) & 511) + cacheNumWays := 1 + ((ebx >> 22) & 511) + + typ := eax & 15 + size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays) + if typ == 0 { + return + } + + // Check for the same value repeated. + comb := eax ^ ebx ^ ecx + if comb == last { + nSame++ + if nSame == 100 { + return + } + } + last = comb + + switch level { + case 1: + switch typ { + case 1: + // Data cache + c.Cache.L1D = size + case 2: + // Inst cache + c.Cache.L1I = size + default: + if c.Cache.L1D < 0 { + c.Cache.L1I = size + } + if c.Cache.L1I < 0 { + c.Cache.L1I = size + } + } + case 2: + c.Cache.L2 = size + case 3: + c.Cache.L3 = size + } + } + } +} + +type SGXEPCSection struct { + BaseAddress uint64 + EPCSize uint64 +} + +type SGXSupport struct { + Available bool + LaunchControl bool + SGX1Supported bool + SGX2Supported bool + MaxEnclaveSizeNot64 int64 + MaxEnclaveSize64 int64 + EPCSections []SGXEPCSection +} + +func hasSGX(available, lc bool) (rval SGXSupport) { + rval.Available = available + + if !available { + return + } + + rval.LaunchControl = lc + + a, _, _, d := cpuidex(0x12, 0) + rval.SGX1Supported = a&0x01 != 0 + rval.SGX2Supported = a&0x02 != 0 + rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 + rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 + rval.EPCSections = make([]SGXEPCSection, 0) + + for subleaf := uint32(2); subleaf < 2+8; subleaf++ { + eax, ebx, ecx, edx := cpuidex(0x12, subleaf) + leafType := eax & 0xf + + if leafType == 0 { + // Invalid subleaf, stop iterating + break + } else if leafType == 1 { + // EPC Section subleaf + baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32) + size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32) + + section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size} + rval.EPCSections = append(rval.EPCSections, section) + } + } + + return +} + +func support() flagSet { + var fs flagSet + mfi := maxFunctionID() + vend, _ := vendorID() + if mfi < 0x1 { + return fs + } + family, model, _ := familyModel() + + _, _, c, d := cpuid(1) + fs.setIf((d&(1<<0)) != 0, X87) + fs.setIf((d&(1<<8)) != 0, CMPXCHG8) + fs.setIf((d&(1<<11)) != 0, SYSEE) + fs.setIf((d&(1<<15)) != 0, CMOV) + fs.setIf((d&(1<<23)) != 0, MMX) + fs.setIf((d&(1<<24)) != 0, FXSR) + fs.setIf((d&(1<<25)) != 0, FXSROPT) + fs.setIf((d&(1<<25)) != 0, SSE) + fs.setIf((d&(1<<26)) != 0, SSE2) + fs.setIf((c&1) != 0, SSE3) + fs.setIf((c&(1<<5)) != 0, VMX) + fs.setIf((c&(1<<9)) != 0, SSSE3) + fs.setIf((c&(1<<19)) != 0, SSE4) + fs.setIf((c&(1<<20)) != 0, SSE42) + fs.setIf((c&(1<<25)) != 0, AESNI) + fs.setIf((c&(1<<1)) != 0, CLMUL) + fs.setIf(c&(1<<22) != 0, MOVBE) + fs.setIf(c&(1<<23) != 0, POPCNT) + fs.setIf(c&(1<<30) != 0, RDRAND) + + // This bit has been reserved by Intel & AMD for use by hypervisors, + // and indicates the presence of a hypervisor. + fs.setIf(c&(1<<31) != 0, HYPERVISOR) + fs.setIf(c&(1<<29) != 0, F16C) + fs.setIf(c&(1<<13) != 0, CX16) + + if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { + fs.setIf(threadsPerCore() > 1, HTT) + } + if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 { + fs.setIf(threadsPerCore() > 1, HTT) + } + fs.setIf(c&1<<26 != 0, XSAVE) + fs.setIf(c&1<<27 != 0, OSXSAVE) + // Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits + const avxCheck = 1<<26 | 1<<27 | 1<<28 + if c&avxCheck == avxCheck { + // Check for OS support + eax, _ := xgetbv(0) + if (eax & 0x6) == 0x6 { + fs.set(AVX) + switch vend { + case Intel: + // Older than Haswell. + fs.setIf(family == 6 && model < 60, AVXSLOW) + case AMD: + // Older than Zen 2 + fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW) + } + } + } + // FMA3 can be used with SSE registers, so no OS support is strictly needed. + // fma3 and OSXSAVE needed. + const fma3Check = 1<<12 | 1<<27 + fs.setIf(c&fma3Check == fma3Check, FMA3) + + // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. + if mfi >= 7 { + _, ebx, ecx, edx := cpuidex(7, 0) + if fs.inSet(AVX) && (ebx&0x00000020) != 0 { + fs.set(AVX2) + } + // CPUID.(EAX=7, ECX=0).EBX + if (ebx & 0x00000008) != 0 { + fs.set(BMI1) + fs.setIf((ebx&0x00000100) != 0, BMI2) + } + fs.setIf(ebx&(1<<2) != 0, SGX) + fs.setIf(ebx&(1<<4) != 0, HLE) + fs.setIf(ebx&(1<<9) != 0, ERMS) + fs.setIf(ebx&(1<<11) != 0, RTM) + fs.setIf(ebx&(1<<14) != 0, MPX) + fs.setIf(ebx&(1<<18) != 0, RDSEED) + fs.setIf(ebx&(1<<19) != 0, ADX) + fs.setIf(ebx&(1<<29) != 0, SHA) + + // CPUID.(EAX=7, ECX=0).ECX + fs.setIf(ecx&(1<<5) != 0, WAITPKG) + fs.setIf(ecx&(1<<7) != 0, CETSS) + fs.setIf(ecx&(1<<8) != 0, GFNI) + fs.setIf(ecx&(1<<9) != 0, VAES) + fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ) + fs.setIf(ecx&(1<<13) != 0, TME) + fs.setIf(ecx&(1<<25) != 0, CLDEMOTE) + fs.setIf(ecx&(1<<23) != 0, KEYLOCKER) + fs.setIf(ecx&(1<<27) != 0, MOVDIRI) + fs.setIf(ecx&(1<<28) != 0, MOVDIR64B) + fs.setIf(ecx&(1<<29) != 0, ENQCMD) + fs.setIf(ecx&(1<<30) != 0, SGXLC) + + // CPUID.(EAX=7, ECX=0).EDX + fs.setIf(edx&(1<<4) != 0, FSRM) + fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL) + fs.setIf(edx&(1<<10) != 0, MD_CLEAR) + fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT) + fs.setIf(edx&(1<<14) != 0, SERIALIZE) + fs.setIf(edx&(1<<15) != 0, HYBRID_CPU) + fs.setIf(edx&(1<<16) != 0, TSXLDTRK) + fs.setIf(edx&(1<<18) != 0, PCONFIG) + fs.setIf(edx&(1<<20) != 0, CETIBT) + fs.setIf(edx&(1<<26) != 0, IBPB) + fs.setIf(edx&(1<<27) != 0, STIBP) + fs.setIf(edx&(1<<28) != 0, FLUSH_L1D) + fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP) + fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP) + fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD) + + // CPUID.(EAX=7, ECX=1).EAX + eax1, _, _, edx1 := cpuidex(7, 1) + fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI) + fs.setIf(eax1&(1<<7) != 0, CMPCCXADD) + fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL) + fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT) + fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT) + fs.setIf(eax1&(1<<22) != 0, HRESET) + fs.setIf(eax1&(1<<23) != 0, AVXIFMA) + fs.setIf(eax1&(1<<26) != 0, LAM) + + // CPUID.(EAX=7, ECX=1).EDX + fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8) + fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT) + fs.setIf(edx1&(1<<14) != 0, PREFETCHI) + fs.setIf(edx1&(1<<19) != 0, AVX10) + fs.setIf(edx1&(1<<21) != 0, APX_F) + + // Only detect AVX-512 features if XGETBV is supported + if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { + // Check for OS support + eax, _ := xgetbv(0) + + // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and + // ZMM16-ZMM31 state are enabled by OS) + /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). + hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3 + if runtime.GOOS == "darwin" { + hasAVX512 = fs.inSet(AVX) && darwinHasAVX512() + } + if hasAVX512 { + fs.setIf(ebx&(1<<16) != 0, AVX512F) + fs.setIf(ebx&(1<<17) != 0, AVX512DQ) + fs.setIf(ebx&(1<<21) != 0, AVX512IFMA) + fs.setIf(ebx&(1<<26) != 0, AVX512PF) + fs.setIf(ebx&(1<<27) != 0, AVX512ER) + fs.setIf(ebx&(1<<28) != 0, AVX512CD) + fs.setIf(ebx&(1<<30) != 0, AVX512BW) + fs.setIf(ebx&(1<<31) != 0, AVX512VL) + // ecx + fs.setIf(ecx&(1<<1) != 0, AVX512VBMI) + fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2) + fs.setIf(ecx&(1<<11) != 0, AVX512VNNI) + fs.setIf(ecx&(1<<12) != 0, AVX512BITALG) + fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ) + // edx + fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT) + fs.setIf(edx&(1<<22) != 0, AMXBF16) + fs.setIf(edx&(1<<23) != 0, AVX512FP16) + fs.setIf(edx&(1<<24) != 0, AMXTILE) + fs.setIf(edx&(1<<25) != 0, AMXINT8) + // eax1 = CPUID.(EAX=7, ECX=1).EAX + fs.setIf(eax1&(1<<5) != 0, AVX512BF16) + fs.setIf(eax1&(1<<19) != 0, WRMSRNS) + fs.setIf(eax1&(1<<21) != 0, AMXFP16) + fs.setIf(eax1&(1<<27) != 0, MSRLIST) + } + } + + // CPUID.(EAX=7, ECX=2) + _, _, _, edx = cpuidex(7, 2) + fs.setIf(edx&(1<<0) != 0, PSFD) + fs.setIf(edx&(1<<1) != 0, IDPRED_CTRL) + fs.setIf(edx&(1<<2) != 0, RRSBA_CTRL) + fs.setIf(edx&(1<<4) != 0, BHI_CTRL) + fs.setIf(edx&(1<<5) != 0, MCDT_NO) + + // Add keylocker features. + if fs.inSet(KEYLOCKER) && mfi >= 0x19 { + _, ebx, _, _ := cpuidex(0x19, 0) + fs.setIf(ebx&5 == 5, KEYLOCKERW) // Bit 0 and 2 (1+4) + } + + // Add AVX10 features. + if fs.inSet(AVX10) && mfi >= 0x24 { + _, ebx, _, _ := cpuidex(0x24, 0) + fs.setIf(ebx&(1<<16) != 0, AVX10_128) + fs.setIf(ebx&(1<<17) != 0, AVX10_256) + fs.setIf(ebx&(1<<18) != 0, AVX10_512) + } + } + + // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1) + // EAX + // Bit 00: XSAVEOPT is available. + // Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set. + // Bit 02: Supports XGETBV with ECX = 1 if set. + // Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set. + // Bits 31 - 04: Reserved. + // EBX + // Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS. + // ECX + // Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1. + // EDX? + // Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved. + if mfi >= 0xd { + if fs.inSet(XSAVE) { + eax, _, _, _ := cpuidex(0xd, 1) + fs.setIf(eax&(1<<0) != 0, XSAVEOPT) + fs.setIf(eax&(1<<1) != 0, XSAVEC) + fs.setIf(eax&(1<<2) != 0, XGETBV1) + fs.setIf(eax&(1<<3) != 0, XSAVES) + } + } + if maxExtendedFunction() >= 0x80000001 { + _, _, c, d := cpuid(0x80000001) + if (c & (1 << 5)) != 0 { + fs.set(LZCNT) + fs.set(POPCNT) + } + // ECX + fs.setIf((c&(1<<0)) != 0, LAHF) + fs.setIf((c&(1<<2)) != 0, SVM) + fs.setIf((c&(1<<6)) != 0, SSE4A) + fs.setIf((c&(1<<10)) != 0, IBS) + fs.setIf((c&(1<<22)) != 0, TOPEXT) + + // EDX + fs.setIf(d&(1<<11) != 0, SYSCALL) + fs.setIf(d&(1<<20) != 0, NX) + fs.setIf(d&(1<<22) != 0, MMXEXT) + fs.setIf(d&(1<<23) != 0, MMX) + fs.setIf(d&(1<<24) != 0, FXSR) + fs.setIf(d&(1<<25) != 0, FXSROPT) + fs.setIf(d&(1<<27) != 0, RDTSCP) + fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT) + fs.setIf(d&(1<<31) != 0, AMD3DNOW) + + /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be + * used unless the OS has AVX support. */ + if fs.inSet(AVX) { + fs.setIf((c&(1<<11)) != 0, XOP) + fs.setIf((c&(1<<16)) != 0, FMA4) + } + + } + if maxExtendedFunction() >= 0x80000007 { + _, b, _, d := cpuid(0x80000007) + fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW) + fs.setIf((b&(1<<1)) != 0, SUCCOR) + fs.setIf((b&(1<<2)) != 0, HWA) + fs.setIf((d&(1<<9)) != 0, CPBOOST) + } + + if maxExtendedFunction() >= 0x80000008 { + _, b, _, _ := cpuid(0x80000008) + fs.setIf(b&(1<<28) != 0, PSFD) + fs.setIf(b&(1<<27) != 0, CPPC) + fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD) + fs.setIf(b&(1<<23) != 0, PPIN) + fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED) + fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS) + fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP) + fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED) + fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON) + fs.setIf(b&(1<<15) != 0, STIBP) + fs.setIf(b&(1<<14) != 0, IBRS) + fs.setIf((b&(1<<13)) != 0, INT_WBINVD) + fs.setIf(b&(1<<12) != 0, IBPB) + fs.setIf((b&(1<<9)) != 0, WBNOINVD) + fs.setIf((b&(1<<8)) != 0, MCOMMIT) + fs.setIf((b&(1<<4)) != 0, RDPRU) + fs.setIf((b&(1<<3)) != 0, INVLPGB) + fs.setIf((b&(1<<1)) != 0, MSRIRC) + fs.setIf((b&(1<<0)) != 0, CLZERO) + } + + if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A { + _, _, _, edx := cpuid(0x8000000A) + fs.setIf((edx>>0)&1 == 1, SVMNP) + fs.setIf((edx>>1)&1 == 1, LBRVIRT) + fs.setIf((edx>>2)&1 == 1, SVML) + fs.setIf((edx>>3)&1 == 1, NRIPS) + fs.setIf((edx>>4)&1 == 1, TSCRATEMSR) + fs.setIf((edx>>5)&1 == 1, VMCBCLEAN) + fs.setIf((edx>>6)&1 == 1, SVMFBASID) + fs.setIf((edx>>7)&1 == 1, SVMDA) + fs.setIf((edx>>10)&1 == 1, SVMPF) + fs.setIf((edx>>12)&1 == 1, SVMPFT) + } + + if maxExtendedFunction() >= 0x8000001a { + eax, _, _, _ := cpuid(0x8000001a) + fs.setIf((eax>>0)&1 == 1, FP128) + fs.setIf((eax>>1)&1 == 1, MOVU) + fs.setIf((eax>>2)&1 == 1, FP256) + } + + if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) { + eax, _, _, _ := cpuid(0x8000001b) + fs.setIf((eax>>0)&1 == 1, IBSFFV) + fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM) + fs.setIf((eax>>2)&1 == 1, IBSOPSAM) + fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT) + fs.setIf((eax>>4)&1 == 1, IBSOPCNT) + fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT) + fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT) + fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK) + fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE) + fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX) + fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1. + fs.setIf((eax>>11)&1 == 1, IBS_ZEN4) + } + + if maxExtendedFunction() >= 0x8000001f && vend == AMD { + a, _, _, _ := cpuid(0x8000001f) + fs.setIf((a>>0)&1 == 1, SME) + fs.setIf((a>>1)&1 == 1, SEV) + fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH) + fs.setIf((a>>3)&1 == 1, SEV_ES) + fs.setIf((a>>4)&1 == 1, SEV_SNP) + fs.setIf((a>>5)&1 == 1, VMPL) + fs.setIf((a>>10)&1 == 1, SME_COHERENT) + fs.setIf((a>>11)&1 == 1, SEV_64BIT) + fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED) + fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE) + fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP) + fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST) + fs.setIf((a>>16)&1 == 1, VTE) + fs.setIf((a>>24)&1 == 1, VMSA_REGPROT) + } + + if mfi >= 0x20 { + // Microsoft has decided to purposefully hide the information + // of the guest TEE when VMs are being created using Hyper-V. + // + // This leads us to check for the Hyper-V cpuid features + // (0x4000000C), and then for the `ebx` value set. + // + // For Intel TDX, `ebx` is set as `0xbe3`, being 3 the part + // we're mostly interested about,according to: + // https://github.com/torvalds/linux/blob/d2f51b3516dade79269ff45eae2a7668ae711b25/arch/x86/include/asm/hyperv-tlfs.h#L169-L174 + _, ebx, _, _ := cpuid(0x4000000C) + fs.setIf(ebx == 0xbe3, TDX_GUEST) + } + + if mfi >= 0x21 { + // Intel Trusted Domain Extensions Guests have their own cpuid leaf (0x21). + _, ebx, ecx, edx := cpuid(0x21) + identity := string(valAsString(ebx, edx, ecx)) + fs.setIf(identity == "IntelTDX ", TDX_GUEST) + } + + return fs +} + +func (c *CPUInfo) supportAVX10() uint8 { + if c.maxFunc >= 0x24 && c.featureSet.inSet(AVX10) { + _, ebx, _, _ := cpuidex(0x24, 0) + return uint8(ebx) + } + return 0 +} + +func valAsString(values ...uint32) []byte { + r := make([]byte, 4*len(values)) + for i, v := range values { + dst := r[i*4:] + dst[0] = byte(v & 0xff) + dst[1] = byte((v >> 8) & 0xff) + dst[2] = byte((v >> 16) & 0xff) + dst[3] = byte((v >> 24) & 0xff) + switch { + case dst[0] == 0: + return r[:i*4] + case dst[1] == 0: + return r[:i*4+1] + case dst[2] == 0: + return r[:i*4+2] + case dst[3] == 0: + return r[:i*4+3] + } + } + return r +} -- cgit v1.2.3