Skip to main content

strat9_kernel/arch/x86_64/
cpuid.rs

1//! CPU feature detection via CPUID instruction.
2//!
3//! Provides a `CpuInfo` struct populated at boot time with vendor, model,
4//! feature flags, and XSAVE geometry. All subsequent queries go through
5//! `host()` which returns the cached result.
6
7use crate::sync::SpinLock;
8use alloc::string::String;
9use bitflags::bitflags;
10use core::sync::atomic::{AtomicBool, Ordering};
11
12bitflags! {
13    /// CPU feature flags detected via CPUID.
14    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
15    pub struct CpuFeatures: u64 {
16        // ── Leaf 0x01 ECX ──
17        const SSE3      = 1 << 0;
18        const SSSE3     = 1 << 1;
19        const FMA       = 1 << 2;
20        const SSE4_1    = 1 << 3;
21        const SSE4_2    = 1 << 4;
22        const POPCNT    = 1 << 5;
23        const AES_NI    = 1 << 6;
24        const XSAVE     = 1 << 7;
25        const AVX       = 1 << 8;
26        const F16C      = 1 << 9;
27        const VMX       = 1 << 10;
28        const X2APIC    = 1 << 11;
29        // ── Leaf 0x01 EDX ──
30        const FPU       = 1 << 16;
31        const TSC       = 1 << 17;
32        const APIC      = 1 << 18;
33        const SSE       = 1 << 19;
34        const SSE2      = 1 << 20;
35        const FXSR      = 1 << 21;
36        // ── Leaf 0x07 EBX ──
37        const AVX2      = 1 << 32;
38        const AVX512F   = 1 << 33;
39        const AVX512BW  = 1 << 34;
40        const AVX512VL  = 1 << 35;
41        const SHA       = 1 << 36;
42        // ── Leaf 0x80000001 EDX ──
43        const NX        = 1 << 48;
44        const PAGES_1G  = 1 << 49;
45        const RDTSCP    = 1 << 50;
46        const LONG_MODE = 1 << 51;
47        // ── Leaf 0x80000001 ECX ──
48        const SVM       = 1 << 56;
49    }
50}
51
// XCR0 state-component bits. Each constant is the mask for one component.

/// XCR0 bit 0: x87 FPU state.
pub const XCR0_X87: u64 = 1 << 0;
/// XCR0 bit 1: SSE state.
pub const XCR0_SSE: u64 = 1 << 1;
/// XCR0 bit 2: AVX state.
pub const XCR0_AVX: u64 = 1 << 2;
/// XCR0 bit 5: AVX-512 opmask state.
pub const XCR0_OPMASK: u64 = 1 << 5;
/// XCR0 bit 6: AVX-512 ZMM_Hi256 state.
pub const XCR0_ZMM_HI256: u64 = 1 << 6;
/// XCR0 bit 7: AVX-512 Hi16_ZMM state.
pub const XCR0_HI16_ZMM: u64 = 1 << 7;
59
/// Processor manufacturer, as recognised from the CPUID leaf 0 vendor id.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CpuVendor {
    /// Vendor id "GenuineIntel".
    Intel,
    /// Vendor id "AuthenticAMD".
    Amd,
    /// Any other vendor id.
    Unknown,
}
66
67/// Cached CPU identification and feature information.
68#[derive(Debug, Clone)]
69pub struct CpuInfo {
70    pub vendor: CpuVendor,
71    pub features: CpuFeatures,
72    pub max_xcr0: u64,
73    pub xsave_size: usize,
74    pub family: u8,
75    pub model: u8,
76    pub stepping: u8,
77    pub model_name: [u8; 48],
78    model_name_len: usize,
79}
80
81impl CpuInfo {
82    /// Return the model name as a `&str`.
83    pub fn model_name_str(&self) -> &str {
84        let bytes = &self.model_name[..self.model_name_len];
85        core::str::from_utf8(bytes).unwrap_or("Unknown")
86    }
87
88    /// Return a vendor id string (e.g. "GenuineIntel").
89    pub fn vendor_string(&self) -> &'static str {
90        match self.vendor {
91            CpuVendor::Intel => "GenuineIntel",
92            CpuVendor::Amd => "AuthenticAMD",
93            CpuVendor::Unknown => "Unknown",
94        }
95    }
96}
97
// Cached detection result; stays `None` until `init()` stores the `CpuInfo`.
static HOST_CPU: SpinLock<Option<CpuInfo>> = SpinLock::new(None);
// Set to `true` (Release) by `init()` after `HOST_CPU` has been filled.
// Readers check it (Acquire) to answer "not initialized yet" without
// touching the lock.
static INITIALIZED: AtomicBool = AtomicBool::new(false);
100
101/// Detect and cache CPU information. Must be called once at BSP boot.
102pub fn init() {
103    let info = detect();
104    log::info!(
105        "[CPUID] {} {} (family={} model={} stepping={})",
106        info.vendor_string(),
107        info.model_name_str(),
108        info.family,
109        info.model,
110        info.stepping,
111    );
112    log::info!(
113        "[CPUID] features={:?}, max_xcr0={:#x}, xsave_size={}",
114        info.features,
115        info.max_xcr0,
116        info.xsave_size,
117    );
118    *HOST_CPU.lock() = Some(info);
119    INITIALIZED.store(true, Ordering::Release);
120}
121
122/// Return a clone of the cached host CPU info. Panics if `init()` not called.
123pub fn host() -> CpuInfo {
124    HOST_CPU
125        .lock()
126        .clone()
127        .expect("cpuid::init() not called yet")
128}
129
130/// Whether XSAVE is supported by the host.
131pub fn host_uses_xsave() -> bool {
132    INITIALIZED.load(Ordering::Acquire)
133        && HOST_CPU
134            .lock()
135            .as_ref()
136            .map_or(false, |h| h.features.contains(CpuFeatures::XSAVE))
137}
138
139/// Detect CPU features by interrogating CPUID leaves.
140fn detect() -> CpuInfo {
141    let cpuid = super::cpuid;
142
143    // ── Vendor (leaf 0) ──
144    let (max_leaf, ebx0, ecx0, edx0) = cpuid(0, 0);
145    let vendor = match (ebx0, edx0, ecx0) {
146        (0x756E_6547, 0x4965_6E69, 0x6C65_746E) => CpuVendor::Intel,
147        (0x6874_7541, 0x6974_6E65, 0x444D_4163) => CpuVendor::Amd,
148        _ => CpuVendor::Unknown,
149    };
150
151    let mut features = CpuFeatures::empty();
152
153    // ── Leaf 0x01: main feature bits ──
154    let (eax1, _ebx1, ecx1, edx1) = if max_leaf >= 1 {
155        cpuid(1, 0)
156    } else {
157        (0, 0, 0, 0)
158    };
159
160    let stepping = (eax1 & 0xF) as u8;
161    let base_family = (eax1 >> 8) & 0xF;
162    let base_model = (eax1 >> 4) & 0xF;
163    let ext_model = (eax1 >> 16) & 0xF;
164    let ext_family = (eax1 >> 20) & 0xFF;
165    let mut family_full: u16 = base_family as u16;
166    let mut model: u8 = base_model as u8;
167    if base_family == 6 || base_family == 15 {
168        model |= (ext_model << 4) as u8;
169    }
170    if base_family == 15 {
171        family_full += ext_family as u16;
172    }
173    let family = family_full as u8;
174
175    if ecx1 & (1 << 0) != 0 {
176        features |= CpuFeatures::SSE3;
177    }
178    if ecx1 & (1 << 9) != 0 {
179        features |= CpuFeatures::SSSE3;
180    }
181    if ecx1 & (1 << 12) != 0 {
182        features |= CpuFeatures::FMA;
183    }
184    if ecx1 & (1 << 19) != 0 {
185        features |= CpuFeatures::SSE4_1;
186    }
187    if ecx1 & (1 << 20) != 0 {
188        features |= CpuFeatures::SSE4_2;
189    }
190    if ecx1 & (1 << 23) != 0 {
191        features |= CpuFeatures::POPCNT;
192    }
193    if ecx1 & (1 << 25) != 0 {
194        features |= CpuFeatures::AES_NI;
195    }
196    if ecx1 & (1 << 26) != 0 {
197        features |= CpuFeatures::XSAVE;
198    }
199    if ecx1 & (1 << 28) != 0 {
200        features |= CpuFeatures::AVX;
201    }
202    if ecx1 & (1 << 21) != 0 {
203        features |= CpuFeatures::X2APIC;
204    }
205    if ecx1 & (1 << 29) != 0 {
206        features |= CpuFeatures::F16C;
207    }
208    if ecx1 & (1 << 5) != 0 {
209        features |= CpuFeatures::VMX;
210    }
211
212    if edx1 & (1 << 0) != 0 {
213        features |= CpuFeatures::FPU;
214    }
215    if edx1 & (1 << 4) != 0 {
216        features |= CpuFeatures::TSC;
217    }
218    if edx1 & (1 << 9) != 0 {
219        features |= CpuFeatures::APIC;
220    }
221    if edx1 & (1 << 24) != 0 {
222        features |= CpuFeatures::FXSR;
223    }
224    if edx1 & (1 << 25) != 0 {
225        features |= CpuFeatures::SSE;
226    }
227    if edx1 & (1 << 26) != 0 {
228        features |= CpuFeatures::SSE2;
229    }
230
231    // ── Leaf 0x07: extended features ──
232    if max_leaf >= 7 {
233        let (_eax7, ebx7, _ecx7, _edx7) = cpuid(7, 0);
234        if ebx7 & (1 << 5) != 0 {
235            features |= CpuFeatures::AVX2;
236        }
237        if ebx7 & (1 << 16) != 0 {
238            features |= CpuFeatures::AVX512F;
239        }
240        if ebx7 & (1 << 29) != 0 {
241            features |= CpuFeatures::SHA;
242        }
243        if ebx7 & (1 << 30) != 0 {
244            features |= CpuFeatures::AVX512BW;
245        }
246        if ebx7 & (1 << 31) != 0 {
247            features |= CpuFeatures::AVX512VL;
248        }
249    }
250
251    // ── Leaf 0x0D: XSAVE geometry ──
252    let (mut max_xcr0, mut xsave_size) = (XCR0_X87 | XCR0_SSE, 512usize);
253    if features.contains(CpuFeatures::XSAVE) && max_leaf >= 0x0D {
254        let (eax_d, ebx_d, _ecx_d, edx_d) = cpuid(0x0D, 0);
255        max_xcr0 = ((edx_d as u64) << 32) | eax_d as u64;
256        xsave_size = ebx_d as usize;
257    }
258
259    // ── Leaf 0x80000001: extended features (AMD-V, NX, 1G pages) ──
260    let (max_ext, _, _, _) = cpuid(0x8000_0000, 0);
261    if max_ext >= 0x8000_0001 {
262        let (_eax_e, _ebx_e, ecx_e, edx_e) = cpuid(0x8000_0001, 0);
263        if edx_e & (1 << 20) != 0 {
264            features |= CpuFeatures::NX;
265        }
266        if edx_e & (1 << 26) != 0 {
267            features |= CpuFeatures::PAGES_1G;
268        }
269        if edx_e & (1 << 27) != 0 {
270            features |= CpuFeatures::RDTSCP;
271        }
272        if edx_e & (1 << 29) != 0 {
273            features |= CpuFeatures::LONG_MODE;
274        }
275        if ecx_e & (1 << 2) != 0 {
276            features |= CpuFeatures::SVM;
277        }
278    }
279
280    // ── Leaves 0x80000002-0x80000004: brand string ──
281    let mut model_name = [0u8; 48];
282    let mut model_name_len = 0usize;
283    if max_ext >= 0x8000_0004 {
284        for (i, leaf) in (0x8000_0002u32..=0x8000_0004).enumerate() {
285            let (a, b, c, d) = cpuid(leaf, 0);
286            let offset = i * 16;
287            model_name[offset..offset + 4].copy_from_slice(&a.to_le_bytes());
288            model_name[offset + 4..offset + 8].copy_from_slice(&b.to_le_bytes());
289            model_name[offset + 8..offset + 12].copy_from_slice(&c.to_le_bytes());
290            model_name[offset + 12..offset + 16].copy_from_slice(&d.to_le_bytes());
291        }
292        model_name_len = model_name
293            .iter()
294            .rposition(|&b| b != 0 && b != b' ')
295            .map_or(0, |p| p + 1);
296    }
297
298    CpuInfo {
299        vendor,
300        features,
301        max_xcr0,
302        xsave_size,
303        family,
304        model,
305        stepping,
306        model_name,
307        model_name_len,
308    }
309}
310
311/// Compute the XCR0 mask for a given set of allowed features,
312/// clamped to what the host actually supports.
313pub fn xcr0_for_features(features: CpuFeatures) -> u64 {
314    let mut xcr0 = XCR0_X87 | XCR0_SSE;
315    if features.contains(CpuFeatures::AVX) {
316        xcr0 |= XCR0_AVX;
317    }
318    if features.contains(CpuFeatures::AVX512F) {
319        xcr0 |= XCR0_OPMASK | XCR0_ZMM_HI256 | XCR0_HI16_ZMM;
320    }
321    let h = host();
322    xcr0 & h.max_xcr0
323}
324
325/// Compute the XSAVE area size needed for a given XCR0 mask.
326/// Falls back to 512 (FXSAVE) if XSAVE is not supported.
327pub fn xsave_size_for_xcr0(xcr0: u64) -> usize {
328    if !host_uses_xsave() {
329        return 512;
330    }
331    // CPUID leaf 0x0D, sub-leaf 0: ECX gives the size for the *current* XCR0.
332    // Since we may not want to switch XCR0 just to query, use a conservative
333    // computation from the host's max xsave_size clamped down.
334    let h = host();
335    if xcr0 == h.max_xcr0 {
336        return h.xsave_size;
337    }
338    // Minimal sizes per component
339    let mut size = 576usize; // legacy area (512) + xsave header (64)
340    if xcr0 & XCR0_AVX != 0 {
341        size = size.max(832); // +256 for YMM
342    }
343    if xcr0 & (XCR0_OPMASK | XCR0_ZMM_HI256 | XCR0_HI16_ZMM) != 0 {
344        size = size.max(2688); // full AVX-512
345    }
346    size.min(h.xsave_size)
347}
348
349/// Return the host's default XCR0 mask (all supported features).
350/// Safe to call before `init()` — returns `XCR0_X87 | XCR0_SSE` if not yet initialized.
351pub fn host_default_xcr0() -> u64 {
352    if INITIALIZED.load(Ordering::Acquire) {
353        HOST_CPU.lock().as_ref().map_or(XCR0_X87 | XCR0_SSE, |h| {
354            // IMPORTANT: do not call xcr0_for_features() here: it calls host()
355            // and would deadlock while HOST_CPU lock is already held.
356            let mut xcr0 = XCR0_X87 | XCR0_SSE;
357            if h.features.contains(CpuFeatures::AVX) {
358                xcr0 |= XCR0_AVX;
359            }
360            if h.features.contains(CpuFeatures::AVX512F) {
361                xcr0 |= XCR0_OPMASK | XCR0_ZMM_HI256 | XCR0_HI16_ZMM;
362            }
363            // Clamp to host-supported bits.
364            xcr0 & h.max_xcr0
365        })
366    } else {
367        XCR0_X87 | XCR0_SSE
368    }
369}
370
371/// Build a Linux-style `flags` string from CPU features.
372pub fn features_to_flags_string(f: CpuFeatures) -> String {
373    let mut flags = String::new();
374    let table: &[(CpuFeatures, &str)] = &[
375        (CpuFeatures::FPU, "fpu"),
376        (CpuFeatures::TSC, "tsc"),
377        (CpuFeatures::APIC, "apic"),
378        (CpuFeatures::FXSR, "fxsr"),
379        (CpuFeatures::SSE, "sse"),
380        (CpuFeatures::SSE2, "sse2"),
381        (CpuFeatures::SSE3, "sse3"),
382        (CpuFeatures::SSSE3, "ssse3"),
383        (CpuFeatures::SSE4_1, "sse4_1"),
384        (CpuFeatures::SSE4_2, "sse4_2"),
385        (CpuFeatures::POPCNT, "popcnt"),
386        (CpuFeatures::AES_NI, "aes"),
387        (CpuFeatures::XSAVE, "xsave"),
388        (CpuFeatures::AVX, "avx"),
389        (CpuFeatures::F16C, "f16c"),
390        (CpuFeatures::FMA, "fma"),
391        (CpuFeatures::AVX2, "avx2"),
392        (CpuFeatures::AVX512F, "avx512f"),
393        (CpuFeatures::AVX512BW, "avx512bw"),
394        (CpuFeatures::AVX512VL, "avx512vl"),
395        (CpuFeatures::SHA, "sha_ni"),
396        (CpuFeatures::X2APIC, "x2apic"),
397        (CpuFeatures::NX, "nx"),
398        (CpuFeatures::PAGES_1G, "pdpe1gb"),
399        (CpuFeatures::RDTSCP, "rdtscp"),
400        (CpuFeatures::LONG_MODE, "lm"),
401        (CpuFeatures::VMX, "vmx"),
402        (CpuFeatures::SVM, "svm"),
403    ];
404    for &(feat, name) in table {
405        if f.contains(feat) {
406            if !flags.is_empty() {
407                flags.push(' ');
408            }
409            flags.push_str(name);
410        }
411    }
412    flags
413}