Skip to main content

strat9_kernel/arch/x86_64/
percpu.rs

1//! Per-CPU data (x86_64)
2//!
3//! Minimal per-CPU tracking for SMP bring-up. This keeps CPU identity,
4//! online state, and per-CPU kernel stack top pointers.
5
6use core::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, AtomicUsize, Ordering};
7
/// Maximum number of CPUs supported for now.
pub const MAX_CPUS: usize = 32;

/// Byte offsets into `PerCpuArch`, consumed by the SYSCALL entry assembly,
/// which addresses the block GS-relative (must match the `#[repr(C)]`
/// layout of `PerCpuArch`).
/// Note: cpu_index is at offset 0 (8 bytes), so the next fields land at 8/16.
pub const USER_RSP_OFFSET: usize = 8;
pub const KERNEL_RSP_OFFSET: usize = 16;
15
/// Minimal per-CPU block accessed from assembly via GS base.
///
/// **Layout invariant** (`#[repr(C)]`):
/// - `cpu_index` at offset  0 (8 bytes) — read by assembly as `gs:[0]`
/// - `user_rsp`  at offset  8 (8 bytes)
/// - `kernel_rsp` at offset 16 (8 bytes)
///
/// `AtomicU64` is `#[repr(C, align(8))]` and has the same size/alignment as
/// `u64`, so the offsets declared in `USER_RSP_OFFSET`/`KERNEL_RSP_OFFSET` are
/// preserved. The inline assembly in `current_cpu_index()` reads the raw 8
/// bytes from `gs:[0]`, which are the inner `u64` of `AtomicU64`.
#[repr(C)]
pub struct PerCpuArch {
    /// CPU index for this block. Written once during init via `AtomicU64::store`
    /// (no raw-pointer cast needed), then only ever read. `AtomicU64` provides
    /// proper `UnsafeCell` interior mutability so reads through `&PerCpuArch`
    /// are not UB even though the field was written after the struct was placed
    /// in a `static`.
    pub cpu_index: AtomicU64, // offset 0 — read by assembly: `mov rax, gs:[0]`
    /// User-mode RSP slot for the SYSCALL entry (see `USER_RSP_OFFSET`).
    pub user_rsp: AtomicU64,   // offset 8
    /// Kernel RSP slot for the SYSCALL entry (see `KERNEL_RSP_OFFSET`);
    /// written via `set_kernel_rsp_for_cpu`.
    pub kernel_rsp: AtomicU64, // offset 16
}
38
39const _: () = assert!(core::mem::offset_of!(PerCpuArch, user_rsp) == USER_RSP_OFFSET);
40const _: () = assert!(core::mem::offset_of!(PerCpuArch, kernel_rsp) == KERNEL_RSP_OFFSET);
41
/// Per-CPU state.
#[repr(C)]
pub struct PerCpu {
    /// Assembly-visible portion (GS base points at this field's address).
    pub arch: PerCpuArch,
    /// Slot is claimed (set once via compare-exchange in `register_cpu`,
    /// or directly for the BSP in `init_boot_cpu`).
    present: AtomicBool,
    /// CPU has been brought up and marked online (`mark_online_by_apic`).
    online: AtomicBool,
    /// Local APIC ID recorded at registration time.
    apic_id: AtomicU32,
    /// Top of this CPU's kernel stack (see `set_kernel_stack_top`).
    kernel_stack_top: AtomicU64,
    /// CPU is ready to service TLB-shootdown IPIs (`mark_tlb_ready_current`).
    tlb_ready: AtomicBool,
    /// Preemption-disable depth counter.
    /// When > 0, `maybe_preempt()` and `yield_task()` are no-ops on this CPU.
    pub preempt_count: AtomicU32,
}
55
impl PerCpu {
    /// Creates a new, empty slot: not present, offline, all fields zeroed.
    /// `const` so it can initialize the static `PERCPU` array.
    pub const fn new() -> Self {
        Self {
            arch: PerCpuArch {
                cpu_index: AtomicU64::new(0),
                user_rsp: AtomicU64::new(0),
                kernel_rsp: AtomicU64::new(0),
            },
            present: AtomicBool::new(false),
            online: AtomicBool::new(false),
            apic_id: AtomicU32::new(0),
            kernel_stack_top: AtomicU64::new(0),
            tlb_ready: AtomicBool::new(false),
            preempt_count: AtomicU32::new(0),
        }
    }

    /// Returns the local APIC ID recorded for this CPU slot.
    pub fn apic_id(&self) -> u32 {
        self.apic_id.load(Ordering::Acquire)
    }

    /// Returns `true` if this CPU has been marked online.
    pub fn online(&self) -> bool {
        self.online.load(Ordering::Acquire)
    }
}
84
/// Number of registered CPU slots (BSP + APs); maintained by
/// `init_boot_cpu` / `register_cpu`.
static CPU_COUNT: AtomicUsize = AtomicUsize::new(0);
/// Fixed-size per-CPU table; slots are claimed via the `present` flag.
static PERCPU: [PerCpu; MAX_CPUS] = [const { PerCpu::new() }; MAX_CPUS];

/// Set to `true` once the BSP has called `init_gs_base`.
///
/// Used by `current_cpu_index()` to skip the serialising `rdmsr` null-guard
/// on every hot-path call once GS is permanently initialised.
///
/// **Invariant**: APs always call `init_gs_base` *before* the first call to
/// `current_cpu_index()` on that AP (see `smp_main` in `smp.rs`).  Once
/// `BSP_GS_INITIALIZED` is true, any CPU that could possibly call
/// `current_cpu_index()` must already have a valid GS base, so the direct
/// `gs:[0]` read on the fast path is safe.
static BSP_GS_INITIALIZED: AtomicBool = AtomicBool::new(false);
99
100/// Initialize the boot CPU (BSP) entry.
101pub fn init_boot_cpu(apic_id: u32) -> usize {
102    let cpu = &PERCPU[0];
103    cpu.present.store(true, Ordering::Release);
104    cpu.online.store(true, Ordering::Release);
105    cpu.apic_id.store(apic_id, Ordering::Release);
106    // AtomicU64::store through shared ref — no unsafe needed, UnsafeCell provides
107    // interior mutability so this write is well-defined.
108    cpu.arch.cpu_index.store(0, Ordering::Release);
109    cpu.tlb_ready.store(false, Ordering::Release);
110    CPU_COUNT.store(1, Ordering::Release);
111    0
112}
113
114/// Register a new CPU by APIC ID, returning its CPU index.
115pub fn register_cpu(apic_id: u32) -> Option<usize> {
116    for (idx, cpu) in PERCPU.iter().enumerate() {
117        if cpu
118            .present
119            .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
120            .is_ok()
121        {
122            cpu.online.store(false, Ordering::Release);
123            cpu.apic_id.store(apic_id, Ordering::Release);
124            cpu.arch.cpu_index.store(idx as u64, Ordering::Release);
125            cpu.tlb_ready.store(false, Ordering::Release);
126            CPU_COUNT.fetch_add(1, Ordering::AcqRel);
127            return Some(idx);
128        }
129    }
130    None
131}
132
133/// Mark a CPU as online by APIC ID.
134pub fn mark_online_by_apic(apic_id: u32) -> Option<usize> {
135    for (idx, cpu) in PERCPU.iter().enumerate() {
136        if cpu.present.load(Ordering::Acquire) && cpu.apic_id.load(Ordering::Acquire) == apic_id {
137            cpu.online.store(true, Ordering::Release);
138            cpu.tlb_ready.store(false, Ordering::Release);
139            // Re-confirm index in arch block (no-op if already set, safe to repeat).
140            cpu.arch.cpu_index.store(idx as u64, Ordering::Release);
141            return Some(idx);
142        }
143    }
144    None
145}
146
147/// Set the per-CPU kernel stack top for the given CPU index.
148pub fn set_kernel_stack_top(index: usize, rsp: u64) {
149    if let Some(cpu) = PERCPU.get(index) {
150        cpu.kernel_stack_top.store(rsp, Ordering::Release);
151    }
152}
153
154/// Get the per-CPU kernel stack top for the given CPU index.
155pub fn kernel_stack_top(index: usize) -> Option<u64> {
156    PERCPU
157        .get(index)
158        .map(|cpu| cpu.kernel_stack_top.load(Ordering::Acquire))
159}
160
161/// Set the per-CPU SYSCALL kernel RSP (used by syscall entry).
162pub fn set_kernel_rsp_for_cpu(index: usize, rsp: u64) {
163    if let Some(cpu) = PERCPU.get(index) {
164        cpu.arch.kernel_rsp.store(rsp, Ordering::Release);
165    }
166}
167
168/// Set the per-CPU SYSCALL kernel RSP for the current CPU.
169pub fn set_kernel_rsp_current(rsp: u64) {
170    let cpu_index = current_cpu_index();
171    set_kernel_rsp_for_cpu(cpu_index, rsp);
172}
173
/// Initialize GS base for this CPU to point at its per-CPU block.
///
/// Sets `IA32_GS_BASE` (0xC000_0101, current GS base) to
/// `&PERCPU[cpu_index].arch` for kernel execution, and initializes
/// `IA32_KERNEL_GS_BASE` (0xC000_0102) to 0 as the initial user GS base.
///
/// For the BSP (cpu_index == 0) this also sets `BSP_GS_INITIALIZED`, enabling
/// the fast (non-serialising) path in `current_cpu_index()`.
///
/// **Ordering for APs**: in `smp_main`, `init_gs_base` is called before the
/// first `current_cpu_index()` can execute on that AP, so it is safe to take
/// the fast path on the AP after the BSP flag is visible.
///
/// # Panics
/// Panics if `cpu_index >= MAX_CPUS` (indexing into `PERCPU`).
pub fn init_gs_base(cpu_index: usize) {
    // The GS base targets the `arch` sub-block so `gs:[0]` lands on
    // `PerCpuArch::cpu_index` (offset 0 by the `#[repr(C)]` layout invariant).
    let base = &PERCPU[cpu_index].arch as *const PerCpuArch as u64;
    // IA32_GS_BASE = 0xC000_0101, IA32_KERNEL_GS_BASE = 0xC000_0102.
    // Keep GS_BASE on kernel per-CPU for Ring 0; seed KERNEL_GS_BASE with 0
    // so the first Ring 0->3 transition can restore a non-kernel user GS.
    crate::arch::x86_64::wrmsr(0xC000_0101, base);
    crate::arch::x86_64::wrmsr(0xC000_0102, 0);

    if cpu_index == 0 {
        // Release ordering: all prior init writes must be visible before any
        // CPU sees the flag and takes the fast path in current_cpu_index().
        BSP_GS_INITIALIZED.store(true, Ordering::Release);
    }
}
200
201/// Find a CPU index by APIC ID.
202pub fn cpu_index_by_apic(apic_id: u32) -> Option<usize> {
203    for (idx, cpu) in PERCPU.iter().enumerate() {
204        if cpu.present.load(Ordering::Acquire) && cpu.apic_id.load(Ordering::Acquire) == apic_id {
205            return Some(idx);
206        }
207    }
208    None
209}
210
/// Get the total number of CPUs that have been registered
/// (maintained by `init_boot_cpu` / `register_cpu`).
pub fn cpu_count() -> usize {
    CPU_COUNT.load(Ordering::Acquire)
}
215
/// Get the total number of CPUs (public alias for OSTD compatibility).
/// Thin wrapper around [`cpu_count`].
pub fn get_cpu_count() -> usize {
    cpu_count()
}
220
221// ─── Preemption helpers ───────────────────────────────────────────────────────
222
/// Increment the preemption-disable depth for the current CPU.
/// When depth > 0, the scheduler will not preempt this CPU.
///
/// Safe to call from any Ring-0 context **after** `init_gs_base` has run on
/// this CPU.  If called before GS is initialised (early boot), `current_cpu_index`
/// returns 0, which is always the BSP slot.  On the BSP itself that is correct;
/// on an AP before its GS base is set the call is a no-op because the AP's
/// scheduler is not yet running — incrementing slot 0 would be wrong, so we
/// verify the GS index matches the slot we are about to touch.
///
/// NOTE(review): the guard below cannot actually reject the `idx == 0` case —
/// slot 0 stores `cpu_index == 0`, so an AP that somehow reached here before
/// its GS base is programmed would still bump the BSP's counter.  Correctness
/// therefore rests on the invariant that APs call `init_gs_base` before any
/// code path that reaches this function; confirm against `smp_main`.
#[inline]
pub fn preempt_disable() {
    let idx = current_cpu_index();
    // idx is clamped to [0, MAX_CPUS-1] by current_cpu_index(), so the
    // indexing below cannot go out of bounds.
    // Guard: only touch the slot whose stored cpu_index agrees with idx —
    // a mismatch means GS does not (yet) point at this CPU's own slot.
    if PERCPU[idx].arch.cpu_index.load(Ordering::Relaxed) as usize == idx {
        PERCPU[idx].preempt_count.fetch_add(1, Ordering::Relaxed);
    }
}
243
/// Decrement the preemption-disable depth for the current CPU.
/// Must be paired with exactly one prior call to `preempt_disable`.
///
/// Includes an underflow guard: if the counter is already 0 (mismatched call),
/// the decrement is skipped and a warning is emitted rather than letting u32
/// wrap to u32::MAX and disabling the scheduler permanently.
///
/// NOTE(review): the load followed by `fetch_sub` is not a single atomic
/// operation.  This appears to rely on the counter being touched only by the
/// owning CPU (a nested interrupt that runs *balanced* disable/enable pairs
/// leaves the observed value unchanged) — confirm no cross-CPU writer exists.
#[inline]
pub fn preempt_enable() {
    let idx = current_cpu_index();
    // Same GS-slot guard as preempt_disable: skip if GS isn't set here yet.
    if PERCPU[idx].arch.cpu_index.load(Ordering::Relaxed) as usize == idx {
        // Load first: prevent wrapping from 0 to u32::MAX.
        let prev = PERCPU[idx].preempt_count.load(Ordering::Relaxed);
        if prev > 0 {
            PERCPU[idx].preempt_count.fetch_sub(1, Ordering::Relaxed);
        } else {
            // Mismatched preempt_enable — log and leave count at 0.
            log::warn!("preempt_enable: underflow on CPU {} (mismatched call)", idx);
        }
    }
}
264
265/// Returns `true` if preemption is currently allowed on this CPU
266/// (preempt_count == 0).
267#[inline]
268pub fn is_preemptible() -> bool {
269    let idx = current_cpu_index();
270    if PERCPU[idx].arch.cpu_index.load(Ordering::Relaxed) as usize != idx {
271        return true; // Early boot: no scheduler active, preemption is allowed.
272    }
273    PERCPU[idx].preempt_count.load(Ordering::Relaxed) == 0
274}
275
276/// Get the APIC ID for a given CPU index, or `None` if not present.
277pub fn apic_id_by_cpu_index(index: usize) -> Option<u32> {
278    PERCPU
279        .get(index)
280        .filter(|cpu| cpu.present.load(Ordering::Acquire))
281        .map(|cpu| cpu.apic_id.load(Ordering::Acquire))
282}
283
/// Resolve current CPU index via a GS-relative load from offset 0.
///
/// **Hot path** (after `init_gs_base` has run on the BSP): a single
/// non-serialising `mov rax, gs:[0]` — no MSR, no pipeline stall.
///
/// **Slow/early-boot path** (before `init_gs_base` is called the first time):
/// reads `IA32_GS_BASE` via `rdmsr` as a null-guard; if the GS base is 0
/// (hardware reset value, before our `wrmsr`) the function returns 0 (BSP
/// slot) without touching GS.  Once the BSP calls `init_gs_base`, the slow
/// path is never taken again.
///
/// **Corrupt-GS defence**: the returned index is clamped to [0, MAX_CPUS-1]
/// so a bogus GS value can never produce an out-of-bounds array access.
///
/// **Invariant for APs**: `smp_main` always calls `init_gs_base` before the
/// first `current_cpu_index()` on each AP, so the hot path is safe for APs.
#[inline]
pub fn current_cpu_index() -> usize {
    // SAFETY:
    // Hot path — `gs:[0]` reads `PerCpuArch::cpu_index` (AtomicU64, repr(C),
    // offset 0).  Valid because `BSP_GS_INITIALIZED` is only set after the BSP's
    // GS base has been written, and APs always write their GS base before
    // executing any code that calls this function.
    //
    // Slow path — `rdmsr` is a privileged Ring-0 instruction, always valid here.
    // The GS-relative dereference after the MSR read is guarded by the
    // `gs_base != 0` check.
    unsafe {
        if BSP_GS_INITIALIZED.load(Ordering::Acquire) {
            // Fast path: GS is valid on every CPU that can run kernel code now.
            let idx: u64;
            core::arch::asm!(
                "mov {idx}, gs:[0]",
                idx = out(reg) idx,
                options(nostack, preserves_flags, readonly),
            );
            return (idx as usize).min(MAX_CPUS - 1);
        }

        // Slow path: early boot — GS not yet set on any CPU.
        // `rdmsr` reads IA32_GS_BASE (0xC000_0101) as a null-guard.
        let lo: u32;
        let hi: u32;
        core::arch::asm!(
            "rdmsr",
            in("ecx")  0xC000_0101u32,
            out("eax") lo,
            out("edx") hi,
            options(nostack, preserves_flags),
        );
        let gs_base = (lo as u64) | ((hi as u64) << 32);
        if gs_base == 0 {
            return 0; // GS not yet initialised — early boot, return BSP slot.
        }

        // GS is set but BSP_GS_INITIALIZED wasn't yet visible (narrow race
        // between wrmsr and the store; take the GS-relative read here too).
        let idx: u64;
        core::arch::asm!(
            "mov {idx}, gs:[0]",
            idx = out(reg) idx,
            options(nostack, preserves_flags, readonly),
        );
        (idx as usize).min(MAX_CPUS - 1)
    }
}
349
/// Alias for [`current_cpu_index`] (kept for call sites that distinguish a
/// "fast" lookup; both take the same path).
#[inline]
pub fn current_cpu_index_fast() -> usize {
    current_cpu_index()
}
355
/// Resolve current CPU index from GS base (compatibility).
/// Always `Some` — `current_cpu_index` clamps to a valid index itself.
pub fn cpu_index_from_gs() -> Option<usize> {
    Some(current_cpu_index())
}
360
/// Access the per-CPU array (read-only; mutation goes through the slots'
/// atomic fields).
pub fn percpu() -> &'static [PerCpu; MAX_CPUS] {
    &PERCPU
}
365
366/// Mark current CPU as ready to handle TLB shootdown IPIs.
367pub fn mark_tlb_ready_current() {
368    let idx = current_cpu_index();
369    PERCPU[idx].tlb_ready.store(true, Ordering::Release);
370}
371
372/// Returns true iff CPU `index` is online and ready for TLB shootdown.
373pub fn tlb_ready(index: usize) -> bool {
374    PERCPU
375        .get(index)
376        .map(|cpu| {
377            cpu.present.load(Ordering::Acquire)
378                && cpu.online.load(Ordering::Acquire)
379                && cpu.tlb_ready.load(Ordering::Acquire)
380        })
381        .unwrap_or(false)
382}