// strat9_kernel/arch/x86_64/percpu.rs
1//! Per-CPU data (x86_64)
2//!
3//! Minimal per-CPU tracking for SMP bring-up. This keeps CPU identity,
4//! online state, and per-CPU kernel stack top pointers.
5
6use core::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, AtomicUsize, Ordering};
7
/// Maximum number of CPUs supported for now.
///
/// Sizes the static `PERCPU` array; `register_cpu` fails once all slots
/// are claimed.
pub const MAX_CPUS: usize = 32;

/// Offsets used by the SYSCALL entry (must match `PerCpuArch` layout).
/// Note: cpu_index is at offset 0 (8 bytes).
// GS-relative byte offset of `PerCpuArch::user_rsp` (checked by const assert).
pub const USER_RSP_OFFSET: usize = 8;
// GS-relative byte offset of `PerCpuArch::kernel_rsp` (checked by const assert).
pub const KERNEL_RSP_OFFSET: usize = 16;
15
/// Minimal per-CPU block accessed from assembly via GS base.
///
/// **Layout invariant** (`#[repr(C)]`):
/// - `cpu_index` at offset 0 (8 bytes) — read by assembly as `gs:[0]`
/// - `user_rsp` at offset 8 (8 bytes)
/// - `kernel_rsp` at offset 16 (8 bytes)
///
/// `AtomicU64` is `#[repr(C, align(8))]` and has the same size/alignment as
/// `u64`, so the offsets declared in `USER_RSP_OFFSET`/`KERNEL_RSP_OFFSET` are
/// preserved. The inline assembly in `current_cpu_index()` reads the raw 8
/// bytes from `gs:[0]`, which are the inner `u64` of `AtomicU64`.
#[repr(C)]
pub struct PerCpuArch {
    /// CPU index for this block. Written once during init via `AtomicU64::store`
    /// (no raw-pointer cast needed), then only ever read. `AtomicU64` provides
    /// proper `UnsafeCell` interior mutability so reads through `&PerCpuArch`
    /// are not UB even though the field was written after the struct was placed
    /// in a `static`.
    pub cpu_index: AtomicU64, // offset 0 — read by assembly: `mov rax, gs:[0]`
    /// Saved user-mode RSP across a SYSCALL (written/read by entry asm).
    pub user_rsp: AtomicU64, // offset 8
    /// Kernel RSP loaded by the SYSCALL entry for this CPU.
    pub kernel_rsp: AtomicU64, // offset 16
}
38
39const _: () = assert!(core::mem::offset_of!(PerCpuArch, user_rsp) == USER_RSP_OFFSET);
40const _: () = assert!(core::mem::offset_of!(PerCpuArch, kernel_rsp) == KERNEL_RSP_OFFSET);
41
/// Per-CPU state.
#[repr(C)]
pub struct PerCpu {
    /// Arch block read from assembly; must stay the first field so its
    /// offset-0 `cpu_index` is reachable as `gs:[0]`.
    pub arch: PerCpuArch,
    // Slot has been claimed by `init_boot_cpu`/`register_cpu`.
    present: AtomicBool,
    // CPU has completed bring-up (`mark_online_by_apic`).
    online: AtomicBool,
    // Local APIC ID recorded at registration time.
    apic_id: AtomicU32,
    // Top of this CPU's kernel stack (informational; distinct from
    // `arch.kernel_rsp`, which the SYSCALL entry uses).
    kernel_stack_top: AtomicU64,
    // CPU is ready to receive TLB-shootdown IPIs.
    tlb_ready: AtomicBool,
    /// Preemption-disable depth counter.
    /// When > 0, `maybe_preempt()` and `yield_task()` are no-ops on this CPU.
    pub preempt_count: AtomicU32,
}
55
impl PerCpu {
    /// Creates an empty slot: not present, not online, all fields zeroed.
    ///
    /// `const` so the `PERCPU` array can be built in a `static` initializer.
    pub const fn new() -> Self {
        Self {
            arch: PerCpuArch {
                cpu_index: AtomicU64::new(0),
                user_rsp: AtomicU64::new(0),
                kernel_rsp: AtomicU64::new(0),
            },
            present: AtomicBool::new(false),
            online: AtomicBool::new(false),
            apic_id: AtomicU32::new(0),
            kernel_stack_top: AtomicU64::new(0),
            tlb_ready: AtomicBool::new(false),
            preempt_count: AtomicU32::new(0),
        }
    }

    /// Returns the local APIC ID recorded for this slot.
    pub fn apic_id(&self) -> u32 {
        self.apic_id.load(Ordering::Acquire)
    }

    /// Returns `true` once this CPU has been marked online.
    pub fn online(&self) -> bool {
        self.online.load(Ordering::Acquire)
    }
}
84
// Number of slots claimed so far (BSP + registered APs).
static CPU_COUNT: AtomicUsize = AtomicUsize::new(0);
// One slot per possible CPU, indexed by CPU index.
static PERCPU: [PerCpu; MAX_CPUS] = [const { PerCpu::new() }; MAX_CPUS];

/// Set to `true` once the BSP has called `init_gs_base`.
///
/// Used by `current_cpu_index()` to skip the serialising `rdmsr` null-guard
/// on every hot-path call once GS is permanently initialised.
///
/// **Invariant**: APs always call `init_gs_base` *before* the first call to
/// `current_cpu_index()` on that AP (see `smp_main` in `smp.rs`). Once
/// `BSP_GS_INITIALIZED` is true, any CPU that could possibly call
/// `current_cpu_index()` must already have a valid GS base, so the direct
/// `gs:[0]` read on the fast path is safe.
static BSP_GS_INITIALIZED: AtomicBool = AtomicBool::new(false);
99
100/// Initialize the boot CPU (BSP) entry.
101pub fn init_boot_cpu(apic_id: u32) -> usize {
102 let cpu = &PERCPU[0];
103 cpu.present.store(true, Ordering::Release);
104 cpu.online.store(true, Ordering::Release);
105 cpu.apic_id.store(apic_id, Ordering::Release);
106 // AtomicU64::store through shared ref — no unsafe needed, UnsafeCell provides
107 // interior mutability so this write is well-defined.
108 cpu.arch.cpu_index.store(0, Ordering::Release);
109 cpu.tlb_ready.store(false, Ordering::Release);
110 CPU_COUNT.store(1, Ordering::Release);
111 0
112}
113
114/// Register a new CPU by APIC ID, returning its CPU index.
115pub fn register_cpu(apic_id: u32) -> Option<usize> {
116 for (idx, cpu) in PERCPU.iter().enumerate() {
117 if cpu
118 .present
119 .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
120 .is_ok()
121 {
122 cpu.online.store(false, Ordering::Release);
123 cpu.apic_id.store(apic_id, Ordering::Release);
124 cpu.arch.cpu_index.store(idx as u64, Ordering::Release);
125 cpu.tlb_ready.store(false, Ordering::Release);
126 CPU_COUNT.fetch_add(1, Ordering::AcqRel);
127 return Some(idx);
128 }
129 }
130 None
131}
132
133/// Mark a CPU as online by APIC ID.
134pub fn mark_online_by_apic(apic_id: u32) -> Option<usize> {
135 for (idx, cpu) in PERCPU.iter().enumerate() {
136 if cpu.present.load(Ordering::Acquire) && cpu.apic_id.load(Ordering::Acquire) == apic_id {
137 cpu.online.store(true, Ordering::Release);
138 cpu.tlb_ready.store(false, Ordering::Release);
139 // Re-confirm index in arch block (no-op if already set, safe to repeat).
140 cpu.arch.cpu_index.store(idx as u64, Ordering::Release);
141 return Some(idx);
142 }
143 }
144 None
145}
146
147/// Set the per-CPU kernel stack top for the given CPU index.
148pub fn set_kernel_stack_top(index: usize, rsp: u64) {
149 if let Some(cpu) = PERCPU.get(index) {
150 cpu.kernel_stack_top.store(rsp, Ordering::Release);
151 }
152}
153
154/// Get the per-CPU kernel stack top for the given CPU index.
155pub fn kernel_stack_top(index: usize) -> Option<u64> {
156 PERCPU
157 .get(index)
158 .map(|cpu| cpu.kernel_stack_top.load(Ordering::Acquire))
159}
160
161/// Set the per-CPU SYSCALL kernel RSP (used by syscall entry).
162pub fn set_kernel_rsp_for_cpu(index: usize, rsp: u64) {
163 if let Some(cpu) = PERCPU.get(index) {
164 cpu.arch.kernel_rsp.store(rsp, Ordering::Release);
165 }
166}
167
168/// Set the per-CPU SYSCALL kernel RSP for the current CPU.
169pub fn set_kernel_rsp_current(rsp: u64) {
170 let cpu_index = current_cpu_index();
171 set_kernel_rsp_for_cpu(cpu_index, rsp);
172}
173
/// Initialize GS base for this CPU to point at its per-CPU block.
///
/// Sets `IA32_GS_BASE` (0xC000_0101, current GS base) to
/// `&PERCPU[cpu_index].arch` for kernel execution, and initializes
/// `IA32_KERNEL_GS_BASE` (0xC000_0102) to 0 as the initial user GS base.
///
/// For the BSP (cpu_index == 0) this also sets `BSP_GS_INITIALIZED`, enabling
/// the fast (non-serialising) path in `current_cpu_index()`.
///
/// **Ordering for APs**: in `smp_main`, `init_gs_base` is called before the
/// first `current_cpu_index()` can execute on that AP, so it is safe to take
/// the fast path on the AP after the BSP flag is visible.
///
/// # Panics
///
/// Panics if `cpu_index >= MAX_CPUS` (out-of-bounds indexing into `PERCPU`).
pub fn init_gs_base(cpu_index: usize) {
    let base = &PERCPU[cpu_index].arch as *const PerCpuArch as u64;
    // IA32_GS_BASE = 0xC000_0101, IA32_KERNEL_GS_BASE = 0xC000_0102.
    // Keep GS_BASE on kernel per-CPU for Ring 0; seed KERNEL_GS_BASE with 0
    // so the first Ring 0->3 transition can restore a non-kernel user GS.
    crate::arch::x86_64::wrmsr(0xC000_0101, base);
    crate::arch::x86_64::wrmsr(0xC000_0102, 0);

    if cpu_index == 0 {
        // Release ordering: all prior init writes must be visible before any
        // CPU sees the flag and takes the fast path in current_cpu_index().
        BSP_GS_INITIALIZED.store(true, Ordering::Release);
    }
}
200
201/// Find a CPU index by APIC ID.
202pub fn cpu_index_by_apic(apic_id: u32) -> Option<usize> {
203 for (idx, cpu) in PERCPU.iter().enumerate() {
204 if cpu.present.load(Ordering::Acquire) && cpu.apic_id.load(Ordering::Acquire) == apic_id {
205 return Some(idx);
206 }
207 }
208 None
209}
210
211/// Get the total number of CPUs that have been registered.
212pub fn cpu_count() -> usize {
213 CPU_COUNT.load(Ordering::Acquire)
214}
215
216/// Get the total number of CPUs (public alias for OSTD compatibility).
217pub fn get_cpu_count() -> usize {
218 cpu_count()
219}
220
221// ─── Preemption helpers ───────────────────────────────────────────────────────
222
223/// Increment the preemption-disable depth for the current CPU.
224/// When depth > 0, the scheduler will not preempt this CPU.
225///
226/// Safe to call from any Ring-0 context **after** `init_gs_base` has run on
227/// this CPU. If called before GS is initialised (early boot), `current_cpu_index`
228/// returns 0, which is always the BSP slot. On the BSP itself that is correct;
229/// on an AP before its GS base is set the call is a no-op because the AP's
230/// scheduler is not yet running — incrementing slot 0 would be wrong, so we
231/// verify the GS index matches the slot we are about to touch.
232#[inline]
233pub fn preempt_disable() {
234 let idx = current_cpu_index();
235 // SAFETY: idx is clamped to [0, MAX_CPUS-1] by current_cpu_index().
236 // Additional guard: if GS is not yet set on this CPU, cpu_index will be
237 // the BSP's slot (0). Skip if the slot's stored cpu_index disagrees with
238 // idx — that means GS isn't set here yet.
239 if PERCPU[idx].arch.cpu_index.load(Ordering::Relaxed) as usize == idx {
240 PERCPU[idx].preempt_count.fetch_add(1, Ordering::Relaxed);
241 }
242}
243
244/// Decrement the preemption-disable depth for the current CPU.
245/// Must be paired with exactly one prior call to `preempt_disable`.
246///
247/// Includes an underflow guard: if the counter is already 0 (mismatched call),
248/// the decrement is skipped and a warning is emitted rather than letting u32
249/// wrap to u32::MAX and disabling the scheduler permanently.
250#[inline]
251pub fn preempt_enable() {
252 let idx = current_cpu_index();
253 if PERCPU[idx].arch.cpu_index.load(Ordering::Relaxed) as usize == idx {
254 // Load first: prevent wrapping from 0 to u32::MAX.
255 let prev = PERCPU[idx].preempt_count.load(Ordering::Relaxed);
256 if prev > 0 {
257 PERCPU[idx].preempt_count.fetch_sub(1, Ordering::Relaxed);
258 } else {
259 // Mismatched preempt_enable — log and leave count at 0.
260 log::warn!("preempt_enable: underflow on CPU {} (mismatched call)", idx);
261 }
262 }
263}
264
265/// Returns `true` if preemption is currently allowed on this CPU
266/// (preempt_count == 0).
267#[inline]
268pub fn is_preemptible() -> bool {
269 let idx = current_cpu_index();
270 if PERCPU[idx].arch.cpu_index.load(Ordering::Relaxed) as usize != idx {
271 return true; // Early boot: no scheduler active, preemption is allowed.
272 }
273 PERCPU[idx].preempt_count.load(Ordering::Relaxed) == 0
274}
275
276/// Get the APIC ID for a given CPU index, or `None` if not present.
277pub fn apic_id_by_cpu_index(index: usize) -> Option<u32> {
278 PERCPU
279 .get(index)
280 .filter(|cpu| cpu.present.load(Ordering::Acquire))
281 .map(|cpu| cpu.apic_id.load(Ordering::Acquire))
282}
283
/// Resolve current CPU index via a GS-relative load from offset 0.
///
/// **Hot path** (after `init_gs_base` has run on the BSP): a single
/// non-serialising `mov rax, gs:[0]` — no MSR, no pipeline stall.
///
/// **Slow/early-boot path** (before `init_gs_base` is called the first time):
/// reads `IA32_GS_BASE` via `rdmsr` as a null-guard; if the GS base is 0
/// (hardware reset value, before our `wrmsr`) the function returns 0 (BSP
/// slot) without touching GS. Once the BSP calls `init_gs_base`, the slow
/// path is never taken again.
///
/// **Corrupt-GS defence**: the returned index is clamped to [0, MAX_CPUS-1]
/// so a bogus GS value can never produce an out-of-bounds array access.
///
/// **Invariant for APs**: `smp_main` always calls `init_gs_base` before the
/// first `current_cpu_index()` on each AP, so the hot path is safe for APs.
#[inline]
pub fn current_cpu_index() -> usize {
    // SAFETY:
    // Hot path — `gs:[0]` reads `PerCpuArch::cpu_index` (AtomicU64, repr(C),
    // offset 0). Valid because `BSP_GS_INITIALIZED` is only set after the BSP's
    // GS base has been written, and APs always write their GS base before
    // executing any code that calls this function.
    //
    // Slow path — `rdmsr` is a privileged Ring-0 instruction, always valid here.
    // The dereference of `gs_base` is guarded by the != 0 check.
    unsafe {
        if BSP_GS_INITIALIZED.load(Ordering::Acquire) {
            // Fast path: GS is valid on every CPU that can run kernel code now.
            let idx: u64;
            core::arch::asm!(
                "mov {idx}, gs:[0]",
                idx = out(reg) idx,
                options(nostack, preserves_flags, readonly),
            );
            return (idx as usize).min(MAX_CPUS - 1);
        }

        // Slow path: early boot — GS not yet set on any CPU.
        // `rdmsr` reads IA32_GS_BASE (0xC000_0101) as a null-guard.
        let lo: u32;
        let hi: u32;
        core::arch::asm!(
            "rdmsr",
            in("ecx") 0xC000_0101u32,
            out("eax") lo,
            out("edx") hi,
            options(nostack, preserves_flags),
        );
        // rdmsr returns the 64-bit MSR split across EDX:EAX.
        let gs_base = (lo as u64) | ((hi as u64) << 32);
        if gs_base == 0 {
            return 0; // GS not yet initialised — early boot, return BSP slot.
        }

        // GS is set but BSP_GS_INITIALIZED wasn't yet visible (narrow race
        // between wrmsr and the store; take the GS-relative read here too).
        let idx: u64;
        core::arch::asm!(
            "mov {idx}, gs:[0]",
            idx = out(reg) idx,
            options(nostack, preserves_flags, readonly),
        );
        (idx as usize).min(MAX_CPUS - 1)
    }
}
349
350/// Alias for current_cpu_index.
351#[inline]
352pub fn current_cpu_index_fast() -> usize {
353 current_cpu_index()
354}
355
356/// Resolve current CPU index from GS base (compatibility).
357pub fn cpu_index_from_gs() -> Option<usize> {
358 Some(current_cpu_index())
359}
360
361/// Access the per-CPU array (read-only).
362pub fn percpu() -> &'static [PerCpu; MAX_CPUS] {
363 &PERCPU
364}
365
366/// Mark current CPU as ready to handle TLB shootdown IPIs.
367pub fn mark_tlb_ready_current() {
368 let idx = current_cpu_index();
369 PERCPU[idx].tlb_ready.store(true, Ordering::Release);
370}
371
372/// Returns true iff CPU `index` is online and ready for TLB shootdown.
373pub fn tlb_ready(index: usize) -> bool {
374 PERCPU
375 .get(index)
376 .map(|cpu| {
377 cpu.present.load(Ordering::Acquire)
378 && cpu.online.load(Ordering::Acquire)
379 && cpu.tlb_ready.load(Ordering::Acquire)
380 })
381 .unwrap_or(false)
382}