// strat9_kernel/process/scheduler.rs
//! Scheduler implementation
//!
//! Implements a per-CPU round-robin scheduler for Strat9-OS with support for
//! cooperative and preemptive multitasking.
//!
//! ## Preemption design
//!
//! The timer interrupt (100Hz) calls `maybe_preempt()` which picks the next
//! task and performs a context switch. Interrupts are disabled while the
//! scheduler lock is held to prevent deadlock on single-core systems:
//!
//! - `yield_task()`: CLI → lock → pick next → TSS/CR3 → unlock → switch_context → restore IF
//! - Timer handler: CPU already cleared IF → lock → pick next → TSS/CR3 → unlock → switch_context
//!
//! Each task has its own 16KB kernel stack. Callee-saved registers are
//! pushed/popped by `switch_context()`. `CpuContext` only stores `saved_rsp`.
//!
//! TODO(v3 scheduler):
//! - API stabilization before adding more features:
//!   - freeze scheduler command syntax.
//!   - add a small machine-friendly output format (key=value) for scripts/debug.
//! - observability v2:
//!   - per-class latency/wait histograms.
//!   - one structured dump format (instead of free-form text logs) for top/debug.
//! - targeted scheduler tests (high priority):
//!   - config validation/reject paths (class/policy map).
//!   - ready-task migration on class-table updates.
//!   - SMP steal/preempt non-regression.
//! - only then: CPU affinity (first truly useful advanced scheduler feature).
//!
//! Legacy backlog:
//! - class registry v2:
//!   - dynamic add/remove/reorder with validation and safe reject path.
//!   - policy->class mapping as runtime registry (not only static enum mapping).
//! - atomic class-table migration:
//!   - RCU/STW swap + migration of queued tasks across classes.
//!   - preserve per-task accounting (vruntime, rt budget, wake deadlines).
//! - balancing v2:
//!   - dedicated balancer module, per-class steal policy, CPU affinity masks.
//!   - NUMA-aware placement (future) and stronger anti-thrashing controls.
//! - SMP hardening:
//!   - explicit lock hierarchy doc + assertions.
//!   - improved resched IPI batching/coalescing policy tuning.
//! - observability v2:
//!   - latency/wait-time histograms per class + structured trace dump.
//!   - shell/top integration over stable snapshot API.
//! - tests:
//!   - deterministic migration/policy-remap/SMP-steal suites.
//!   - fairness/starvation long-run regression in test ISO.
//!
//! Optimization roadmap (stability-first, incremental):
//! 1) Lock contention reduction (highest ROI, low risk)
//!    - keep scheduler critical sections minimal: compute decisions under lock,
//!      execute expensive side effects (IPI, signal delivery, cleanup) after unlock.
//!    - split hot paths into tiny helpers with explicit "lock held / lock free" contract.
//!    - add/track contention counters in every try_lock fallback path.
//! 2) Wakeup path scalability (only after strong guards)
//!    - re-introduce deadline index behind a runtime feature flag (default OFF).
//!    - enforce single-writer API for wake deadlines (no direct field stores in syscalls).
//!    - add strict invariants:
//!      - if task has deadline != 0, index contains task exactly once.
//!      - on wake/kill/exit/resume, deadline is removed from index and field cleared.
//!    - keep safe fallback scan path available and switchable at runtime.
//! 3) Scheduler observability for regressions
//!    - keep stable key=value output for scripts (`scheduler metrics kv`, `scheduler dump kv`).
//!    - expose blocked-task ids and per-cpu preempt causes to diagnose stalls quickly.
//!    - include boot-phase and lock-miss counters in all dump modes.
//! 4) Balancing/pick optimizations
//!    - tune steal hysteresis/cooldown with metrics, avoid ping-pong migration.
//!    - avoid counting idle task as runnable load for CPU selection.
//!    - add bounded per-tick work budgets to prevent long interrupt latency tails.
//! 5) Safety rails before each optimization lands
//!    - ship each optimization in one isolated patchset with rollback switch.
//!    - validate with targeted scenarios:
//!      - boot + shell responsiveness,
//!      - timeout-heavy workload (poll/futex/nanosleep),
//!      - SMP preempt/steal stress.
//!    - if any regression appears, disable feature first, debug second.

80use super::task::{Pid, Task, TaskId, TaskPriority, TaskState, Tid};
81use crate::{
82    arch::x86_64::{apic, percpu, restore_flags, save_flags_and_cli, timer, timer::NS_PER_TICK},
83    sync::SpinLock,
84};
85use alloc::{collections::BTreeMap, sync::Arc, vec::Vec};
86use core::sync::atomic::{AtomicBool, AtomicU64, Ordering};
87
/// Per-CPU scheduler tick counters used for CPU usage estimation.
///
/// - `CPU_TOTAL_TICKS[cpu]`: all timer ticks observed on `cpu`.
/// - `CPU_IDLE_TICKS[cpu]`: ticks where the idle task was running on `cpu`.
///
/// CPU usage over a time window:
/// `usage = 1 - (delta_idle / delta_total)`.
static CPU_TOTAL_TICKS: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
static CPU_IDLE_TICKS: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Ticks charged to real-time-class tasks, per CPU.
static CPU_RT_RUNTIME_TICKS: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Ticks charged to fair-class tasks, per CPU.
static CPU_FAIR_RUNTIME_TICKS: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Context switches performed, per CPU.
static CPU_SWITCH_COUNT: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Preemptions performed, per CPU.
static CPU_PREEMPT_COUNT: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Tasks stolen *into* this CPU from another CPU's queues.
static CPU_STEAL_IN_COUNT: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Tasks stolen *out of* this CPU's queues by another CPU.
static CPU_STEAL_OUT_COUNT: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Failed scheduler try_lock attempts, per CPU (lock-contention metric).
static CPU_TRY_LOCK_FAIL_COUNT: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Latch used to coalesce reschedule IPIs: `send_resched_ipi_to_cpu` sets it
/// before sending and skips the IPI if it was already set.
/// NOTE(review): presumably cleared by the target CPU's resched handler —
/// the clearing side is not in this chunk; confirm.
static RESCHED_IPI_PENDING: [AtomicBool; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicBool::new(false) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Tick of the last steal involving each CPU.
/// NOTE(review): consumers live outside this chunk; presumably paired with
/// `STEAL_COOLDOWN_TICKS` for steal rate limiting — confirm in balancer code.
static LAST_STEAL_TICK: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];

/// Minimum runnable-count imbalance before a steal is attempted
/// (used outside this chunk).
const STEAL_IMBALANCE_MIN: usize = 2;
/// Minimum ticks between steal attempts (used outside this chunk).
const STEAL_COOLDOWN_TICKS: u64 = 2;
120
121/// Performs the active cpu count operation.
122#[inline]
123fn active_cpu_count() -> usize {
124    percpu::cpu_count()
125        .max(1)
126        .min(crate::arch::x86_64::percpu::MAX_CPUS)
127}
128
/// Returns `true` when `cpu` is a valid index into the per-CPU arrays
/// (i.e. strictly less than `MAX_CPUS`).
#[inline]
fn cpu_is_valid(cpu: usize) -> bool {
    cpu < crate::arch::x86_64::percpu::MAX_CPUS
}
134
/// Point-in-time copy of the per-CPU total/idle tick counters,
/// as produced by [`cpu_usage_snapshot`].
#[derive(Clone, Copy)]
pub struct CpuUsageSnapshot {
    /// Number of CPUs with meaningful entries in the arrays below.
    pub cpu_count: usize,
    /// Total timer ticks per CPU (entries at index >= `cpu_count` are zero).
    pub total_ticks: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Idle-task ticks per CPU (entries at index >= `cpu_count` are zero).
    pub idle_ticks: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
}
141
/// Point-in-time copy of all per-CPU scheduler metric counters,
/// as produced by [`scheduler_metrics_snapshot`].
/// Only the first `cpu_count` entries of each array are populated.
#[derive(Clone, Copy)]
pub struct SchedulerMetricsSnapshot {
    /// Number of CPUs with meaningful entries in the arrays below.
    pub cpu_count: usize,
    /// Ticks charged to real-time-class tasks, per CPU.
    pub rt_runtime_ticks: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Ticks charged to fair-class tasks, per CPU.
    pub fair_runtime_ticks: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Ticks spent in the idle task, per CPU (mirrors `CPU_IDLE_TICKS`).
    pub idle_runtime_ticks: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Context switches, per CPU.
    pub switch_count: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Preemptions, per CPU.
    pub preempt_count: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Tasks stolen into each CPU.
    pub steal_in_count: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Tasks stolen out of each CPU.
    pub steal_out_count: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Failed scheduler try_lock attempts, per CPU.
    pub try_lock_fail_count: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
}
154
/// Structural snapshot of the scheduler (populated outside this chunk,
/// presumably for `scheduler dump` / top-style introspection — confirm).
#[derive(Clone, Copy)]
pub struct SchedulerStateSnapshot {
    /// Whether the global scheduler has been constructed.
    pub initialized: bool,
    /// Boot phase indicator. NOTE(review): semantics defined elsewhere — confirm.
    pub boot_phase: u8,
    /// Number of CPUs with meaningful entries in the per-CPU arrays below.
    pub cpu_count: usize,
    /// Class pick order (3 classes: real-time, fair, idle).
    pub pick_order: [crate::process::sched::SchedClassId; 3],
    /// Class steal order (idle class is not stealable, hence 2 entries).
    pub steal_order: [crate::process::sched::SchedClassId; 2],
    /// Number of tasks currently blocked on an event.
    pub blocked_tasks: usize,
    /// Id of the task running on each CPU.
    pub current_task: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Real-time run-queue length, per CPU.
    pub rq_rt: [usize; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Fair run-queue length, per CPU.
    pub rq_fair: [usize; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Idle run-queue length, per CPU.
    pub rq_idle: [usize; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Per-CPU "reschedule requested" flags.
    pub need_resched: [bool; crate::arch::x86_64::percpu::MAX_CPUS],
}
169
170/// Performs the cpu usage snapshot operation.
171pub fn cpu_usage_snapshot() -> CpuUsageSnapshot {
172    let cpu_count = active_cpu_count();
173    let mut total_ticks = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
174    let mut idle_ticks = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
175    for i in 0..cpu_count {
176        total_ticks[i] = CPU_TOTAL_TICKS[i].load(Ordering::Relaxed);
177        idle_ticks[i] = CPU_IDLE_TICKS[i].load(Ordering::Relaxed);
178    }
179    CpuUsageSnapshot {
180        cpu_count,
181        total_ticks,
182        idle_ticks,
183    }
184}
185
186/// Performs the scheduler metrics snapshot operation.
187pub fn scheduler_metrics_snapshot() -> SchedulerMetricsSnapshot {
188    let cpu_count = active_cpu_count();
189    let mut rt_runtime_ticks = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
190    let mut fair_runtime_ticks = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
191    let mut idle_runtime_ticks = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
192    let mut switch_count = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
193    let mut preempt_count = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
194    let mut steal_in_count = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
195    let mut steal_out_count = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
196    let mut try_lock_fail_count = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
197    for i in 0..cpu_count {
198        rt_runtime_ticks[i] = CPU_RT_RUNTIME_TICKS[i].load(Ordering::Relaxed);
199        fair_runtime_ticks[i] = CPU_FAIR_RUNTIME_TICKS[i].load(Ordering::Relaxed);
200        idle_runtime_ticks[i] = CPU_IDLE_TICKS[i].load(Ordering::Relaxed);
201        switch_count[i] = CPU_SWITCH_COUNT[i].load(Ordering::Relaxed);
202        preempt_count[i] = CPU_PREEMPT_COUNT[i].load(Ordering::Relaxed);
203        steal_in_count[i] = CPU_STEAL_IN_COUNT[i].load(Ordering::Relaxed);
204        steal_out_count[i] = CPU_STEAL_OUT_COUNT[i].load(Ordering::Relaxed);
205        try_lock_fail_count[i] = CPU_TRY_LOCK_FAIL_COUNT[i].load(Ordering::Relaxed);
206    }
207    SchedulerMetricsSnapshot {
208        cpu_count,
209        rt_runtime_ticks,
210        fair_runtime_ticks,
211        idle_runtime_ticks,
212        switch_count,
213        preempt_count,
214        steal_in_count,
215        steal_out_count,
216        try_lock_fail_count,
217    }
218}
219
220/// Performs the reset scheduler metrics operation.
221pub fn reset_scheduler_metrics() {
222    let cpu_count = active_cpu_count();
223    for i in 0..cpu_count {
224        CPU_RT_RUNTIME_TICKS[i].store(0, Ordering::Relaxed);
225        CPU_FAIR_RUNTIME_TICKS[i].store(0, Ordering::Relaxed);
226        CPU_IDLE_TICKS[i].store(0, Ordering::Relaxed);
227        CPU_SWITCH_COUNT[i].store(0, Ordering::Relaxed);
228        CPU_PREEMPT_COUNT[i].store(0, Ordering::Relaxed);
229        CPU_STEAL_IN_COUNT[i].store(0, Ordering::Relaxed);
230        CPU_STEAL_OUT_COUNT[i].store(0, Ordering::Relaxed);
231        CPU_TRY_LOCK_FAIL_COUNT[i].store(0, Ordering::Relaxed);
232    }
233}
234
235/// Performs the note try lock fail on cpu operation.
236#[inline]
237pub(crate) fn note_try_lock_fail_on_cpu(cpu: usize) {
238    if cpu_is_valid(cpu) {
239        CPU_TRY_LOCK_FAIL_COUNT[cpu].fetch_add(1, Ordering::Relaxed);
240    }
241}
242
243/// Performs the note try lock fail operation.
244#[inline]
245pub fn note_try_lock_fail() {
246    note_try_lock_fail_on_cpu(current_cpu_index());
247}
248
// ─── Cross-CPU IPI helpers ────────────────────────────────────────────────────
250
251/// Send a reschedule IPI to `cpu_index`.
252/// No-op if APIC is not initialized, or if `cpu_index` is the current CPU
253/// (the caller already handles the local-CPU case via `yield_cpu`).
254fn send_resched_ipi_to_cpu(cpu_index: usize) {
255    if !cpu_is_valid(cpu_index) {
256        return;
257    }
258    if !apic::is_initialized() {
259        return;
260    }
261    let my_cpu = current_cpu_index();
262    if let Some(target_apic) = percpu::apic_id_by_cpu_index(cpu_index) {
263        if let Some(my_apic) = percpu::apic_id_by_cpu_index(my_cpu) {
264            if target_apic != my_apic {
265                if RESCHED_IPI_PENDING[cpu_index].swap(true, Ordering::AcqRel) {
266                    return;
267                }
268                apic::send_resched_ipi(target_apic);
269            }
270        }
271    }
272}
273
/// The global scheduler instance (`None` until initialized during boot).
pub(crate) static SCHEDULER: SpinLock<Option<Scheduler>> = SpinLock::new(None);

/// Returns the scheduler lock address for deadlock tracing.
pub fn debug_scheduler_lock_addr() -> usize {
    &SCHEDULER as *const _ as usize
}

/// Global tick counter (safe to increment from interrupt context).
static TICK_COUNT: AtomicU64 = AtomicU64::new(0);
/// Verbose scheduler trace switch; gates `sched_trace` output.
static SCHED_VERBOSE: AtomicBool = AtomicBool::new(false);
286
287/// Performs the sched trace operation.
288#[inline]
289fn sched_trace(args: core::fmt::Arguments<'_>) {
290    if SCHED_VERBOSE.load(Ordering::Relaxed) {
291        log::debug!("[sched] {}", args);
292    }
293}
294
/// Information needed to perform a context switch after releasing the lock.
pub(super) struct SwitchTarget {
    /// Where to store the outgoing task's saved stack pointer.
    pub(super) old_rsp_ptr: *mut u64,
    /// Location of the incoming task's saved stack pointer.
    pub(super) new_rsp_ptr: *const u64,
    /// Save area for the outgoing task's FPU/SIMD state.
    pub(super) old_fpu_ptr: *mut u8,
    /// Saved FPU/SIMD state of the incoming task.
    pub(super) new_fpu_ptr: *const u8,
    /// XCR0 value of the outgoing task.
    pub(super) old_xcr0: u64,
    /// XCR0 value to load for the incoming task.
    pub(super) new_xcr0: u64,
}
304
// SAFETY: The pointers in SwitchTarget point into Arc<Task> objects
// that are kept alive by the scheduler. The scheduler lock ensures
// exclusive access when computing these pointers, so moving a
// SwitchTarget across threads cannot produce dangling or aliased access.
unsafe impl Send for SwitchTarget {}
309
/// Result of a non-blocking wait on child exit.
pub enum WaitChildResult {
    /// A zombie child was reaped; carries its internal task id, userspace
    /// pid, and exit status.
    Reaped {
        child: TaskId,
        pid: Pid,
        status: i32,
    },
    /// The caller has no children to wait for.
    NoChildren,
    /// At least one child exists, but none has exited yet.
    StillRunning,
}
320
321/// Performs the current cpu index operation.
322fn current_cpu_index() -> usize {
323    crate::arch::x86_64::percpu::current_cpu_index()
324}
325
/// Per-CPU set of run queues, one per scheduling class.
struct PerCpuClassRqSet {
    /// Real-time class run queue.
    real_time: crate::process::sched::real_time::RealTimeClassRq,
    /// Fair (CFS-style) class run queue.
    fair: crate::process::sched::fair::FairClassRq,
    /// Idle class run queue.
    idle: crate::process::sched::idle::IdleClassRq,
}
331
332impl PerCpuClassRqSet {
333    /// Creates a new instance.
334    fn new() -> Self {
335        Self {
336            real_time: crate::process::sched::real_time::RealTimeClassRq::new(),
337            fair: crate::process::sched::fair::FairClassRq::new(),
338            idle: crate::process::sched::idle::IdleClassRq::new(),
339        }
340    }
341
342    /// Performs the enqueue operation.
343    fn enqueue(&mut self, class: crate::process::sched::SchedClassId, task: Arc<Task>) {
344        use crate::process::sched::SchedClassRq;
345        match class {
346            crate::process::sched::SchedClassId::Fair => self.fair.enqueue(task),
347            crate::process::sched::SchedClassId::RealTime => self.real_time.enqueue(task),
348            crate::process::sched::SchedClassId::Idle => self.idle.enqueue(task),
349        }
350    }
351
352    /// Performs the len by class operation.
353    fn len_by_class(&self, class: crate::process::sched::SchedClassId) -> usize {
354        use crate::process::sched::SchedClassRq;
355        match class {
356            crate::process::sched::SchedClassId::Fair => self.fair.len(),
357            crate::process::sched::SchedClassId::RealTime => self.real_time.len(),
358            crate::process::sched::SchedClassId::Idle => self.idle.len(),
359        }
360    }
361
362    /// Performs the runnable len operation.
363    fn runnable_len(&self) -> usize {
364        self.len_by_class(crate::process::sched::SchedClassId::RealTime)
365            + self.len_by_class(crate::process::sched::SchedClassId::Fair)
366    }
367
368    /// Performs the pick next by class operation.
369    fn pick_next_by_class(
370        &mut self,
371        class: crate::process::sched::SchedClassId,
372    ) -> Option<Arc<Task>> {
373        use crate::process::sched::SchedClassRq;
374        match class {
375            crate::process::sched::SchedClassId::Fair => self.fair.pick_next(),
376            crate::process::sched::SchedClassId::RealTime => self.real_time.pick_next(),
377            crate::process::sched::SchedClassId::Idle => self.idle.pick_next(),
378        }
379    }
380
381    /// Performs the pick next operation.
382    fn pick_next(&mut self, table: &crate::process::sched::SchedClassTable) -> Option<Arc<Task>> {
383        for class in table.pick_order().iter().copied() {
384            if let Some(task) = self.pick_next_by_class(class) {
385                return Some(task);
386            }
387        }
388        None
389    }
390
391    /// Updates current.
392    fn update_current(
393        &mut self,
394        rt: &crate::process::sched::CurrentRuntime,
395        task: &Task,
396        is_yield: bool,
397        table: &crate::process::sched::SchedClassTable,
398    ) -> bool {
399        use crate::process::sched::SchedClassRq;
400        let should_preempt = match table.class_for_task(task) {
401            crate::process::sched::SchedClassId::Fair => {
402                self.fair.update_current(rt, task, is_yield)
403            }
404            crate::process::sched::SchedClassId::RealTime => {
405                self.real_time.update_current(rt, task, is_yield)
406            }
407            crate::process::sched::SchedClassId::Idle => {
408                self.idle.update_current(rt, task, is_yield)
409            }
410        };
411        // Always preempt idle task if there are other tasks ready
412        let any_ready = !self.real_time.is_empty() || !self.fair.is_empty();
413        should_preempt
414            || (table.class_for_task(task) == crate::process::sched::SchedClassId::Idle
415                && any_ready)
416    }
417
418    /// Performs the remove operation.
419    fn remove(&mut self, task_id: crate::process::TaskId) -> bool {
420        use crate::process::sched::SchedClassRq;
421        self.real_time.remove(task_id) || self.fair.remove(task_id) || self.idle.remove(task_id)
422    }
423
424    /// Performs the steal candidate operation.
425    fn steal_candidate(
426        &mut self,
427        table: &crate::process::sched::SchedClassTable,
428    ) -> Option<Arc<Task>> {
429        for class in table.steal_order().iter().copied() {
430            if let Some(task) = self.pick_next_by_class(class) {
431                return Some(task);
432            }
433        }
434        None
435    }
436}
437
/// Per-CPU scheduler state
struct SchedulerCpu {
    /// Multi-class priority queues
    class_rqs: PerCpuClassRqSet,
    /// Currently running task
    current_task: Option<Arc<Task>>,
    /// Current runtime accounting
    current_runtime: crate::process::sched::CurrentRuntime,
    /// Idle task to run when no other tasks are ready
    idle_task: Arc<Task>,
    /// Task that was just preempted and needs to be re-queued
    task_to_requeue: Option<Arc<Task>>,
    /// Task that is dying or blocked, to drop outside the scheduler lock
    /// NOTE(review): presumably deferred because dropping a Task can free
    /// resources and must not run under the lock — confirm in core_impl.
    task_to_drop: Option<Arc<Task>>,
    /// Flag indicating if the current task's time slice has expired
    need_resched: bool,
}
455
/// The round-robin scheduler (per-CPU queues)
pub struct Scheduler {
    /// Per-CPU scheduler state
    cpus: alloc::vec::Vec<SchedulerCpu>,
    /// Tasks blocked waiting for an event (keyed by TaskId for O(log n) wake)
    blocked_tasks: BTreeMap<TaskId, Arc<Task>>,
    /// All tasks in the system (for lookup by TaskId)
    pub(crate) all_tasks: BTreeMap<TaskId, Arc<Task>>,
    /// Map TaskId -> CPU index (for wake/resume routing)
    task_cpu: BTreeMap<TaskId, usize>,
    /// Map userspace PID -> internal TaskId (process leader in current model).
    pid_to_task: BTreeMap<Pid, TaskId>,
    /// Map userspace TID -> internal TaskId (fast thread lookup).
    tid_to_task: BTreeMap<Tid, TaskId>,
    /// Map PID -> process group id.
    pid_to_pgid: BTreeMap<Pid, Pid>,
    /// Map PID -> session id.
    pid_to_sid: BTreeMap<Pid, Pid>,
    /// Group membership index: pgid -> task ids.
    pgid_members: BTreeMap<Pid, alloc::vec::Vec<TaskId>>,
    /// Session membership index: sid -> task ids.
    sid_members: BTreeMap<Pid, alloc::vec::Vec<TaskId>>,
    /// Deadline -> task ids map for sleeping tasks (ordered wakeups).
    /// Currently unused here (see module TODO: deadline index is parked
    /// behind a future feature flag).
    #[allow(dead_code)]
    wake_deadlines: BTreeMap<u64, alloc::vec::Vec<TaskId>>,
    /// Task -> deadline reverse index (unused; paired with `wake_deadlines`).
    #[allow(dead_code)]
    wake_deadline_of: BTreeMap<TaskId, u64>,
    /// Parent relationship: child -> parent
    parent_of: BTreeMap<TaskId, TaskId>,
    /// Children list: parent -> children
    children_of: BTreeMap<TaskId, alloc::vec::Vec<TaskId>>,
    /// Zombie exit statuses: child -> (exit_code, pid)
    zombies: BTreeMap<TaskId, (i32, Pid)>,
    /// Scheduler class table (pick order, steal order, class metadata)
    class_table: crate::process::sched::SchedClassTable,
}
493
/// Sanity-checks a task's saved CPU context before switching to it.
///
/// Verifies three invariants and returns `Err` with a static description
/// of the first one violated:
/// 1. the `Arc` payload pointer is non-null (defensive; `Arc::as_ptr` on a
///    live `Arc` should never return null),
/// 2. `saved_rsp` plus the 56-byte switch frame fits inside the task's
///    kernel stack bounds,
/// 3. the return IP stored in the switch frame is non-zero.
fn validate_task_context(task: &Arc<Task>) -> Result<(), &'static str> {
    let ptr = Arc::as_ptr(task);
    if ptr.is_null() {
        return Err("null Task pointer in Arc");
    }
    // NOTE(review): raw read of the task's saved stack pointer; assumes the
    // scheduler lock serializes access to `task.context` — confirm callers.
    let saved_rsp = unsafe { (*task.context.get()).saved_rsp };
    let stack_base = task.kernel_stack.virt_base.as_u64();
    let stack_top = stack_base.saturating_add(task.kernel_stack.size as u64);

    // 56 bytes = full switch frame (6 callee-saved slots + return IP).
    if saved_rsp < stack_base || saved_rsp.saturating_add(56) > stack_top {
        return Err("saved_rsp outside kernel stack bounds");
    }

    // For our switch frame layout, return IP is at [saved_rsp + 48].
    // Use read_unaligned: saved_rsp is only guaranteed to be within the stack
    // bounds, not necessarily aligned to 8 bytes at this offset.
    let ret_ip = unsafe { core::ptr::read_unaligned((saved_rsp + 48) as *const u64) };
    if ret_ip == 0 {
        return Err("null return IP in switch frame");
    }

    Ok(())
}
518
// Implementation is split across submodules; `core_impl` stays private,
// the others re-export their public APIs at `scheduler::` scope.
mod core_impl;
mod runtime_ops;
mod task_ops;
mod timer_ops;

pub use runtime_ops::*;
pub use task_ops::*;
pub use timer_ops::*;