1use super::task::{Pid, Task, TaskId, TaskPriority, TaskState, Tid};
81use crate::{
82 arch::x86_64::{apic, percpu, restore_flags, save_flags_and_cli, timer, timer::NS_PER_TICK},
83 sync::SpinLock,
84};
85use alloc::{collections::BTreeMap, sync::Arc, vec::Vec};
86use core::sync::atomic::{AtomicBool, AtomicU64, Ordering};
87
// ---------------------------------------------------------------------------
// Per-CPU accounting counters, indexed by CPU index (0..MAX_CPUS).
// All reads/writes use Ordering::Relaxed: these are statistics, not
// synchronization — approximate cross-CPU visibility is acceptable.
// ---------------------------------------------------------------------------

/// Total timer ticks accounted on each CPU (never reset by
/// `reset_scheduler_metrics`; consumed by `cpu_usage_snapshot`).
static CPU_TOTAL_TICKS: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Ticks spent in the idle task per CPU (doubles as `idle_runtime_ticks`
/// in the metrics snapshot).
static CPU_IDLE_TICKS: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Ticks consumed by real-time-class tasks per CPU.
static CPU_RT_RUNTIME_TICKS: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Ticks consumed by fair-class tasks per CPU.
static CPU_FAIR_RUNTIME_TICKS: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Context switches performed per CPU.
static CPU_SWITCH_COUNT: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Involuntary preemptions per CPU.
static CPU_PREEMPT_COUNT: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Tasks stolen INTO this CPU from another run queue.
static CPU_STEAL_IN_COUNT: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Tasks stolen FROM this CPU by another run queue.
static CPU_STEAL_OUT_COUNT: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Failed `try_lock` attempts recorded via `note_try_lock_fail*`.
static CPU_TRY_LOCK_FAIL_COUNT: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Dedup flag for resched IPIs: set (via `swap`) before sending in
/// `send_resched_ipi_to_cpu` so at most one IPI is in flight per CPU.
/// NOTE(review): presumably cleared by the resched-IPI handler — that code
/// is not in this chunk, confirm.
static RESCHED_IPI_PENDING: [AtomicBool; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicBool::new(false) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Tick at which each CPU last attempted a steal (used with
/// `STEAL_COOLDOWN_TICKS`; consumer not visible in this chunk).
static LAST_STEAL_TICK: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];

/// Minimum runnable-queue imbalance before work stealing is considered.
/// NOTE(review): the steal decision logic lives outside this chunk — confirm.
const STEAL_IMBALANCE_MIN: usize = 2;
/// Minimum ticks between steal attempts from the same CPU.
const STEAL_COOLDOWN_TICKS: u64 = 2;
120
121#[inline]
123fn active_cpu_count() -> usize {
124 percpu::cpu_count()
125 .max(1)
126 .min(crate::arch::x86_64::percpu::MAX_CPUS)
127}
128
129#[inline]
131fn cpu_is_valid(cpu: usize) -> bool {
132 cpu < crate::arch::x86_64::percpu::MAX_CPUS
133}
134
/// Point-in-time copy of the per-CPU total/idle tick counters.
///
/// Only the first `cpu_count` entries of each array are populated; the rest
/// remain zero (see `cpu_usage_snapshot`).
#[derive(Clone, Copy)]
pub struct CpuUsageSnapshot {
    /// Number of valid entries in the arrays below.
    pub cpu_count: usize,
    /// Total ticks per CPU, from `CPU_TOTAL_TICKS`.
    pub total_ticks: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Idle ticks per CPU, from `CPU_IDLE_TICKS`.
    pub idle_ticks: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
}
141
/// Point-in-time copy of the per-CPU scheduler metric counters.
///
/// Only the first `cpu_count` entries of each array are populated
/// (see `scheduler_metrics_snapshot`).
#[derive(Clone, Copy)]
pub struct SchedulerMetricsSnapshot {
    /// Number of valid entries in the arrays below.
    pub cpu_count: usize,
    /// Ticks consumed by real-time-class tasks.
    pub rt_runtime_ticks: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Ticks consumed by fair-class tasks.
    pub fair_runtime_ticks: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Ticks spent idle (sourced from `CPU_IDLE_TICKS`).
    pub idle_runtime_ticks: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Context switches performed.
    pub switch_count: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Involuntary preemptions.
    pub preempt_count: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Tasks stolen into each CPU.
    pub steal_in_count: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Tasks stolen away from each CPU.
    pub steal_out_count: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Failed scheduler try-lock attempts.
    pub try_lock_fail_count: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
}
154
/// Point-in-time view of overall scheduler state for debugging/introspection.
/// NOTE(review): the producer of this snapshot is not in this chunk; field
/// meanings below are read off the types and names — confirm against it.
#[derive(Clone, Copy)]
pub struct SchedulerStateSnapshot {
    /// Whether the scheduler has been initialized.
    pub initialized: bool,
    /// Current boot phase identifier.
    pub boot_phase: u8,
    /// Number of valid entries in the per-CPU arrays below.
    pub cpu_count: usize,
    /// Class pick order (3 classes: RT / fair / idle).
    pub pick_order: [crate::process::sched::SchedClassId; 3],
    /// Class steal order (idle class excluded, hence 2 entries).
    pub steal_order: [crate::process::sched::SchedClassId; 2],
    /// Number of currently blocked tasks.
    pub blocked_tasks: usize,
    /// Raw id of the task running on each CPU.
    pub current_task: [u64; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Real-time run-queue length per CPU.
    pub rq_rt: [usize; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Fair run-queue length per CPU.
    pub rq_fair: [usize; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Idle run-queue length per CPU.
    pub rq_idle: [usize; crate::arch::x86_64::percpu::MAX_CPUS],
    /// Per-CPU "reschedule requested" flags.
    pub need_resched: [bool; crate::arch::x86_64::percpu::MAX_CPUS],
}
169
170pub fn cpu_usage_snapshot() -> CpuUsageSnapshot {
172 let cpu_count = active_cpu_count();
173 let mut total_ticks = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
174 let mut idle_ticks = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
175 for i in 0..cpu_count {
176 total_ticks[i] = CPU_TOTAL_TICKS[i].load(Ordering::Relaxed);
177 idle_ticks[i] = CPU_IDLE_TICKS[i].load(Ordering::Relaxed);
178 }
179 CpuUsageSnapshot {
180 cpu_count,
181 total_ticks,
182 idle_ticks,
183 }
184}
185
186pub fn scheduler_metrics_snapshot() -> SchedulerMetricsSnapshot {
188 let cpu_count = active_cpu_count();
189 let mut rt_runtime_ticks = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
190 let mut fair_runtime_ticks = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
191 let mut idle_runtime_ticks = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
192 let mut switch_count = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
193 let mut preempt_count = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
194 let mut steal_in_count = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
195 let mut steal_out_count = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
196 let mut try_lock_fail_count = [0u64; crate::arch::x86_64::percpu::MAX_CPUS];
197 for i in 0..cpu_count {
198 rt_runtime_ticks[i] = CPU_RT_RUNTIME_TICKS[i].load(Ordering::Relaxed);
199 fair_runtime_ticks[i] = CPU_FAIR_RUNTIME_TICKS[i].load(Ordering::Relaxed);
200 idle_runtime_ticks[i] = CPU_IDLE_TICKS[i].load(Ordering::Relaxed);
201 switch_count[i] = CPU_SWITCH_COUNT[i].load(Ordering::Relaxed);
202 preempt_count[i] = CPU_PREEMPT_COUNT[i].load(Ordering::Relaxed);
203 steal_in_count[i] = CPU_STEAL_IN_COUNT[i].load(Ordering::Relaxed);
204 steal_out_count[i] = CPU_STEAL_OUT_COUNT[i].load(Ordering::Relaxed);
205 try_lock_fail_count[i] = CPU_TRY_LOCK_FAIL_COUNT[i].load(Ordering::Relaxed);
206 }
207 SchedulerMetricsSnapshot {
208 cpu_count,
209 rt_runtime_ticks,
210 fair_runtime_ticks,
211 idle_runtime_ticks,
212 switch_count,
213 preempt_count,
214 steal_in_count,
215 steal_out_count,
216 try_lock_fail_count,
217 }
218}
219
220pub fn reset_scheduler_metrics() {
222 let cpu_count = active_cpu_count();
223 for i in 0..cpu_count {
224 CPU_RT_RUNTIME_TICKS[i].store(0, Ordering::Relaxed);
225 CPU_FAIR_RUNTIME_TICKS[i].store(0, Ordering::Relaxed);
226 CPU_IDLE_TICKS[i].store(0, Ordering::Relaxed);
227 CPU_SWITCH_COUNT[i].store(0, Ordering::Relaxed);
228 CPU_PREEMPT_COUNT[i].store(0, Ordering::Relaxed);
229 CPU_STEAL_IN_COUNT[i].store(0, Ordering::Relaxed);
230 CPU_STEAL_OUT_COUNT[i].store(0, Ordering::Relaxed);
231 CPU_TRY_LOCK_FAIL_COUNT[i].store(0, Ordering::Relaxed);
232 }
233}
234
235#[inline]
237pub(crate) fn note_try_lock_fail_on_cpu(cpu: usize) {
238 if cpu_is_valid(cpu) {
239 CPU_TRY_LOCK_FAIL_COUNT[cpu].fetch_add(1, Ordering::Relaxed);
240 }
241}
242
/// Record one failed scheduler try-lock attempt against the calling CPU.
#[inline]
pub fn note_try_lock_fail() {
    note_try_lock_fail_on_cpu(current_cpu_index());
}
248
249fn send_resched_ipi_to_cpu(cpu_index: usize) {
255 if !cpu_is_valid(cpu_index) {
256 return;
257 }
258 if !apic::is_initialized() {
259 return;
260 }
261 let my_cpu = current_cpu_index();
262 if let Some(target_apic) = percpu::apic_id_by_cpu_index(cpu_index) {
263 if let Some(my_apic) = percpu::apic_id_by_cpu_index(my_cpu) {
264 if target_apic != my_apic {
265 if RESCHED_IPI_PENDING[cpu_index].swap(true, Ordering::AcqRel) {
266 return;
267 }
268 apic::send_resched_ipi(target_apic);
269 }
270 }
271 }
272}
273
/// Global scheduler instance; `None` until initialization completes.
pub(crate) static SCHEDULER: SpinLock<Option<Scheduler>> = SpinLock::new(None);
276
277pub fn debug_scheduler_lock_addr() -> usize {
279 &SCHEDULER as *const _ as usize
280}
281
/// Global scheduler tick counter.
static TICK_COUNT: AtomicU64 = AtomicU64::new(0);
/// When `true`, `sched_trace` emits debug log lines.
static SCHED_VERBOSE: AtomicBool = AtomicBool::new(false);
286
/// Emit a scheduler trace line, gated on the `SCHED_VERBOSE` flag so tracing
/// costs only one relaxed load when disabled.
#[inline]
fn sched_trace(args: core::fmt::Arguments<'_>) {
    if SCHED_VERBOSE.load(Ordering::Relaxed) {
        log::debug!("[sched] {}", args);
    }
}
294
/// Pointer bundle handed to the low-level context-switch path: where to save
/// the outgoing task's state and where to restore the incoming task's state
/// from.
pub(super) struct SwitchTarget {
    /// Slot to store the outgoing task's saved RSP into.
    pub(super) old_rsp_ptr: *mut u64,
    /// Slot holding the incoming task's saved RSP.
    pub(super) new_rsp_ptr: *const u64,
    /// Outgoing task's FPU/extended-state save area.
    pub(super) old_fpu_ptr: *mut u8,
    /// Incoming task's FPU/extended-state save area.
    pub(super) new_fpu_ptr: *const u8,
    /// XCR0 value associated with the outgoing task.
    pub(super) old_xcr0: u64,
    /// XCR0 value associated with the incoming task.
    pub(super) new_xcr0: u64,
}

// SAFETY: the raw pointers make this type !Send by default. NOTE(review):
// this impl asserts the pointers remain valid when the struct crosses the
// context-switch boundary; the consuming code is not in this chunk — confirm
// the pointed-to task state outlives every use of a `SwitchTarget`.
unsafe impl Send for SwitchTarget {}
309
/// Outcome of a wait-for-child request.
pub enum WaitChildResult {
    /// A zombie child was reaped; carries its task id, pid and exit status.
    Reaped {
        child: TaskId,
        pid: Pid,
        status: i32,
    },
    /// The caller has no children to wait for.
    NoChildren,
    /// Children exist but none has exited yet.
    StillRunning,
}
320
/// Index of the CPU executing this code (shorthand for the percpu helper).
fn current_cpu_index() -> usize {
    crate::arch::x86_64::percpu::current_cpu_index()
}
325
/// One run queue per scheduling class, owned by a single CPU.
struct PerCpuClassRqSet {
    /// Real-time class queue.
    real_time: crate::process::sched::real_time::RealTimeClassRq,
    /// Fair (CFS-style) class queue.
    fair: crate::process::sched::fair::FairClassRq,
    /// Idle class queue.
    idle: crate::process::sched::idle::IdleClassRq,
}
331
332impl PerCpuClassRqSet {
333 fn new() -> Self {
335 Self {
336 real_time: crate::process::sched::real_time::RealTimeClassRq::new(),
337 fair: crate::process::sched::fair::FairClassRq::new(),
338 idle: crate::process::sched::idle::IdleClassRq::new(),
339 }
340 }
341
342 fn enqueue(&mut self, class: crate::process::sched::SchedClassId, task: Arc<Task>) {
344 use crate::process::sched::SchedClassRq;
345 match class {
346 crate::process::sched::SchedClassId::Fair => self.fair.enqueue(task),
347 crate::process::sched::SchedClassId::RealTime => self.real_time.enqueue(task),
348 crate::process::sched::SchedClassId::Idle => self.idle.enqueue(task),
349 }
350 }
351
352 fn len_by_class(&self, class: crate::process::sched::SchedClassId) -> usize {
354 use crate::process::sched::SchedClassRq;
355 match class {
356 crate::process::sched::SchedClassId::Fair => self.fair.len(),
357 crate::process::sched::SchedClassId::RealTime => self.real_time.len(),
358 crate::process::sched::SchedClassId::Idle => self.idle.len(),
359 }
360 }
361
362 fn runnable_len(&self) -> usize {
364 self.len_by_class(crate::process::sched::SchedClassId::RealTime)
365 + self.len_by_class(crate::process::sched::SchedClassId::Fair)
366 }
367
368 fn pick_next_by_class(
370 &mut self,
371 class: crate::process::sched::SchedClassId,
372 ) -> Option<Arc<Task>> {
373 use crate::process::sched::SchedClassRq;
374 match class {
375 crate::process::sched::SchedClassId::Fair => self.fair.pick_next(),
376 crate::process::sched::SchedClassId::RealTime => self.real_time.pick_next(),
377 crate::process::sched::SchedClassId::Idle => self.idle.pick_next(),
378 }
379 }
380
381 fn pick_next(&mut self, table: &crate::process::sched::SchedClassTable) -> Option<Arc<Task>> {
383 for class in table.pick_order().iter().copied() {
384 if let Some(task) = self.pick_next_by_class(class) {
385 return Some(task);
386 }
387 }
388 None
389 }
390
391 fn update_current(
393 &mut self,
394 rt: &crate::process::sched::CurrentRuntime,
395 task: &Task,
396 is_yield: bool,
397 table: &crate::process::sched::SchedClassTable,
398 ) -> bool {
399 use crate::process::sched::SchedClassRq;
400 let should_preempt = match table.class_for_task(task) {
401 crate::process::sched::SchedClassId::Fair => {
402 self.fair.update_current(rt, task, is_yield)
403 }
404 crate::process::sched::SchedClassId::RealTime => {
405 self.real_time.update_current(rt, task, is_yield)
406 }
407 crate::process::sched::SchedClassId::Idle => {
408 self.idle.update_current(rt, task, is_yield)
409 }
410 };
411 let any_ready = !self.real_time.is_empty() || !self.fair.is_empty();
413 should_preempt
414 || (table.class_for_task(task) == crate::process::sched::SchedClassId::Idle
415 && any_ready)
416 }
417
418 fn remove(&mut self, task_id: crate::process::TaskId) -> bool {
420 use crate::process::sched::SchedClassRq;
421 self.real_time.remove(task_id) || self.fair.remove(task_id) || self.idle.remove(task_id)
422 }
423
424 fn steal_candidate(
426 &mut self,
427 table: &crate::process::sched::SchedClassTable,
428 ) -> Option<Arc<Task>> {
429 for class in table.steal_order().iter().copied() {
430 if let Some(task) = self.pick_next_by_class(class) {
431 return Some(task);
432 }
433 }
434 None
435 }
436}
437
/// Scheduler state owned by a single CPU.
struct SchedulerCpu {
    /// Per-class run queues for this CPU.
    class_rqs: PerCpuClassRqSet,
    /// Task currently executing on this CPU, if any.
    current_task: Option<Arc<Task>>,
    /// Runtime accounting for the current task.
    current_runtime: crate::process::sched::CurrentRuntime,
    /// This CPU's dedicated idle task.
    idle_task: Arc<Task>,
    /// NOTE(review): these two look like handoff slots processed after a
    /// context switch (re-enqueue the old task / drop its last reference);
    /// the consuming code is in a submodule not shown here — confirm.
    task_to_requeue: Option<Arc<Task>>,
    task_to_drop: Option<Arc<Task>>,
    /// Set when this CPU should reschedule at the next opportunity.
    need_resched: bool,
}
455
/// Global scheduler: per-CPU state plus system-wide task bookkeeping.
/// Guarded as a whole by the `SCHEDULER` spinlock.
pub struct Scheduler {
    /// Per-CPU run queues and current-task state, indexed by CPU index.
    cpus: alloc::vec::Vec<SchedulerCpu>,
    /// Tasks that are blocked (not on any run queue), by task id.
    blocked_tasks: BTreeMap<TaskId, Arc<Task>>,
    /// Every live task known to the scheduler, by task id.
    pub(crate) all_tasks: BTreeMap<TaskId, Arc<Task>>,
    /// CPU index each task is associated with.
    task_cpu: BTreeMap<TaskId, usize>,
    /// Pid -> task id lookup.
    pid_to_task: BTreeMap<Pid, TaskId>,
    /// Tid -> task id lookup.
    tid_to_task: BTreeMap<Tid, TaskId>,
    /// Pid -> process-group id.
    pid_to_pgid: BTreeMap<Pid, Pid>,
    /// Pid -> session id.
    pid_to_sid: BTreeMap<Pid, Pid>,
    /// Members of each process group.
    pgid_members: BTreeMap<Pid, alloc::vec::Vec<TaskId>>,
    /// Members of each session.
    sid_members: BTreeMap<Pid, alloc::vec::Vec<TaskId>>,
    /// Wake deadline (tick) -> tasks to wake; currently unused here.
    #[allow(dead_code)]
    wake_deadlines: BTreeMap<u64, alloc::vec::Vec<TaskId>>,
    /// Reverse map: task id -> its wake deadline; currently unused here.
    #[allow(dead_code)]
    wake_deadline_of: BTreeMap<TaskId, u64>,
    /// Child task -> parent task.
    parent_of: BTreeMap<TaskId, TaskId>,
    /// Parent task -> its children.
    children_of: BTreeMap<TaskId, alloc::vec::Vec<TaskId>>,
    /// Exited-but-unreaped tasks: task id -> (exit status, pid).
    zombies: BTreeMap<TaskId, (i32, Pid)>,
    /// Scheduling-class dispatch table (pick order, steal order, classing).
    class_table: crate::process::sched::SchedClassTable,
}
493
/// Sanity-check a task's saved switch context before switching to it.
///
/// Verifies that the saved RSP lies within the task's kernel stack and that
/// the return IP slot in the switch frame is non-null. Returns a static
/// error string describing the first failed check.
fn validate_task_context(task: &Arc<Task>) -> Result<(), &'static str> {
    // Defensive: Arc::as_ptr cannot actually be null for a live Arc, but the
    // check is kept as a cheap guard against corrupted state.
    let ptr = Arc::as_ptr(task);
    if ptr.is_null() {
        return Err("null Task pointer in Arc");
    }
    let saved_rsp = unsafe { (*task.context.get()).saved_rsp };
    let stack_base = task.kernel_stack.virt_base.as_u64();
    let stack_top = stack_base.saturating_add(task.kernel_stack.size as u64);

    // The frame must fit entirely inside the stack: 56 bytes above saved_rsp
    // must be addressable. NOTE(review): 56 presumably covers the saved
    // switch frame (6 registers + return IP at offset 48?) — confirm against
    // the context-switch assembly, which is not in this chunk.
    if saved_rsp < stack_base || saved_rsp.saturating_add(56) > stack_top {
        return Err("saved_rsp outside kernel stack bounds");
    }

    // Offset 48 is where the switch frame stores the return IP; the bounds
    // check above guarantees this read stays inside the kernel stack.
    let ret_ip = unsafe { core::ptr::read_unaligned((saved_rsp + 48) as *const u64) };
    if ret_ip == 0 {
        return Err("null return IP in switch frame");
    }

    Ok(())
}
518
// The scheduler implementation is split across sibling submodules; their
// public items are re-exported so callers see a single flat API here.
mod core_impl;
mod runtime_ops;
mod task_ops;
mod timer_ops;

pub use runtime_ops::*;
pub use task_ops::*;
pub use timer_ops::*;