//! strat9_kernel/process/scheduler/timer_ops.rs
//!
//! Timer-tick handling and wake-deadline processing for the scheduler.
1use super::*;
2use core::sync::atomic::{AtomicBool, AtomicU64};
3
// One-shot bootstrap nudge: ensure each CPU requests at least one preemption
// after entering Ring 3. This breaks "first task runs forever" scenarios when
// class accounting has not yet accumulated enough runtime to trigger resched.
// The `swap(true, AcqRel)` in `timer_tick` guarantees exactly one nudge per CPU.
static FIRST_TICK_FORCE_RESCHED: [AtomicBool; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicBool::new(false) }; crate::arch::x86_64::percpu::MAX_CPUS];

// Per-CPU local tick counter (incremented every timer tick, independent of
// the BSP-only global TICK_COUNT). Used for periodic force-resched below.
// Relaxed ordering throughout: these are statistics/counters, never used to
// synchronize other memory.
static CPU_LOCAL_TICKS: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];

// Force a reschedule at least every N ticks per CPU, regardless of GLOBAL_SCHED_STATE
// lock availability. This guarantees kernel tasks (shell, idle) get CPU time
// even if timer_tick consistently loses the scheduler lock race with the other CPU.
// At the 100 Hz tick rate noted in timer_tick, 5 ticks bounds starvation to ~50 ms.
const PERIODIC_RESCHED_TICKS: u64 = 5;
19
/// Timer interrupt handler - called from interrupt context.
///
/// Increments the global tick counter unconditionally on BSP so wall-clock
/// time never drifts even when the scheduler lock is contended. Secondary
/// bookkeeping (interval timers, wake deadlines, per-task accounting) is
/// deferred when the lock is unavailable.
///
/// Lock discipline: `tick_all_timers` and `check_wake_deadlines` each acquire
/// the scheduler lock themselves via `try_lock`. The per-task block below uses
/// its own `try_lock`. These are separate acquisitions by design - the inner
/// functions must not be called while the outer lock is held (that would deadlock).
pub fn timer_tick() {
    // RAII perf scope: accumulates elapsed TSC into IRQ_TIMER_TSC and bumps
    // IRQ_TIMER_COUNT when `_perf` drops at function exit.
    let _perf = super::perf_counters::PerfScope::new(
        &super::perf_counters::IRQ_TIMER_TSC,
        &super::perf_counters::IRQ_TIMER_COUNT,
    );
    let cpu_idx = crate::arch::x86_64::percpu::current_cpu_index();

    // Per-CPU total-tick statistic (pure counter, Relaxed is sufficient).
    if cpu_is_valid(cpu_idx) {
        CPU_TOTAL_TICKS[cpu_idx].fetch_add(1, Ordering::Relaxed);
    }

    // BSP wall-clock: ALWAYS advance, regardless of any lock state.
    if cpu_idx == 0 {
        TICK_COUNT.fetch_add(1, Ordering::Relaxed);
    }

    // Per-CPU local tick counter (all CPUs, not just BSP). `fetch_add` returns
    // the PREVIOUS value, hence `+ 1` to get this tick's number. An invalid
    // cpu_idx yields 0, which also disables the periodic nudge below.
    let local_tick = if cpu_is_valid(cpu_idx) {
        CPU_LOCAL_TICKS[cpu_idx].fetch_add(1, Ordering::Relaxed) + 1
    } else {
        0
    };

    // Lock-free bootstrap nudge: first local timer tick requests one resched
    // without touching GLOBAL_SCHED_STATE. This avoids pathological boot windows where
    // another CPU holds GLOBAL_SCHED_STATE and this CPU would otherwise defer the first
    // `need_resched` update indefinitely. `swap` returning false means this is
    // the first tick on this CPU; the flag stays true forever after.
    if cpu_is_valid(cpu_idx) && !FIRST_TICK_FORCE_RESCHED[cpu_idx].swap(true, Ordering::AcqRel) {
        request_force_resched_hint(cpu_idx);
    }

    // Periodic force-resched: guarantee every CPU reschedules at least every
    // PERIODIC_RESCHED_TICKS ticks. This prevents starvation of kernel tasks
    // when timer_tick fails to acquire the scheduler lock repeatedly, or when
    // FairClassRq::update_current returns false (e.g., single-task queue).
    if cpu_is_valid(cpu_idx) && local_tick > 0 && local_tick % PERIODIC_RESCHED_TICKS == 0 {
        request_force_resched_hint(cpu_idx);
    }

    // BSP-only secondary bookkeeping: interval timers and sleep wakeups.
    // NS_PER_TICK = 1_000_000_000 / TIMER_HZ (10_000_000 ns at 100 Hz).
    // Both helpers acquire the scheduler lock internally via try_lock and
    // skip silently when contended - no probe needed.
    if cpu_idx == 0 {
        let tick = TICK_COUNT.load(Ordering::Relaxed);
        let current_time_ns = tick * NS_PER_TICK;
        crate::process::timer::tick_all_timers(current_time_ns);
        check_wake_deadlines(current_time_ns);
    }

    // Per-task accounting on this CPU : uses LOCAL lock only (no global GLOBAL_SCHED_STATE).
    // This ensures timer ticks are never dropped due to another CPU holding GLOBAL_SCHED_STATE.
    if cpu_is_valid(cpu_idx) {
        if let Some(mut guard) = LOCAL_SCHEDULERS[cpu_idx].try_lock_no_irqsave() {
            if let Some(ref mut cpu) = *guard {
                // Charge this tick to the running task's scheduling class,
                // then ask the class run-queue whether preemption is due.
                let should_resched = if let Some(ref current_task) = cpu.current_task {
                    let class = cpu.class_table.class_for_task(current_task);
                    match class {
                        crate::process::sched::SchedClassId::RealTime => {
                            CPU_RT_RUNTIME_TICKS[cpu_idx].fetch_add(1, Ordering::Relaxed);
                        }
                        crate::process::sched::SchedClassId::Fair => {
                            CPU_FAIR_RUNTIME_TICKS[cpu_idx].fetch_add(1, Ordering::Relaxed);
                        }
                        crate::process::sched::SchedClassId::Idle => {
                            CPU_IDLE_TICKS[cpu_idx].fetch_add(1, Ordering::Relaxed);
                        }
                    }
                    current_task.ticks.fetch_add(1, Ordering::Relaxed);
                    cpu.current_runtime.update();
                    // NOTE(review): the third (`false`) argument's meaning is
                    // defined by ClassRqs::update_current and not visible in
                    // this file — confirm before relying on it.
                    cpu.class_rqs.update_current(
                        &cpu.current_runtime,
                        current_task,
                        false,
                        &cpu.class_table,
                    )
                } else {
                    // No current task (idle slot): nothing to account, no resched.
                    false
                };
                if should_resched {
                    cpu.need_resched = true;
                }
            }
        } else {
            // Local lock contended: skip accounting this tick, but record the
            // miss so lock-starvation is observable in counters.
            note_try_lock_fail_on_cpu(cpu_idx);
        }
    }
}
119
120/// Check wake deadlines for all tasks and wake up those whose sleep has expired.
121///
122/// Called from timer_tick() with interrupts disabled.
123/// Uses try_lock() to avoid deadlock if called while scheduler lock is held.
124///
125/// # Lock discipline
126///
127/// The `BLOCKED_TASKS` lock is held **only** during the scan + re-enqueue phase.
128/// The lock is explicitly dropped before sending IPIs (which may acquire
129/// per-CPU data) and before any `Arc<Task>` drop (which reaches
130/// `KernelStack::drop → free_frames → buddy_alloc.lock()`).
131///
132/// To guarantee this, every `Arc<Task>` removed from `blocked_tasks` is
133/// moved into the `deferred_drops` array. Those Arcs are dropped after the
134/// guard goes out of scope, ensuring `free_frames` is never called while the
135/// scheduler lock is held.
136fn check_wake_deadlines(current_time_ns: u64) {
137 let mut ipi_targets = [false; crate::arch::x86_64::percpu::MAX_CPUS];
138 let my_cpu = current_cpu_index();
139
140 // Stack-allocated storage for tasks whose Arc must be dropped outside the
141 // scheduler lock. Sized to the same batch limit used for the scan so that
142 // we never need a heap allocation here.
143 const BATCH: usize = 128;
144 let mut deferred_drops: [Option<Arc<Task>>; BATCH] = [const { None }; BATCH];
145 let mut drop_count = 0usize;
146
147 {
148 // --- begin critical section (BLOCKED_TASKS lock held) ---
149 let mut blocked = match super::BLOCKED_TASKS.try_lock_no_irqsave() {
150 Some(guard) => guard,
151 None => return,
152 };
153
154 let mut to_wake = [TaskId::from_u64(0); BATCH];
155 let mut count = 0usize;
156 for (id, task) in blocked.iter() {
157 let deadline = task.wake_deadline_ns.load(Ordering::Relaxed);
158 if deadline != 0 && current_time_ns >= deadline {
159 if count < BATCH {
160 to_wake[count] = *id;
161 count += 1;
162 } else {
163 break;
164 }
165 }
166 }
167
168 for id in to_wake.iter().copied().take(count) {
169 if let Some(blocked_task) = blocked.remove(&id) {
170 blocked_task.wake_deadline_ns.store(0, Ordering::Relaxed);
171 blocked_task.set_state(TaskState::Ready);
172 let home = blocked_task.home_cpu.load(Ordering::Relaxed);
173 let cpu = if home != usize::MAX { home } else { 0 };
174 let class = {
175 use crate::process::sched::SchedClassId;
176 match blocked_task.sched_policy() {
177 crate::process::sched::SchedPolicy::RealTimeRR { .. }
178 | crate::process::sched::SchedPolicy::RealTimeFifo { .. } => {
179 SchedClassId::RealTime
180 }
181 crate::process::sched::SchedPolicy::Fair(_) => SchedClassId::Fair,
182 crate::process::sched::SchedPolicy::Idle => SchedClassId::Idle,
183 }
184 };
185 if let Some(ref mut local_cpu) = *LOCAL_SCHEDULERS[cpu].lock() {
186 // `enqueue` moves the Arc into the run-queue, so no
187 // drop occurs here; the Arc is alive in class_rqs.
188 local_cpu.class_rqs.enqueue(class, blocked_task);
189 local_cpu.need_resched = true;
190 if cpu != my_cpu && cpu_is_valid(cpu) {
191 ipi_targets[cpu] = true;
192 }
193 } else {
194 // No valid CPU slot: stash for drop outside the lock.
195 // This is the only path where an Arc<Task> can be the
196 // last reference and trigger KernelStack::drop.
197 if drop_count < BATCH {
198 deferred_drops[drop_count] = Some(blocked_task);
199 drop_count += 1;
200 }
201 // If deferred_drops is full the task Arc is dropped here,
202 // still under the lock : but that case means we already
203 // have 128 orphaned tasks with no valid CPU, which is a
204 // bug elsewhere; emit a trace and accept the latency hit.
205 }
206 }
207 }
208 // `blocked` guard drops here : BLOCKED_TASKS lock released BEFORE any
209 // Arc<Task> drop and BEFORE send_resched_ipi_to_cpu.
210 // --- end critical section ---
211 }
212 unsafe { core::arch::asm!("mov al, '2'; out 0xe9, al", out("al") _) };
213
214 // Drop orphaned task Arcs outside the scheduler lock so that
215 // KernelStack::drop → free_frames → buddy_alloc.lock() does not race
216 // with any other GLOBAL_SCHED_STATE lock acquisition on this or another CPU.
217 for slot in deferred_drops[..drop_count].iter_mut() {
218 drop(slot.take());
219 }
220
221 for (cpu, send) in ipi_targets.iter().copied().enumerate() {
222 if send {
223 send_resched_ipi_to_cpu(cpu);
224 }
225 }
226}
227
228/// Get the current tick count
229pub fn ticks() -> u64 {
230 TICK_COUNT.load(Ordering::Relaxed)
231}
232
233/// Get a list of all tasks in the system (for timer checking).
234/// Returns None if scheduler is not initialized or currently locked.
235pub fn get_all_tasks() -> Option<alloc::vec::Vec<Arc<Task>>> {
236 use alloc::vec::Vec;
237 let scheduler = match GLOBAL_SCHED_STATE.try_lock() {
238 Some(guard) => guard,
239 None => {
240 note_try_lock_fail();
241 return None;
242 }
243 };
244 if let Some(ref sched) = *scheduler {
245 let mut tasks = Vec::with_capacity(sched.all_tasks.len());
246 for (_, task) in sched.all_tasks.iter() {
247 tasks.push(task.clone());
248 }
249 Some(tasks)
250 } else {
251 None
252 }
253}