strat9_kernel/process/scheduler/timer_ops.rs

use super::*;
use core::sync::atomic::{AtomicBool, AtomicU64};

// One-shot bootstrap nudge: ensure each CPU requests at least one preemption
// after entering Ring 3. This breaks "first task runs forever" scenarios when
// class accounting has not yet accumulated enough runtime to trigger resched.
static FIRST_TICK_FORCE_RESCHED: [AtomicBool; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicBool::new(false) }; crate::arch::x86_64::percpu::MAX_CPUS];

// Per-CPU local tick counter (incremented every timer tick, independent of
// the BSP-only global TICK_COUNT). Used for periodic force-resched below.
static CPU_LOCAL_TICKS: [AtomicU64; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicU64::new(0) }; crate::arch::x86_64::percpu::MAX_CPUS];

// Force a reschedule at least every N ticks per CPU, regardless of GLOBAL_SCHED_STATE
// lock availability. This guarantees kernel tasks (shell, idle) get CPU time
// even if timer_tick consistently loses the scheduler lock race with another CPU.
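// NOTE: at the 100 Hz tick rate documented for NS_PER_TICK below, 5 ticks
// bounds the forced-resched latency to roughly 50 ms per CPU.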
const PERIODIC_RESCHED_TICKS: u64 = 5;

/// Timer interrupt handler - called from interrupt context.
///
/// Increments the global tick counter unconditionally on BSP so wall-clock
/// time never drifts even when the scheduler lock is contended. Secondary
/// bookkeeping (interval timers, wake deadlines, per-task accounting) is
/// deferred when the lock is unavailable.
///
/// Lock discipline: `tick_all_timers` and `check_wake_deadlines` each acquire
/// the scheduler lock themselves via `try_lock`. The per-task block below uses
/// its own `try_lock`. These are separate acquisitions by design - the inner
/// functions must not be called while the outer lock is held (that would deadlock).
pub fn timer_tick() {
    let _perf = super::perf_counters::PerfScope::new(
        &super::perf_counters::IRQ_TIMER_TSC,
        &super::perf_counters::IRQ_TIMER_COUNT,
    );
    let cpu_idx = crate::arch::x86_64::percpu::current_cpu_index();

    if cpu_is_valid(cpu_idx) {
        CPU_TOTAL_TICKS[cpu_idx].fetch_add(1, Ordering::Relaxed);
    }

    // BSP wall-clock: ALWAYS advance, regardless of any lock state.
    if cpu_idx == 0 {
        TICK_COUNT.fetch_add(1, Ordering::Relaxed);
    }

    // Per-CPU local tick counter (all CPUs, not just BSP).
    let local_tick = if cpu_is_valid(cpu_idx) {
        CPU_LOCAL_TICKS[cpu_idx].fetch_add(1, Ordering::Relaxed) + 1
    } else {
        0
    };

    // Lock-free bootstrap nudge: first local timer tick requests one resched
    // without touching GLOBAL_SCHED_STATE. This avoids pathological boot windows where
    // another CPU holds GLOBAL_SCHED_STATE and this CPU would otherwise defer the first
    // `need_resched` update indefinitely.
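    // (`swap(true)` returns the previous value, so only the very first tick on
    // each CPU takes this branch.)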
    if cpu_is_valid(cpu_idx) && !FIRST_TICK_FORCE_RESCHED[cpu_idx].swap(true, Ordering::AcqRel) {
        request_force_resched_hint(cpu_idx);
    }

    // Periodic force-resched: guarantee every CPU reschedules at least every
    // PERIODIC_RESCHED_TICKS ticks. This prevents starvation of kernel tasks
    // when timer_tick fails to acquire the scheduler lock repeatedly, or when
    // FairClassRq::update_current returns false (e.g., single-task queue).
    if cpu_is_valid(cpu_idx) && local_tick > 0 && local_tick % PERIODIC_RESCHED_TICKS == 0 {
        request_force_resched_hint(cpu_idx);
    }

    // BSP-only secondary bookkeeping: interval timers and sleep wakeups.
    // NS_PER_TICK = 1_000_000_000 / TIMER_HZ (10_000_000 ns at 100 Hz).
    // Both helpers acquire the scheduler lock internally via try_lock and
    // skip silently when contended - no probe needed.
    if cpu_idx == 0 {
        let tick = TICK_COUNT.load(Ordering::Relaxed);
        let current_time_ns = tick * NS_PER_TICK;
        crate::process::timer::tick_all_timers(current_time_ns);
        check_wake_deadlines(current_time_ns);
    }

    // Per-task accounting on this CPU: uses only the per-CPU LOCAL lock, never GLOBAL_SCHED_STATE.
    // This ensures timer ticks are never dropped due to another CPU holding GLOBAL_SCHED_STATE.
    if cpu_is_valid(cpu_idx) {
        if let Some(mut guard) = LOCAL_SCHEDULERS[cpu_idx].try_lock_no_irqsave() {
            if let Some(ref mut cpu) = *guard {
                let should_resched = if let Some(ref current_task) = cpu.current_task {
                    let class = cpu.class_table.class_for_task(current_task);
                    match class {
                        crate::process::sched::SchedClassId::RealTime => {
                            CPU_RT_RUNTIME_TICKS[cpu_idx].fetch_add(1, Ordering::Relaxed);
                        }
                        crate::process::sched::SchedClassId::Fair => {
                            CPU_FAIR_RUNTIME_TICKS[cpu_idx].fetch_add(1, Ordering::Relaxed);
                        }
                        crate::process::sched::SchedClassId::Idle => {
                            CPU_IDLE_TICKS[cpu_idx].fetch_add(1, Ordering::Relaxed);
                        }
                    }
                    current_task.ticks.fetch_add(1, Ordering::Relaxed);
                    cpu.current_runtime.update();
                    cpu.class_rqs.update_current(
                        &cpu.current_runtime,
                        current_task,
                        false,
                        &cpu.class_table,
                    )
                } else {
                    false
                };
                if should_resched {
                    cpu.need_resched = true;
                }
            }
        } else {
            note_try_lock_fail_on_cpu(cpu_idx);
        }
    }
}

/// Check wake deadlines for all tasks and wake up those whose sleep has expired.
///
/// Called from timer_tick() with interrupts disabled.
/// Uses `try_lock` on `BLOCKED_TASKS` to avoid deadlocking if that lock is already held.
///
/// # Lock discipline
///
/// The `BLOCKED_TASKS` lock is held **only** during the scan + re-enqueue phase.
/// The lock is explicitly dropped before sending IPIs (which may acquire
/// per-CPU data) and before any `Arc<Task>` drop (which reaches
/// `KernelStack::drop → free_frames → buddy_alloc.lock()`).
///
/// To guarantee this, every `Arc<Task>` removed from `blocked_tasks` is either
/// moved into a run-queue by `enqueue` or stashed in the `deferred_drops` array.
/// Stashed Arcs are dropped only after the guard goes out of scope, ensuring
/// `free_frames` is never called while the `BLOCKED_TASKS` lock is held.
fn check_wake_deadlines(current_time_ns: u64) {
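    // CPUs that should receive a reschedule IPI; the IPIs themselves are sent
    // only after the BLOCKED_TASKS lock has been released (see below).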
    let mut ipi_targets = [false; crate::arch::x86_64::percpu::MAX_CPUS];
    let my_cpu = current_cpu_index();

    // Stack-allocated storage for tasks whose Arc must be dropped outside the
    // scheduler lock. Sized to the same batch limit used for the scan so that
    // we never need a heap allocation here.
    const BATCH: usize = 128;
    let mut deferred_drops: [Option<Arc<Task>>; BATCH] = [const { None }; BATCH];
    let mut drop_count = 0usize;

    {
        // --- begin critical section (BLOCKED_TASKS lock held) ---
        let mut blocked = match super::BLOCKED_TASKS.try_lock_no_irqsave() {
            Some(guard) => guard,
            None => return,
        };

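        // Collect at most BATCH expired sleepers per invocation; anything beyond
        // that stays blocked and is picked up on a later tick.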
        let mut to_wake = [TaskId::from_u64(0); BATCH];
        let mut count = 0usize;
        for (id, task) in blocked.iter() {
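            // A wake_deadline_ns of 0 means no wake deadline is currently armed.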
            let deadline = task.wake_deadline_ns.load(Ordering::Relaxed);
            if deadline != 0 && current_time_ns >= deadline {
                if count < BATCH {
                    to_wake[count] = *id;
                    count += 1;
                } else {
                    break;
                }
            }
        }

        for id in to_wake.iter().copied().take(count) {
            if let Some(blocked_task) = blocked.remove(&id) {
                blocked_task.wake_deadline_ns.store(0, Ordering::Relaxed);
                blocked_task.set_state(TaskState::Ready);
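                // home_cpu uses usize::MAX as the "no home CPU" sentinel; fall
                // back to the BSP (CPU 0) in that case.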
                let home = blocked_task.home_cpu.load(Ordering::Relaxed);
                let cpu = if home != usize::MAX { home } else { 0 };
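                // Map the task's scheduling policy to its scheduling class so it
                // re-enters the correct per-class run-queue.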
                let class = {
                    use crate::process::sched::SchedClassId;
                    match blocked_task.sched_policy() {
                        crate::process::sched::SchedPolicy::RealTimeRR { .. }
                        | crate::process::sched::SchedPolicy::RealTimeFifo { .. } => {
                            SchedClassId::RealTime
                        }
                        crate::process::sched::SchedPolicy::Fair(_) => SchedClassId::Fair,
                        crate::process::sched::SchedPolicy::Idle => SchedClassId::Idle,
                    }
                };
                if let Some(ref mut local_cpu) = *LOCAL_SCHEDULERS[cpu].lock() {
                    // `enqueue` moves the Arc into the run-queue, so no
                    // drop occurs here; the Arc is alive in class_rqs.
                    local_cpu.class_rqs.enqueue(class, blocked_task);
                    local_cpu.need_resched = true;
                    if cpu != my_cpu && cpu_is_valid(cpu) {
                        ipi_targets[cpu] = true;
                    }
                } else {
                    // This CPU's local scheduler is not initialized: stash the
                    // task for drop outside the lock.
                    // This is the only path where an Arc<Task> can be the
                    // last reference and trigger KernelStack::drop.
                    if drop_count < BATCH {
                        deferred_drops[drop_count] = Some(blocked_task);
                        drop_count += 1;
                    }
                    // If deferred_drops is full the task Arc is dropped here,
                    // still under the lock: that case means we already have
                    // 128 orphaned tasks with no valid CPU, which is a bug
                    // elsewhere, so we simply accept the latency hit.
                }
            }
        }
        // `blocked` guard drops here: BLOCKED_TASKS lock released BEFORE any
        // Arc<Task> drop and BEFORE send_resched_ipi_to_cpu.
        // --- end critical section ---
    }

    // Drop orphaned task Arcs outside the scheduler lock so that
    // KernelStack::drop → free_frames → buddy_alloc.lock() does not race
    // with any other GLOBAL_SCHED_STATE lock acquisition on this or another CPU.
    for slot in deferred_drops[..drop_count].iter_mut() {
        drop(slot.take());
    }

    for (cpu, send) in ipi_targets.iter().copied().enumerate() {
        if send {
            send_resched_ipi_to_cpu(cpu);
        }
    }
}

/// Get the current tick count.
pub fn ticks() -> u64 {
    TICK_COUNT.load(Ordering::Relaxed)
}

/// Get a list of all tasks in the system (for timer checking).
/// Returns `None` if the scheduler is not initialized or currently locked.
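///
/// # Example
///
/// Illustrative sketch only (not compiled as a doctest); callers must tolerate
/// `None` whenever the scheduler lock is contended:
///
/// ```ignore
/// if let Some(tasks) = get_all_tasks() {
///     for task in tasks {
///         // inspect each Arc<Task> here
///     }
/// }
/// ```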
pub fn get_all_tasks() -> Option<alloc::vec::Vec<Arc<Task>>> {
    use alloc::vec::Vec;
    let scheduler = match GLOBAL_SCHED_STATE.try_lock() {
        Some(guard) => guard,
        None => {
            note_try_lock_fail();
            return None;
        }
    };
    if let Some(ref sched) = *scheduler {
        let mut tasks = Vec::with_capacity(sched.all_tasks.len());
        for (_, task) in sched.all_tasks.iter() {
            tasks.push(task.clone());
        }
        Some(tasks)
    } else {
        None
    }
}