//! strat9_kernel/process/scheduler/task_ops.rs
//!
//! Task lifecycle and lookup operations for the scheduler: exit, block,
//! wake, suspend/resume, and pid/tid/pgid/sid resolution.
1use super::{runtime_ops::finish_switch, *};
2
// Task IDs whose silo-related cleanup has been deferred.
// NOTE(review): no producer/consumer is visible in this part of the file —
// confirm the draining path lives elsewhere in this module.
static PENDING_SILO_CLEANUPS: SpinLock<Vec<TaskId>> = SpinLock::new(Vec::new());
4
/// Mark the current task as Dead and yield to the scheduler.
///
/// Called by SYS_PROC_EXIT. The task will not be re-queued because
/// `pick_next_task()` only re-queues tasks in `Running` state.
/// This function does not return.
pub fn exit_current_task(exit_code: i32) -> ! {
    // -- clear_child_tid (POSIX pthread join) --
    // Must happen BEFORE we drop the address space - write 0 to the TID pointer
    // and do a futex_wake so any waiting pthread_join() can proceed.
    if let Some(task) = current_task_clone() {
        let tidptr = task
            .clear_child_tid
            .load(core::sync::atomic::Ordering::Relaxed);
        if tidptr != 0 {
            // Safety: tidptr is a user address in the still-active address space.
            let ptr = tidptr as *mut u32;
            // Use is_aligned (pointer alignment check, not user-mapped check).
            // `tidptr & 3 == 0` enforces the 4-byte alignment required for the
            // u32 store; the upper bound rejects kernel-half addresses so we
            // never write into kernel space on behalf of userspace.
            if (tidptr & 3) == 0 && tidptr < 0xFFFF_8000_0000_0000 {
                unsafe { ptr.write_volatile(0) };
                // Futex wake: wake all threads waiting on this address (e.g. pthread_join).
                let _ = crate::syscall::futex::sys_futex_wake(tidptr, u32::MAX);
            }
        }
    }

    let cpu_index = current_cpu_index();
    // Deferred actions collected under the lock, executed after release:
    // SIGCHLD delivery and cross-CPU resched IPIs must not run while the
    // scheduler lock is held.
    let mut parent_to_signal: Option<TaskId> = None;
    let mut ipi_to_cpu: Option<usize> = None;
    {
        let saved_flags = save_flags_and_cli();
        let mut scheduler = SCHEDULER.lock();
        if let Some(ref mut sched) = *scheduler {
            if let Some(current) = sched
                .cpus
                .get(cpu_index)
                .and_then(|cpu| cpu.current_task.clone())
            {
                let current_id = current.id;
                let current_pid = current.pid;
                // Look up the parent before we remove our parent link below.
                let parent = sched.parent_of.get(&current_id).copied();
                let _ = sched.clear_task_wake_deadline_locked(current_id);
                // SAFETY: We hold the scheduler lock and interrupts are disabled.
                unsafe {
                    *current.state.get() = TaskState::Dead;
                }
                // Do NOT call cleanup_task_resources or all_tasks.remove() here!
                // The task is still in current_task[cpu_index], and an interrupt
                // could access it. Instead, mark it Dead and let pick_next_task
                // handle the cleanup when it moves the task to task_to_drop.
                // We only remove task_cpu and identity mappings to prevent
                // lookups while the task is dying.
                sched.task_cpu.remove(&current_id);
                sched.unregister_identity_locked(current_id, current_pid, current.tid);
                sched.parent_of.remove(&current_id);

                // Orphaned children get a new parent; may require an IPI.
                ipi_to_cpu = reparent_children(sched, current_id);

                // Only become a zombie if someone can reap us via waitpid().
                if parent.is_some() {
                    sched.zombies.insert(current_id, (exit_code, current_pid));
                }
                if let Some(parent_id) = parent {
                    // Wake a parent blocked in waitpid(); keep the first IPI
                    // target if reparenting already chose one.
                    let (_, ipi_wake) = sched.wake_task_locked(parent_id);
                    if ipi_to_cpu.is_none() {
                        ipi_to_cpu = ipi_wake;
                    }
                    parent_to_signal = Some(parent_id);
                }
            }
        }
        drop(scheduler);
        restore_flags(saved_flags);
    }
    if let Some(ci) = ipi_to_cpu {
        send_resched_ipi_to_cpu(ci);
    }

    if let Some(parent_id) = parent_to_signal {
        // Must happen outside scheduler lock to avoid lock recursion.
        let _ =
            crate::process::signal::send_signal(parent_id, crate::process::signal::Signal::SIGCHLD);
    }

    // Yield to pick the next task. Since we're Dead, we won't come back.
    yield_task();

    // Safety net - should never reach here
    loop {
        crate::arch::x86_64::hlt();
    }
}
95
96/// Get the current task's ID (if any task is running).
97pub fn current_task_id() -> Option<TaskId> {
98    let saved_flags = save_flags_and_cli();
99    let cpu_index = current_cpu_index();
100    let id = {
101        let scheduler = SCHEDULER.lock();
102        if let Some(ref sched) = *scheduler {
103            sched
104                .cpus
105                .get(cpu_index)
106                .and_then(|cpu| cpu.current_task.as_ref().map(|t| t.id))
107        } else {
108            None
109        }
110    };
111    restore_flags(saved_flags);
112    id
113}
114
115/// Get the current task's ID without blocking (safe for exceptions).
116pub fn current_task_id_try() -> Option<TaskId> {
117    let saved_flags = save_flags_and_cli();
118    let cpu_index = current_cpu_index();
119    let id = if let Some(scheduler) = SCHEDULER.try_lock() {
120        if let Some(ref sched) = *scheduler {
121            sched
122                .cpus
123                .get(cpu_index)
124                .and_then(|cpu| cpu.current_task.as_ref().map(|t| t.id))
125        } else {
126            None
127        }
128    } else {
129        None
130    };
131    restore_flags(saved_flags);
132    id
133}
134
135/// Get the current process ID (POSIX pid).
136pub fn current_pid() -> Option<Pid> {
137    current_task_clone().map(|t| t.pid)
138}
139
140/// Get the current thread ID (POSIX tid).
141pub fn current_tid() -> Option<Tid> {
142    current_task_clone().map(|t| t.tid)
143}
144
145/// Get the current process group id.
146pub fn current_pgid() -> Option<Pid> {
147    current_task_clone().map(|t| t.pgid.load(Ordering::Relaxed))
148}
149
150/// Get the current session id.
151pub fn current_sid() -> Option<Pid> {
152    current_task_clone().map(|t| t.sid.load(Ordering::Relaxed))
153}
154
/// Get the current task (cloned Arc), if any.
///
/// Defensive: if the current task's `Arc` strong count looks corrupt,
/// the incident is logged (with the caller's location, via `#[track_caller]`)
/// and this CPU's `current_task` is replaced by its idle task, which is
/// then returned instead.
#[track_caller]
pub fn current_task_clone() -> Option<Arc<Task>> {
    let saved_flags = save_flags_and_cli();
    let cpu_index = current_cpu_index();
    let task = {
        let mut scheduler = SCHEDULER.lock();
        if let Some(ref mut sched) = *scheduler {
            sched.cpus.get_mut(cpu_index).and_then(|cpu| {
                let arc = cpu.current_task.as_ref()?;
                let strong = Arc::strong_count(arc);
                // Treat insane refcounts as corruption and fall back to idle.
                // strong == 0 is impossible for a live Arc; a count above
                // isize::MAX/2 indicates an overwritten refcount header.
                if strong == 0 || strong > (isize::MAX as usize) / 2 {
                    let ptr = Arc::as_ptr(arc) as *const u8;
                    let caller = core::panic::Location::caller();
                    crate::serial_println!(
                        "[sched] CORRUPT Arc refcount! cpu={} strong={:#x} ptr={:p} caller={}:{}",
                        cpu_index,
                        strong,
                        ptr,
                        caller.file(),
                        caller.line(),
                    );
                    // Swap in the idle task so subsequent callers see a sane task.
                    let idle = cpu.idle_task.clone();
                    cpu.current_task = Some(idle.clone());
                    Some(idle)
                } else {
                    Some(arc.clone())
                }
            })
        } else {
            None
        }
    };
    restore_flags(saved_flags);
    task
}
192
/// Best-effort, non-blocking variant of [`current_task_clone`].
///
/// Returns `None` when the scheduler lock is contended.
/// Useful in cleanup paths where blocking on `SCHEDULER.lock()` could deadlock.
///
/// Performs the same corrupt-refcount recovery as [`current_task_clone`]:
/// a corrupt `Arc` is logged and replaced by this CPU's idle task.
#[track_caller]
pub fn current_task_clone_try() -> Option<Arc<Task>> {
    let saved_flags = save_flags_and_cli();
    let cpu_index = current_cpu_index();
    let task = if let Some(mut scheduler) = SCHEDULER.try_lock() {
        if let Some(ref mut sched) = *scheduler {
            sched.cpus.get_mut(cpu_index).and_then(|cpu| {
                let arc = cpu.current_task.as_ref()?;
                let strong = Arc::strong_count(arc);
                // Treat insane refcounts as corruption and fall back to idle.
                // (Same heuristic as current_task_clone: 0 or > isize::MAX/2.)
                if strong == 0 || strong > (isize::MAX as usize) / 2 {
                    let ptr = Arc::as_ptr(arc) as *const u8;
                    let caller = core::panic::Location::caller();
                    crate::serial_println!(
                        "[sched] CORRUPT Arc refcount! cpu={} strong={:#x} ptr={:p} caller={}:{}",
                        cpu_index,
                        strong,
                        ptr,
                        caller.file(),
                        caller.line(),
                    );
                    // Swap in the idle task so subsequent callers see a sane task.
                    let idle = cpu.idle_task.clone();
                    cpu.current_task = Some(idle.clone());
                    Some(idle)
                } else {
                    Some(arc.clone())
                }
            })
        } else {
            None
        }
    } else {
        // Lock contended: report "no task" rather than risk a deadlock.
        None
    };
    restore_flags(saved_flags);
    task
}
234
/// Debug-only blocking variant used to diagnose early ring3 entry stalls.
///
/// Spins with `try_lock()` so we can emit progress logs instead of blocking
/// silently on `SCHEDULER.lock()`.
///
/// Unlike the other accessors, this does NOT disable interrupts and does not
/// return until the lock is eventually acquired; every ~2M spins it logs the
/// lock's current owner CPU with `trace_label` for correlation.
pub fn current_task_clone_spin_debug(trace_label: &str) -> Option<Arc<Task>> {
    let cpu_index = current_cpu_index();
    let mut spins = 0usize;
    loop {
        if let Some(mut scheduler) = SCHEDULER.try_lock() {
            return if let Some(ref mut sched) = *scheduler {
                sched.cpus.get_mut(cpu_index).and_then(|cpu| {
                    let arc = cpu.current_task.as_ref()?;
                    let strong = Arc::strong_count(arc);
                    // Same corrupt-refcount heuristic as current_task_clone.
                    if strong == 0 || strong > (isize::MAX as usize) / 2 {
                        let ptr = Arc::as_ptr(arc) as *const u8;
                        crate::serial_force_println!(
                            "[trace][sched] {} corrupt current_task cpu={} strong={:#x} ptr={:p}",
                            trace_label,
                            cpu_index,
                            strong,
                            ptr,
                        );
                        // Fall back to the idle task on corruption.
                        let idle = cpu.idle_task.clone();
                        cpu.current_task = Some(idle.clone());
                        Some(idle)
                    } else {
                        Some(arc.clone())
                    }
                })
            } else {
                None
            };
        }

        // Lock contended: log periodically so a stall is visible on serial.
        spins = spins.saturating_add(1);
        if spins == 2_000_000 {
            crate::serial_force_println!(
                "[trace][sched] {} waiting current_task cpu={} owner_cpu={}",
                trace_label,
                cpu_index,
                SCHEDULER.owner_cpu()
            );
            spins = 0;
        }
        core::hint::spin_loop();
    }
}
282
283/// Resolve a POSIX pid to internal TaskId.
284pub fn get_task_id_by_pid(pid: Pid) -> Option<TaskId> {
285    let saved_flags = save_flags_and_cli();
286    let out = {
287        let scheduler = SCHEDULER.lock();
288        if let Some(ref sched) = *scheduler {
289            sched.pid_to_task.get(&pid).copied()
290        } else {
291            None
292        }
293    };
294    restore_flags(saved_flags);
295    out
296}
297
298/// Resolve a POSIX pid to the corresponding task.
299pub fn get_task_by_pid(pid: Pid) -> Option<Arc<Task>> {
300    let tid = get_task_id_by_pid(pid)?;
301    get_task_by_id(tid)
302}
303
304/// Resolve a POSIX tid to the corresponding internal task id.
305pub fn get_task_id_by_tid(tid: Tid) -> Option<TaskId> {
306    let saved_flags = save_flags_and_cli();
307    let out = {
308        let scheduler = SCHEDULER.lock();
309        if let Some(ref sched) = *scheduler {
310            sched
311                .tid_to_task
312                .get(&tid)
313                .copied()
314                .or_else(|| sched.pid_to_task.get(&(tid as Pid)).copied())
315        } else {
316            None
317        }
318    };
319    restore_flags(saved_flags);
320    out
321}
322
323/// Resolve a PID to the current process group id.
324pub fn get_pgid_by_pid(pid: Pid) -> Option<Pid> {
325    let saved_flags = save_flags_and_cli();
326    let out = {
327        let scheduler = SCHEDULER.lock();
328        if let Some(ref sched) = *scheduler {
329            sched.pid_to_pgid.get(&pid).copied()
330        } else {
331            None
332        }
333    };
334    restore_flags(saved_flags);
335    out
336}
337
338/// Resolve a PID to the current session id.
339pub fn get_sid_by_pid(pid: Pid) -> Option<Pid> {
340    let saved_flags = save_flags_and_cli();
341    let out = {
342        let scheduler = SCHEDULER.lock();
343        if let Some(ref sched) = *scheduler {
344            sched.pid_to_sid.get(&pid).copied()
345        } else {
346            None
347        }
348    };
349    restore_flags(saved_flags);
350    out
351}
352
353/// Collect task IDs that currently belong to process group `pgid`.
354pub fn get_task_ids_in_pgid(pgid: Pid) -> alloc::vec::Vec<TaskId> {
355    use alloc::vec::Vec;
356    let saved_flags = save_flags_and_cli();
357    let out = {
358        let scheduler = SCHEDULER.lock();
359        if let Some(ref sched) = *scheduler {
360            sched
361                .pgid_members
362                .get(&pgid)
363                .cloned()
364                .unwrap_or_else(Vec::new)
365        } else {
366            Vec::new()
367        }
368    };
369    restore_flags(saved_flags);
370    out
371}
372
/// Set process group id for `target_pid` (or current if `None`).
///
/// Implements POSIX `setpgid()`-style permission checks, as enforced below:
/// - the requester may retarget itself, or one of its direct children;
/// - requester and target must belong to the same session;
/// - a session leader (pid == sid) may not have its pgid changed;
/// - joining an existing group requires that group's leader to be in the
///   same session as the target.
///
/// Returns the effective new pgid on success.
pub fn set_process_group(
    requester: TaskId,
    target_pid: Option<Pid>,
    new_pgid: Option<Pid>,
) -> Result<Pid, crate::syscall::error::SyscallError> {
    use crate::syscall::error::SyscallError;

    let saved_flags = save_flags_and_cli();
    // Closure so `?`-style early errors still fall through to restore_flags.
    let result = (|| -> Result<Pid, SyscallError> {
        let mut scheduler = SCHEDULER.lock();
        let sched = scheduler.as_mut().ok_or(SyscallError::Fault)?;

        let requester_task = sched
            .all_tasks
            .get(&requester)
            .cloned()
            .ok_or(SyscallError::Fault)?;
        let requester_sid = requester_task.sid.load(Ordering::Relaxed);

        // `None` target means "the requester itself".
        let target_id = match target_pid {
            None => requester,
            Some(pid) => sched
                .pid_to_task
                .get(&pid)
                .copied()
                .ok_or(SyscallError::NotFound)?,
        };

        // A requester may only modify itself or a direct child.
        if target_id != requester {
            let is_child = sched
                .children_of
                .get(&requester)
                .map(|children| children.iter().any(|child| *child == target_id))
                .unwrap_or(false);
            if !is_child {
                return Err(SyscallError::PermissionDenied);
            }
        }

        let target_task = sched
            .all_tasks
            .get(&target_id)
            .cloned()
            .ok_or(SyscallError::NotFound)?;
        let target_pid_value = target_task.pid;
        let target_sid = target_task.sid.load(Ordering::Relaxed);

        // Cross-session changes are forbidden.
        if target_sid != requester_sid {
            return Err(SyscallError::PermissionDenied);
        }

        // A session leader's process group may not be changed.
        if target_pid_value == target_sid {
            return Err(SyscallError::PermissionDenied);
        }

        // `None` new_pgid means "make the target its own group leader".
        let desired_pgid = new_pgid.unwrap_or(target_pid_value);
        if desired_pgid != target_pid_value {
            // Joining an existing group: its leader must exist and share
            // the target's session.
            let group_leader_tid = sched
                .pid_to_task
                .get(&desired_pgid)
                .copied()
                .ok_or(SyscallError::NotFound)?;
            let group_leader = sched
                .all_tasks
                .get(&group_leader_tid)
                .ok_or(SyscallError::NotFound)?;
            if group_leader.sid.load(Ordering::Relaxed) != target_sid {
                return Err(SyscallError::PermissionDenied);
            }
        }

        // Commit: update the task's pgid and the membership indices.
        let old_pgid = target_task.pgid.load(Ordering::Relaxed);
        target_task.pgid.store(desired_pgid, Ordering::Relaxed);
        if old_pgid != desired_pgid {
            Scheduler::member_remove(&mut sched.pgid_members, old_pgid, target_id);
            Scheduler::member_add(&mut sched.pgid_members, desired_pgid, target_id);
            sched.pid_to_pgid.insert(target_pid_value, desired_pgid);
        }
        Ok(desired_pgid)
    })();
    restore_flags(saved_flags);
    result
}
457
/// Create a new session for the calling task.
///
/// POSIX `setsid()`-style: fails with `PermissionDenied` if the caller is
/// already a process-group leader (pgid == pid). On success the caller
/// becomes the leader of a new session and of a new process group, both
/// identified by its own pid, and all membership indices are updated.
pub fn create_session(requester: TaskId) -> Result<Pid, crate::syscall::error::SyscallError> {
    use crate::syscall::error::SyscallError;

    let saved_flags = save_flags_and_cli();
    // Closure so early errors still fall through to restore_flags.
    let result = (|| -> Result<Pid, SyscallError> {
        let mut scheduler = SCHEDULER.lock();
        let sched = scheduler.as_mut().ok_or(SyscallError::Fault)?;

        let requester_task = sched
            .all_tasks
            .get(&requester)
            .cloned()
            .ok_or(SyscallError::Fault)?;
        let pid = requester_task.pid;
        // A process-group leader may not create a new session.
        if requester_task.pgid.load(Ordering::Relaxed) == pid {
            return Err(SyscallError::PermissionDenied);
        }

        // Leave the old session/group, then become leader of new ones
        // (both keyed by our own pid).
        let old_sid = requester_task.sid.load(Ordering::Relaxed);
        let old_pgid = requester_task.pgid.load(Ordering::Relaxed);
        requester_task.sid.store(pid, Ordering::Relaxed);
        requester_task.pgid.store(pid, Ordering::Relaxed);
        Scheduler::member_remove(&mut sched.sid_members, old_sid, requester);
        Scheduler::member_remove(&mut sched.pgid_members, old_pgid, requester);
        Scheduler::member_add(&mut sched.sid_members, pid, requester);
        Scheduler::member_add(&mut sched.pgid_members, pid, requester);
        sched.pid_to_sid.insert(pid, pid);
        sched.pid_to_pgid.insert(pid, pid);
        Ok(pid)
    })();
    restore_flags(saved_flags);
    result
}
492
493/// Get a task by its TaskId (if still registered).
494pub fn get_task_by_id(id: TaskId) -> Option<Arc<Task>> {
495    let saved_flags = save_flags_and_cli();
496    let task = {
497        let scheduler = SCHEDULER.lock();
498        if let Some(ref sched) = *scheduler {
499            sched.all_tasks.get(&id).cloned()
500        } else {
501            None
502        }
503    };
504    restore_flags(saved_flags);
505    task
506}
507
508/// Update a task scheduling policy and requeue if needed.
509pub fn set_task_sched_policy(id: TaskId, policy: crate::process::sched::SchedPolicy) -> bool {
510    let saved_flags = save_flags_and_cli();
511    let mut ipi_to_cpu: Option<usize> = None;
512    let updated = {
513        let mut scheduler = SCHEDULER.lock();
514        if let Some(ref mut sched) = *scheduler {
515            let cpu_index = sched.task_cpu.get(&id).copied().unwrap_or(0);
516            let task = match sched.all_tasks.get(&id).cloned() {
517                Some(t) => t,
518                None => return false,
519            };
520            task.set_sched_policy(policy);
521            let class = sched.class_table.class_for_task(&task);
522
523            if let Some(cpu) = sched.cpus.get_mut(cpu_index) {
524                // If task is queued in ready classes, migrate it to the new class.
525                if cpu.class_rqs.remove(id) {
526                    cpu.class_rqs.enqueue(class, task.clone());
527                }
528                cpu.need_resched = true;
529            }
530            if cpu_index != current_cpu_index() {
531                ipi_to_cpu = Some(cpu_index);
532            }
533            sched_trace(format_args!(
534                "set_policy task={} cpu={} policy={:?}",
535                id.as_u64(),
536                cpu_index,
537                policy
538            ));
539            true
540        } else {
541            false
542        }
543    };
544    if let Some(ci) = ipi_to_cpu {
545        send_resched_ipi_to_cpu(ci);
546    }
547    restore_flags(saved_flags);
548    updated
549}
550
551/// Get parent task ID for a child task.
552pub fn get_parent_id(child: TaskId) -> Option<TaskId> {
553    let saved_flags = save_flags_and_cli();
554    let parent = {
555        let scheduler = SCHEDULER.lock();
556        if let Some(ref sched) = *scheduler {
557            sched.parent_of.get(&child).copied()
558        } else {
559            None
560        }
561    };
562    restore_flags(saved_flags);
563    parent
564}
565
566/// Get parent process ID for a child task.
567pub fn get_parent_pid(child: TaskId) -> Option<Pid> {
568    let parent_tid = get_parent_id(child)?;
569    let parent = get_task_by_id(parent_tid)?;
570    Some(parent.pid)
571}
572
573/// Try to reap a zombie child.
574///
575/// `target=None` means "any child".
576pub fn try_wait_child(parent: TaskId, target: Option<TaskId>) -> WaitChildResult {
577    let saved_flags = save_flags_and_cli();
578    let result = {
579        let mut scheduler = SCHEDULER.lock();
580        if let Some(ref mut sched) = *scheduler {
581            sched.try_reap_child_locked(parent, target)
582        } else {
583            WaitChildResult::NoChildren
584        }
585    };
586    restore_flags(saved_flags);
587    result
588}
589
/// Block the current task and yield to the scheduler.
///
/// The current task is moved from Running to Blocked state and placed
/// in the `blocked_tasks` map. It will not be re-scheduled until
/// `wake_task(id)` is called.
///
/// ## Lost-wakeup prevention
///
/// Before actually blocking, this function checks the task's `wake_pending`
/// flag. If a concurrent `wake_task()` fired between the moment the task
/// added itself to a `WaitQueue` and this call, the flag will be set and
/// the function returns immediately without blocking.
///
/// Must NOT be called with interrupts disabled or while holding the
/// scheduler lock (this function acquires both).
pub fn block_current_task() {
    let saved_flags = save_flags_and_cli();
    let cpu_index = current_cpu_index();

    // Decide, entirely under the lock, whether we need to switch away;
    // the actual context switch happens after the lock is released.
    let switch_target = {
        let mut scheduler = SCHEDULER.lock();
        if let Some(ref mut sched) = *scheduler {
            if let Some(cpu) = sched.cpus.get_mut(cpu_index) {
                if let Some(ref current) = cpu.current_task {
                    // Check for a pending wakeup that raced with us before we
                    // entered the scheduler lock. If set, clear it and skip
                    // blocking - the task carries on as if it was woken normally.
                    // SAFETY: AtomicBool::swap is safe to call from any context.
                    if current
                        .wake_pending
                        .swap(false, core::sync::atomic::Ordering::AcqRel)
                    {
                        // Pending wakeup consumed - do not block.
                        None
                    } else {
                        // SAFETY: We hold the scheduler lock and interrupts are disabled.
                        unsafe {
                            *current.state.get() = TaskState::Blocked;
                        }
                        // Move it to the blocked map
                        sched.blocked_tasks.insert(current.id, current.clone());
                        // Now pick the next task (the blocked task won't be re-queued
                        // because pick_next_task only re-queues Running tasks)
                        sched.yield_cpu(cpu_index)
                    }
                } else {
                    // No current task on this CPU: still give the scheduler a
                    // chance to pick one.
                    sched.yield_cpu(cpu_index)
                }
            } else {
                None
            }
        } else {
            None
        }
    }; // Lock released

    if let Some(ref target) = switch_target {
        // SAFETY: target was produced by yield_cpu above; the scheduler lock
        // is no longer held, as required for the switch.
        unsafe {
            crate::process::task::do_switch_context(target);
        }
        // We resume here once some wake_task() makes us runnable again.
        finish_switch();
    }

    restore_flags(saved_flags);
}
655
656/// Wake a blocked task by its ID.
657///
658/// Moves the task from `blocked_tasks` to the ready queue and sets its
659/// state to Ready. Returns `true` if the task was found and woken.
660///
661/// ## Lost-wakeup prevention
662///
663/// If the task is not yet in `blocked_tasks` (it is still transitioning
664/// from Ready -> Blocked inside `block_current_task()`), this function sets
665/// the task's `wake_pending` flag so that `block_current_task()` will see
666/// the pending wakeup and return immediately without actually blocking.
667pub fn wake_task(id: TaskId) -> bool {
668    let saved_flags = save_flags_and_cli();
669    let (woken, ipi_cpu) = {
670        let mut scheduler = SCHEDULER.lock();
671        if let Some(ref mut sched) = *scheduler {
672            sched.wake_task_locked(id)
673        } else {
674            (false, None)
675        }
676    };
677    if let Some(ci) = ipi_cpu {
678        send_resched_ipi_to_cpu(ci);
679    }
680    restore_flags(saved_flags);
681    woken
682}
683
684/// Sets task wake deadline.
685pub fn set_task_wake_deadline(id: TaskId, deadline_ns: u64) -> bool {
686    let saved_flags = save_flags_and_cli();
687    let out = {
688        let mut scheduler = SCHEDULER.lock();
689        if let Some(ref mut sched) = *scheduler {
690            sched.set_task_wake_deadline_locked(id, deadline_ns)
691        } else {
692            false
693        }
694    };
695    restore_flags(saved_flags);
696    out
697}
698
699/// Performs the clear task wake deadline operation.
700pub fn clear_task_wake_deadline(id: TaskId) -> bool {
701    set_task_wake_deadline(id, 0)
702}
703
/// Suspend a task by ID (best-effort).
///
/// Moves the task to the blocked map and marks it Blocked.
/// - If the task is the *current* task on *this* CPU, a context switch is
///   performed immediately.
/// - If the task is the *current* task on *another* CPU, an IPI is sent to
///   trigger preemption on that CPU. The task will not be re-queued at the
///   next tick because its state is Blocked.
///
/// Returns `true` if the task was found (running, queued, or already
/// blocked) and is now suspended.
pub fn suspend_task(id: TaskId) -> bool {
    let saved_flags = save_flags_and_cli();

    // Deferred actions: the switch and the IPI both happen after the
    // scheduler lock is released.
    let mut switch_target: Option<SwitchTarget> = None;
    let mut suspended = false;
    let mut ipi_to_cpu: Option<usize> = None;

    {
        let mut scheduler = SCHEDULER.lock();
        if let Some(ref mut sched) = *scheduler {
            let my_cpu = current_cpu_index();

            // Phase 1: check if the task is the current task on any CPU.
            for (ci, cpu) in sched.cpus.iter_mut().enumerate() {
                if let Some(ref current) = cpu.current_task {
                    if current.id == id {
                        // SAFETY: scheduler lock held, interrupts disabled.
                        unsafe {
                            *current.state.get() = TaskState::Blocked;
                        }
                        sched.blocked_tasks.insert(current.id, current.clone());
                        suspended = true;
                        if ci == my_cpu {
                            // Suspending ourselves: switch away right now.
                            switch_target = sched.yield_cpu(ci);
                        } else {
                            // Cross-CPU: IPI will make the remote CPU preempt.
                            ipi_to_cpu = Some(ci);
                        }
                        break;
                    }
                }
            }

            // Phase 2: remove from ready queues (task was not running anywhere).
            if !suspended {
                for cpu in &mut sched.cpus {
                    if cpu.class_rqs.remove(id) {
                        if let Some(task) = sched.all_tasks.get(&id) {
                            // SAFETY: scheduler lock held, interrupts disabled.
                            unsafe {
                                *task.state.get() = TaskState::Blocked;
                            }
                            sched.blocked_tasks.insert(task.id, task.clone());
                        }
                        suspended = true;
                        break;
                    }
                }
            }

            // Phase 3: already blocked - count that as success.
            if !suspended && sched.blocked_tasks.contains_key(&id) {
                suspended = true;
            }
        }
    } // scheduler lock released before IPI and context switch

    if let Some(ref target) = switch_target {
        // SAFETY: target comes from yield_cpu above; lock no longer held.
        unsafe {
            crate::process::task::do_switch_context(target);
        }
        // Execution resumes here after a later resume_task().
        finish_switch();
    }

    if let Some(ci) = ipi_to_cpu {
        send_resched_ipi_to_cpu(ci);
    }

    restore_flags(saved_flags);
    suspended
}
781
/// Resume a previously suspended task by ID.
///
/// Moves the task from blocked to ready queue and marks it Ready.
/// Returns `true` if the task was found in the blocked map. If the task
/// belongs to a different CPU, that CPU gets a resched IPI (sent after
/// the scheduler lock is released).
pub fn resume_task(id: TaskId) -> bool {
    let saved_flags = save_flags_and_cli();
    let mut ipi_to_cpu: Option<usize> = None;
    let resumed = {
        let mut scheduler = SCHEDULER.lock();
        if let Some(ref mut sched) = *scheduler {
            if let Some(task) = sched.blocked_tasks.remove(&id) {
                // Any pending timed wakeup is now moot.
                let _ = sched.clear_task_wake_deadline_locked(id);
                // SAFETY: scheduler lock held.
                unsafe {
                    *task.state.get() = TaskState::Ready;
                }
                // Re-enqueue on the task's home CPU (0 if unknown), in the
                // class its policy currently maps to.
                let cpu_index = sched.task_cpu.get(&id).copied().unwrap_or(0);
                let class = sched.class_table.class_for_task(&task);
                if let Some(cpu) = sched.cpus.get_mut(cpu_index) {
                    cpu.class_rqs.enqueue(class, task);
                    cpu.need_resched = true;
                }
                if cpu_index != current_cpu_index() {
                    ipi_to_cpu = Some(cpu_index);
                }
                true
            } else {
                false
            }
        } else {
            false
        }
    };
    if let Some(ci) = ipi_to_cpu {
        send_resched_ipi_to_cpu(ci);
    }
    restore_flags(saved_flags);
    resumed
}
820
/// Kill a task by ID (best-effort).
///
/// - Ready / blocked tasks are removed and marked Dead immediately.
/// - If the task is the *current* task on *this* CPU, a context switch is
///   performed immediately.
/// - If the task is the *current* task on *another* CPU, an IPI triggers
///   preemption on that CPU; the task will not be re-queued because its
///   state is Dead.
///
/// Parent/waitpid semantics are preserved: the killed task becomes a zombie
/// (forced exit code 1), its children are reparented, and its parent is
/// woken and sent SIGCHLD — same as a voluntary exit.
///
/// Returns `true` if the task was found and killed.
///
/// NOTE(review): when the victim is the current task on *this* CPU, this
/// function switches away via `do_switch_context`; since the task is Dead it
/// is presumably never resumed, so the function would not return to the
/// caller in that case — confirm against pick_next_task.
pub fn kill_task(id: TaskId) -> bool {
    // Audit before taking any locks; pid/silo lookups are best-effort
    // (0 if the task is already gone).
    let pid = crate::process::get_task_by_id(id)
        .map(|t| t.pid)
        .unwrap_or(0);
    crate::audit::log(
        crate::audit::AuditCategory::Process,
        pid,
        crate::silo::task_silo_id(id).unwrap_or(0),
        alloc::format!("kill_task tid={}", id.as_u64()),
    );
    // Interrupts stay disabled for the whole kill sequence so this CPU cannot
    // be preempted between marking the task Dead and the switch/IPI below.
    let saved_flags = save_flags_and_cli();

    let mut switch_target: Option<SwitchTarget> = None;
    let mut killed = false;
    let mut ipi_to_cpu: Option<usize> = None;
    let mut parent_to_signal: Option<TaskId> = None;

    {
        let mut scheduler = SCHEDULER.lock();
        if let Some(ref mut sched) = *scheduler {
            // Keep parent/waitpid semantics even for forced termination paths.
            // A killed child must still become a zombie until reaped by waitpid().
            const FORCED_KILL_EXIT_CODE: i32 = 1;
            let my_cpu = current_cpu_index();

            // Case 1: the task is the current task on some CPU.
            let mut running_hit: Option<(usize, Arc<Task>)> = None;
            for (ci, cpu) in sched.cpus.iter().enumerate() {
                if let Some(current) = cpu.current_task.as_ref() {
                    if current.id == id {
                        // Check if already marked Dead by a previous kill attempt;
                        // in that case there is nothing left to do here.
                        let state = unsafe { *current.state.get() };
                        if state != TaskState::Dead {
                            running_hit = Some((ci, current.clone()));
                        }
                        break;
                    }
                }
            }
            if let Some((ci, current)) = running_hit {
                let task_pid = current.pid;
                let _ = sched.clear_task_wake_deadline_locked(id);
                // SAFETY: scheduler lock held; task state is only mutated under it.
                unsafe {
                    *current.state.get() = TaskState::Dead;
                }
                // Do NOT call cleanup_task_resources or all_tasks.remove() here!
                // The task is still in current_task[ci], and an interrupt could
                // access it. Instead, mark it Dead and let pick_next_task handle
                // the cleanup when it moves the task to task_to_drop.
                sched.task_cpu.remove(&id);
                sched.unregister_identity_locked(id, task_pid, current.tid);
                let (parent, ipi_death) =
                    finalize_forced_death(sched, id, FORCED_KILL_EXIT_CODE, task_pid);
                parent_to_signal = parent;
                killed = true;
                if ci == my_cpu {
                    // Victim runs on this very CPU: switch away immediately.
                    switch_target = sched.yield_cpu(ci);
                } else {
                    // Victim runs elsewhere: force that CPU through the scheduler.
                    ipi_to_cpu = Some(ci);
                }
                // NOTE(review): only one IPI slot exists. If the parent wake in
                // finalize_forced_death wanted an IPI to a *different* CPU than
                // the victim's, that wake-IPI is dropped here — confirm the
                // victim CPU's scheduler pass covers it.
                if ipi_to_cpu.is_none() {
                    ipi_to_cpu = ipi_death;
                }
            }

            // Case 2: the task sits on a ready queue (not currently running).
            if !killed {
                let mut removed_from_ready = false;
                for ci in 0..sched.cpus.len() {
                    if let Some(cpu) = sched.cpus.get_mut(ci) {
                        if cpu.class_rqs.remove(id) {
                            removed_from_ready = true;
                            break;
                        }
                    }
                }
                if removed_from_ready {
                    let _ = sched.clear_task_wake_deadline_locked(id);
                    if let Some(task) = sched.all_tasks.remove(&id) {
                        let task_pid = task.pid;
                        // SAFETY: scheduler lock held.
                        unsafe {
                            *task.state.get() = TaskState::Dead;
                        }
                        // Inline cleanup is safe here: the task is no longer
                        // reachable from any run queue or current_task slot.
                        cleanup_task_resources(&task);
                        sched.task_cpu.remove(&id);
                        sched.unregister_identity_locked(id, task_pid, task.tid);
                        let (parent, ipi_death) =
                            finalize_forced_death(sched, id, FORCED_KILL_EXIT_CODE, task_pid);
                        parent_to_signal = parent;
                        if ipi_to_cpu.is_none() {
                            ipi_to_cpu = ipi_death;
                        }
                    }
                    killed = true;
                }
            }

            // Case 3: the task is blocked (sleeping / waiting).
            if !killed {
                if let Some(task) = sched.blocked_tasks.remove(&id) {
                    let task_pid = task.pid;
                    let _ = sched.clear_task_wake_deadline_locked(id);
                    // SAFETY: scheduler lock held.
                    unsafe {
                        *task.state.get() = TaskState::Dead;
                    }
                    // Same as the ready-queue path: the task is unreachable now.
                    cleanup_task_resources(&task);
                    sched.all_tasks.remove(&id);
                    sched.task_cpu.remove(&id);
                    sched.unregister_identity_locked(id, task_pid, task.tid);
                    let (parent, ipi_death) =
                        finalize_forced_death(sched, id, FORCED_KILL_EXIT_CODE, task_pid);
                    parent_to_signal = parent;
                    if ipi_to_cpu.is_none() {
                        ipi_to_cpu = ipi_death;
                    }
                    killed = true;
                }
            }
        }
    } // scheduler lock released before IPI and context switch

    if let Some(ref target) = switch_target {
        // We killed the task running on this CPU; hand the CPU to the next task.
        unsafe {
            crate::process::task::do_switch_context(target);
        }
        finish_switch();
    }

    if let Some(ci) = ipi_to_cpu {
        send_resched_ipi_to_cpu(ci);
    }

    if let Some(parent_id) = parent_to_signal {
        // Must happen outside scheduler lock to avoid lock recursion.
        let _ =
            crate::process::signal::send_signal(parent_id, crate::process::signal::Signal::SIGCHLD);
    }

    restore_flags(saved_flags);
    killed
}
973
974/// Performs the finalize forced death operation.
975fn finalize_forced_death(
976    sched: &mut Scheduler,
977    task_id: TaskId,
978    exit_code: i32,
979    task_pid: Pid,
980) -> (Option<TaskId>, Option<usize>) {
981    let ipi_reparent = reparent_children(sched, task_id);
982    let parent = sched.parent_of.remove(&task_id);
983    if let Some(parent_id) = parent {
984        sched.zombies.insert(task_id, (exit_code, task_pid));
985        let (_, ipi_wake) = sched.wake_task_locked(parent_id);
986        (Some(parent_id), ipi_reparent.or(ipi_wake))
987    } else {
988        (None, ipi_reparent)
989    }
990}
991
992/// Performs the reparent children operation.
993fn reparent_children(sched: &mut Scheduler, dying: TaskId) -> Option<usize> {
994    let children = match sched.children_of.remove(&dying) {
995        Some(c) => c,
996        None => return None,
997    };
998    let init_id = sched
999        .pid_to_task
1000        .get(&1)
1001        .copied()
1002        .or_else(|| sched.all_tasks.keys().next().copied());
1003    let Some(init_id) = init_id else {
1004        for child in &children {
1005            sched.parent_of.remove(child);
1006        }
1007        return None;
1008    };
1009    if init_id == dying {
1010        for child in &children {
1011            sched.parent_of.remove(child);
1012        }
1013        return None;
1014    }
1015    let mut has_zombie = false;
1016    let init_children = sched.children_of.entry(init_id).or_default();
1017    for child in children {
1018        if !has_zombie && sched.zombies.contains_key(&child) {
1019            has_zombie = true;
1020        }
1021        sched.parent_of.insert(child, init_id);
1022        init_children.push(child);
1023    }
1024    if has_zombie {
1025        let (_, ipi) = sched.wake_task_locked(init_id);
1026        ipi
1027    } else {
1028        None
1029    }
1030}
1031
/// Queue `task_id` for deferred silo teardown.
///
/// The id is parked in `PENDING_SILO_CLEANUPS`; `flush_deferred_silo_cleanups`
/// later hands each queued id to `crate::silo::on_task_terminated`.
/// Presumably deferred so silo teardown does not run while the scheduler
/// lock is held (this is called from `cleanup_task_resources`) — confirm.
///
/// NOTE(review): the doc comment previously attached here described
/// `cleanup_task_resources` (ports/capabilities/address-space release and
/// its `# Safety` contract), not this function — rustdoc was publishing the
/// wrong documentation for this item.
fn queue_silo_cleanup(task_id: TaskId) {
    let mut guard = PENDING_SILO_CLEANUPS.lock();
    guard.push(task_id);
}
1044
1045pub fn flush_deferred_silo_cleanups() {
1046    let mut guard = PENDING_SILO_CLEANUPS.lock();
1047    if guard.is_empty() {
1048        return;
1049    }
1050    let mut drained = Vec::new();
1051    drained.append(&mut *guard);
1052    drop(guard);
1053    for task_id in drained {
1054        crate::silo::on_task_terminated(task_id);
1055    }
1056}
1057
/// Release ports, capabilities, and user address-space mappings for a task
/// that has exited or been killed.
///
/// Per-task work (IPC port cleanup, silo-teardown queuing) always runs.
/// Process-wide resources (capabilities, address space) are only released
/// when this task holds the last strong reference to its `process`.
///
/// # Safety
/// Must be called with the scheduler lock held and the task no longer
/// accessible from any global map (all_tasks, current_task, etc.).
pub(crate) fn cleanup_task_resources(task: &Arc<Task>) {
    crate::ipc::port::cleanup_ports_for_task(task.id);
    // Silo teardown is deferred to flush_deferred_silo_cleanups, not run inline.
    queue_silo_cleanup(task.id);

    // SAFETY: strong_count is racy (a concurrent get_task_by_id may temporarily
    // hold an extra Arc ref). Worst case: cleanup is deferred until the last ref
    // drops elsewhere - no resource leak, just delayed release.
    let is_last_process_ref = Arc::strong_count(&task.process) == 1;
    if !is_last_process_ref {
        return;
    }

    // Last reference to the process: revoke every capability it held.
    // SAFETY: strong_count == 1 and the task is unreachable from global maps,
    // so no other context can alias `process.capabilities` here.
    unsafe {
        (&mut *task.process.capabilities.get()).revoke_all();
    }

    // Tear down user mappings only for a uniquely-owned, non-kernel address
    // space; a shared one (strong_count > 1) is left to its other owners.
    let as_ref = unsafe { &*task.process.address_space.get() };
    if !as_ref.is_kernel() && Arc::strong_count(as_ref) == 1 {
        as_ref.unmap_all_user_regions();
    }
}
1078}