strat9_kernel/syscall/wait.rs
//! Wait-family syscall handlers: waitpid, wait, getpid, getppid.
2//!
3//! Design notes
4//! ============
5//!
6//! The blocking loop follows the **Aero / Maestro** pattern:
7//!
8//! ```text
9//! loop {
10//! try_wait_child() ← O(log n) scan under scheduler lock
11//! StillRunning → block_current_task()
12//! ↑ woken by exit_current_task → wake_task_locked(parent)
13//! }
14//! ```
15//!
16//! Lost-wakeup race: if the child exits between `try_wait_child` returning
17//! `StillRunning` and `block_current_task()` reaching the scheduler lock,
18//! `exit_current_task` will have already called `wake_task_locked(parent)`,
19//! which sets `task.wake_pending = true`. `block_current_task()` checks
20//! this flag and aborts the block, so the parent re-runs the loop immediately
21//! without sleeping.
22//!
//! Signal interruption: whenever the scan reports `StillRunning` — before the
//! first sleep and again after every wakeup — the pending-signal flag is
//! checked; if a signal is queued the syscall returns `-EINTR` so userspace can
//! handle it before retrying.
26//!
27//! ## Plan 9 flavour
28//!
//! `sys_waitpid` encodes the exit status using the standard Linux `W_EXITCODE`
//! macro (`status << 8`), which musl/glibc decode correctly. The separate
//! `SYS_PROC_WAIT` call (`sys_wait`) instead writes a Plan 9-style `Waitmsg`
//! structure (`pid + exit_code + msg[64]`) to its `waitmsg_ptr` argument when
//! that pointer is non-null.
34
35use crate::{
36 memory::UserSliceWrite,
37 process::{
38 block_current_task, current_task_clone, current_task_id, get_task_id_by_pid,
39 has_pending_signals,
40 scheduler::{try_wait_child, WaitChildResult},
41 TaskId,
42 },
43 syscall::error::SyscallError,
44};
45
46// ─── Options flags ────────────────────────────────────────────────────────────
47
/// `waitpid` option bit (bit 0): return immediately instead of blocking when
/// no matching child has exited yet.
pub const WNOHANG: u32 = 0x1;
50
51// ─── Plan 9-style exit message ────────────────────────────────────────────────
52
/// Plan 9-inspired exit message written to userspace by `SYS_PROC_WAIT`.
///
/// Layout (C-compatible, 80 bytes total, 8-byte aligned):
/// ```text
/// offset  0  pid        u64       — identifier of the exited child
/// offset  8  exit_code  i32       — numeric exit code (0 = success)
/// offset 12  _pad       i32       — explicit padding, always written as 0
/// offset 16  msg        [u8; 64]  — null-terminated exit description
/// ```
///
/// The padding is an explicit field so the struct has no implicit padding
/// bytes; this matters because the struct is copied to userspace as a raw
/// byte slice.
///
/// The `msg` field follows Plan 9 convention:
/// - `""` (empty, or first byte = 0) → process exited normally (code 0)
/// - `"exit <N>"`                    → process exited with code N ≠ 0
/// - `"killed"`                      → process was killed by signal
///   (reserved by the convention; not produced by `Waitmsg::new`)
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct Waitmsg {
    /// Identifier of the exited child.
    pub pid: u64,
    /// Numeric exit code of the child (0 = success).
    pub exit_code: i32,
    /// Explicit alignment padding; keeps the byte image fully initialised.
    pub _pad: i32,
    /// Null-terminated, human-readable exit description.
    pub msg: [u8; 64],
}

// The userspace ABI documents an 80-byte, 8-byte-aligned record. Fail the
// build if a field change ever breaks that contract.
const _: () = assert!(
    core::mem::size_of::<Waitmsg>() == 80 && core::mem::align_of::<Waitmsg>() == 8
);
74
75impl Waitmsg {
76 /// Creates a new instance.
77 fn new(pid: u64, exit_code: i32) -> Self {
78 let mut msg = [0u8; 64];
79 if exit_code != 0 {
80 // Write "exit <N>" using a stack buffer — no heap, no format!.
81 let prefix = b"exit ";
82 msg[..prefix.len()].copy_from_slice(prefix);
83 write_decimal(exit_code, &mut msg[prefix.len()..]);
84 }
85 // exit_code == 0: leave msg all-zero (Plan 9: empty = clean exit)
86 Waitmsg {
87 pid,
88 exit_code,
89 _pad: 0,
90 msg,
91 }
92 }
93}
94
/// Render `n` in base 10 into `buf` as a null-terminated byte string.
///
/// No heap allocation: the digits are produced right-to-left into a small
/// stack buffer and then copied out. Negative values get a leading `-`
/// (including `i32::MIN`).
///
/// One byte of `buf` is always reserved for the terminator, so at most
/// `buf.len() - 1` characters are written; a longer rendering is truncated
/// on the right. If `buf` is empty nothing is written at all. Bytes after the
/// terminator are left untouched.
fn write_decimal(n: i32, buf: &mut [u8]) {
    // Room left for text once the terminator is accounted for.
    let capacity = match buf.len().checked_sub(1) {
        Some(room) => room,
        None => return,
    };

    // Longest possible rendering is "-2147483648": 11 bytes.
    let mut scratch = [0u8; 11];
    let mut start = scratch.len();

    // `unsigned_abs` is well-defined for i32::MIN, unlike negation.
    let mut rest = n.unsigned_abs();
    loop {
        start -= 1;
        scratch[start] = b'0' + (rest % 10) as u8;
        rest /= 10;
        if rest == 0 {
            break;
        }
    }
    if n < 0 {
        start -= 1;
        scratch[start] = b'-';
    }

    let text = &scratch[start..];
    let take = text.len().min(capacity);
    buf[..take].copy_from_slice(&text[..take]);
    buf[take] = 0;
}
148
149// ─── Helper ───────────────────────────────────────────────────────────────────
150
/// Pack `exit_code` into a Linux-style `wstatus` word, i.e.
/// `W_EXITCODE(exit_code, 0)`.
///
/// Only the low 8 bits of `exit_code` are kept; they land in bits 8-15 of the
/// result. The low byte (termination signal) is left at 0, which means
/// "exited normally".
#[inline]
fn encode_wstatus(exit_code: i32) -> i32 {
    // Truncating to u8 keeps exactly the low 8 bits, for negative inputs too.
    let low_byte = exit_code as u8;
    i32::from(low_byte) << 8
}
159
160/// Block until a matching child becomes a zombie, checking signals each cycle.
161///
162/// Returns `Ok(WaitChildResult::Reaped { .. })` or propagates `EINTR` /
163/// `NoChildren`.
164fn wait_blocking(
165 parent_id: TaskId,
166 target: Option<TaskId>,
167) -> Result<(TaskId, u64, i32), SyscallError> {
168 loop {
169 match try_wait_child(parent_id, target) {
170 WaitChildResult::Reaped { child, pid, status } => {
171 return Ok((child, pid as u64, status));
172 }
173 WaitChildResult::NoChildren => {
174 return Err(SyscallError::NoChildren);
175 }
176 WaitChildResult::StillRunning => {
177 if has_pending_signals() {
178 return Err(SyscallError::Interrupted);
179 }
180 block_current_task();
181 }
182 }
183 }
184}
185
186// ─── Syscall handlers ─────────────────────────────────────────────────────────
187
188/// SYS_PROC_WAITPID (310): wait for a child process to exit.
189///
190/// Arguments:
191/// - `pid` : child task ID to wait for, or `-1` (any child).
192/// - `status_ptr` : userspace `*i32` to receive the encoded wait status
193/// (`W_EXITCODE`). Pass `0` to discard.
194/// - `options` : `WNOHANG (1)` — return immediately if no child ready.
195///
196/// Returns:
197/// - child task ID on success.
198/// - `0` if `WNOHANG` and no child has exited yet.
199///
200/// Errors:
201/// - `-ECHILD (-10)` — no matching children.
202/// - `-EINTR (-4)` — interrupted by a pending signal.
203/// - `-EINVAL (-22)` — unknown option bits.
204pub fn sys_waitpid(pid: i64, status_ptr: u64, options: u32) -> Result<u64, SyscallError> {
205 if options & !WNOHANG != 0 {
206 return Err(SyscallError::InvalidArgument);
207 }
208 let wnohang = options & WNOHANG != 0;
209
210 let parent_id = current_task_id().ok_or(SyscallError::Fault)?;
211
212 // Build child filter.
213 // pid > 0 → wait for that specific child
214 // pid == -1 → wait for any child
215 // pid == 0 → process-group semantics (not supported)
216 // pid < -1 → wait for group |pid| (not supported)
217 let target: Option<TaskId> = if pid > 0 {
218 match get_task_id_by_pid(pid as u32) {
219 Some(t) => Some(t),
220 None => return Err(SyscallError::NoChildren),
221 }
222 } else if pid == -1 {
223 None // any child
224 } else {
225 // pid == 0 or pid < -1: process-group wait — not implemented.
226 return Err(SyscallError::InvalidArgument);
227 };
228
229 // ── Non-blocking fast path ────────────────────────────────────────────
230 if wnohang {
231 return match try_wait_child(parent_id, target) {
232 WaitChildResult::Reaped { pid, status, .. } => {
233 write_wstatus(status_ptr, status)?;
234 log::debug!("waitpid(WNOHANG): reaped pid={} status={}", pid, status);
235 Ok(pid as u64)
236 }
237 WaitChildResult::NoChildren => Err(SyscallError::NoChildren),
238 WaitChildResult::StillRunning => Ok(0), // no zombie yet
239 };
240 }
241
242 // ── Blocking path ─────────────────────────────────────────────────────
243 let (_child, child_pid, status) = wait_blocking(parent_id, target)?;
244 write_wstatus(status_ptr, status)?;
245 log::debug!("waitpid: reaped pid={} status={}", child_pid, status);
246 Ok(child_pid)
247}
248
249/// SYS_PROC_WAIT (311): Plan 9-style wait — any child, writes full Waitmsg.
250///
251/// Arguments:
252/// - `waitmsg_ptr`: userspace pointer to a `Waitmsg` struct (80 bytes).
253/// Pass `0` to discard.
254///
255/// Returns the child task ID on success.
256///
257/// Errors: `-ECHILD`, `-EINTR`.
258pub fn sys_wait(waitmsg_ptr: u64) -> Result<u64, SyscallError> {
259 let parent_id = current_task_id().ok_or(SyscallError::Fault)?;
260 let (_child, child_pid, status) = wait_blocking(parent_id, None)?;
261
262 if waitmsg_ptr != 0 {
263 let wmsg = Waitmsg::new(child_pid, status);
264 // SAFETY: Waitmsg is repr(C) and fully initialised above.
265 let bytes = unsafe {
266 core::slice::from_raw_parts(
267 &wmsg as *const Waitmsg as *const u8,
268 core::mem::size_of::<Waitmsg>(),
269 )
270 };
271 write_user_with_cow(waitmsg_ptr, bytes)?;
272 }
273
274 log::debug!("sys_wait: reaped pid={} exit_code={}", child_pid, status);
275 Ok(child_pid)
276}
277
/// SYS_PROC_GETPID (308): return the current task's ID.
///
/// Thin forwarding wrapper: the result (success value or error) is whatever
/// `super::process::sys_getpid` returns, passed through unchanged.
pub fn sys_getpid() -> Result<u64, SyscallError> {
    super::process::sys_getpid()
}
282
/// SYS_PROC_GETPPID (309): return the parent task's ID, or 0 if none.
///
/// Thin forwarding wrapper: the result (success value or error) is whatever
/// `super::process::sys_getppid` returns, passed through unchanged.
pub fn sys_getppid() -> Result<u64, SyscallError> {
    super::process::sys_getppid()
}
287
288// ─── Internal helpers ─────────────────────────────────────────────────────────
289
290/// Write the Linux-encoded wait status to a nullable userspace pointer.
291fn write_wstatus(status_ptr: u64, exit_code: i32) -> Result<(), SyscallError> {
292 if status_ptr != 0 {
293 let wstatus = encode_wstatus(exit_code);
294 write_user_with_cow(status_ptr, &wstatus.to_ne_bytes())?;
295 }
296 Ok(())
297}
298
299/// Performs the resolve cow for range operation.
300fn resolve_cow_for_range(ptr: u64, len: usize) -> Result<(), SyscallError> {
301 if len == 0 {
302 return Ok(());
303 }
304 let task = current_task_clone().ok_or(SyscallError::Fault)?;
305 let address_space = unsafe { &*task.process.address_space.get() };
306 let start = ptr & !0xfff;
307 let end = (ptr + (len as u64).saturating_sub(1)) & !0xfff;
308 let mut page = start;
309 loop {
310 crate::syscall::fork::handle_cow_fault(page, address_space)
311 .map_err(|_| SyscallError::Fault)?;
312 if page == end {
313 break;
314 }
315 page = page.saturating_add(4096);
316 }
317 Ok(())
318}
319
320/// Writes user with cow.
321fn write_user_with_cow(ptr: u64, data: &[u8]) -> Result<(), SyscallError> {
322 match UserSliceWrite::new(ptr, data.len()) {
323 Ok(user) => {
324 user.copy_from(data);
325 Ok(())
326 }
327 Err(crate::memory::UserSliceError::PermissionDenied) => {
328 resolve_cow_for_range(ptr, data.len())?;
329 let user = UserSliceWrite::new(ptr, data.len())?;
330 user.copy_from(data);
331 Ok(())
332 }
333 Err(e) => Err(e.into()),
334 }
335}