Skip to main content

strat9_kernel/syscall/
wait.rs

1//! Wait-family syscall handlers: waitpid, getpid, getppid.
2//!
3//! Design notes
4//! ============
5//!
6//! The blocking loop follows the **Aero / Maestro** pattern:
7//!
8//! ```text
9//! loop {
10//!     try_wait_child()       ← O(log n) scan under scheduler lock
11//!     StillRunning → block_current_task()
12//!                             ↑ woken by exit_current_task → wake_task_locked(parent)
13//! }
14//! ```
15//!
16//! Lost-wakeup race: if the child exits between `try_wait_child` returning
17//! `StillRunning` and `block_current_task()` reaching the scheduler lock,
18//! `exit_current_task` will have already called `wake_task_locked(parent)`,
19//! which sets `task.wake_pending = true`.  `block_current_task()` checks
20//! this flag and aborts the block, so the parent re-runs the loop immediately
21//! without sleeping.
22//!
23//! Signal interruption: after each sleep the pending-signal flag is checked;
24//! if a signal is queued the syscall returns `-EINTR` so userspace can handle
25//! it before retrying.
26//!
27//! ## Plan 9 flavour
28//!
//! `sys_waitpid` encodes the exit status using the standard Linux `W_EXITCODE`
//! macro (`status << 8`), which musl/glibc decode correctly.  The separate
//! `sys_wait` handler (`SYS_PROC_WAIT`) instead writes a Plan 9-style
//! `Waitmsg` structure (`pid + exit_code + msg[64]`) to the caller-supplied
//! `waitmsg_ptr` when that pointer is non-null.
34
35use crate::{
36    memory::UserSliceWrite,
37    process::{
38        block_current_task, current_task_clone, current_task_id, get_task_id_by_pid,
39        has_pending_signals,
40        scheduler::{try_wait_child, WaitChildResult},
41        TaskId,
42    },
43    syscall::error::SyscallError,
44};
45
46// ─── Options flags ────────────────────────────────────────────────────────────
47
/// `waitpid` option bit 0: return immediately instead of blocking when no
/// matching child has been reaped yet.
pub const WNOHANG: u32 = 0x1;
50
51// ─── Plan 9-style exit message ────────────────────────────────────────────────
52
/// Plan 9-inspired exit message copied to userspace by `SYS_PROC_WAIT`.
///
/// The struct is `#[repr(C)]` and is handed to userspace as raw bytes, so its
/// layout is part of the syscall ABI. The explicit `_pad` field means there
/// are no implicit padding bytes: 8 + 4 + 4 + 64 = 80 bytes, 8-byte aligned.
///
/// ```text
/// pid       u64      — identifier of the exited child
/// exit_code i32      — numeric exit code (0 = success)
/// _pad      i32      — explicit padding, always written as 0
/// msg       [u8; 64] — null-terminated exit description
/// ```
///
/// The `msg` field follows Plan 9 convention:
///   - `""` (first byte = 0)  → process exited normally (code 0)
///   - `"exit <N>"`           → process exited with code N ≠ 0
///   - `"killed"`             → process was killed by a signal
///     (reserved by the convention; the constructor in this file only
///     produces the first two forms)
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct Waitmsg {
    /// Identifier of the exited child.
    pub pid: u64,
    /// Numeric exit code; `0` means a clean exit.
    pub exit_code: i32,
    /// Explicit padding so the struct has no uninitialised bytes.
    pub _pad: i32,
    /// Null-terminated, human-readable exit description.
    pub msg: [u8; 64],
}
74
75impl Waitmsg {
76    /// Creates a new instance.
77    fn new(pid: u64, exit_code: i32) -> Self {
78        let mut msg = [0u8; 64];
79        if exit_code != 0 {
80            // Write "exit <N>" using a stack buffer — no heap, no format!.
81            let prefix = b"exit ";
82            msg[..prefix.len()].copy_from_slice(prefix);
83            write_decimal(exit_code, &mut msg[prefix.len()..]);
84        }
85        // exit_code == 0: leave msg all-zero (Plan 9: empty = clean exit)
86        Waitmsg {
87            pid,
88            exit_code,
89            _pad: 0,
90            msg,
91        }
92    }
93}
94
/// Render `n` in base 10 into `buf` as a null-terminated byte string.
///
/// No heap allocation: the text is assembled right-to-left in a small stack
/// buffer and then copied out. Negative values get a leading `-`.
///
/// * An empty `buf` is left untouched (there is no room for the terminator).
/// * If the text does not fit, only its first `buf.len() - 1` bytes are
///   copied, followed by the terminator.
/// * Bytes of `buf` after the terminator are not modified.
fn write_decimal(n: i32, buf: &mut [u8]) {
    // The last byte of `buf` is reserved for the null terminator.
    let capacity = match buf.len().checked_sub(1) {
        Some(room) => room,
        None => return,
    };

    // Longest possible rendering is "-2147483648": 11 bytes.
    let mut scratch = [0u8; 11];
    let mut start = scratch.len();

    // `unsigned_abs` yields the magnitude without overflowing on `i32::MIN`.
    let mut rest = n.unsigned_abs();
    loop {
        start -= 1;
        scratch[start] = b'0' + (rest % 10) as u8;
        rest /= 10;
        if rest == 0 {
            break;
        }
    }
    if n < 0 {
        start -= 1;
        scratch[start] = b'-';
    }

    let text = &scratch[start..];
    let copy_len = text.len().min(capacity);
    buf[..copy_len].copy_from_slice(&text[..copy_len]);
    buf[copy_len] = 0;
}
148
149// ─── Helper ───────────────────────────────────────────────────────────────────
150
/// Pack `exit_code` into a Linux-style `wstatus` word, i.e. `W_EXITCODE(code, 0)`.
///
/// Only the low 8 bits of `exit_code` are kept and placed in bits 8-15.
/// The low 7 bits of the result (the termination-signal field) are always 0,
/// which userspace reads as "exited normally".
#[inline]
fn encode_wstatus(exit_code: i32) -> i32 {
    const EXIT_CODE_MASK: i32 = 0xff;
    const EXIT_CODE_SHIFT: u32 = 8;

    let low_byte = exit_code & EXIT_CODE_MASK;
    low_byte << EXIT_CODE_SHIFT
}
159
160/// Block until a matching child becomes a zombie, checking signals each cycle.
161///
162/// Returns `Ok(WaitChildResult::Reaped { .. })` or propagates `EINTR` /
163/// `NoChildren`.
164fn wait_blocking(
165    parent_id: TaskId,
166    target: Option<TaskId>,
167) -> Result<(TaskId, u64, i32), SyscallError> {
168    loop {
169        match try_wait_child(parent_id, target) {
170            WaitChildResult::Reaped { child, pid, status } => {
171                return Ok((child, pid as u64, status));
172            }
173            WaitChildResult::NoChildren => {
174                return Err(SyscallError::NoChildren);
175            }
176            WaitChildResult::StillRunning => {
177                if has_pending_signals() {
178                    return Err(SyscallError::Interrupted);
179                }
180                block_current_task();
181            }
182        }
183    }
184}
185
186// ─── Syscall handlers ─────────────────────────────────────────────────────────
187
188/// SYS_PROC_WAITPID (310): wait for a child process to exit.
189///
190/// Arguments:
191///   - `pid`        : child task ID to wait for, or `-1` (any child).
192///   - `status_ptr` : userspace `*i32` to receive the encoded wait status
193///                    (`W_EXITCODE`). Pass `0` to discard.
194///   - `options`    : `WNOHANG (1)` — return immediately if no child ready.
195///
196/// Returns:
197///   - child task ID on success.
198///   - `0` if `WNOHANG` and no child has exited yet.
199///
200/// Errors:
201///   - `-ECHILD (-10)` — no matching children.
202///   - `-EINTR  (-4)`  — interrupted by a pending signal.
203///   - `-EINVAL (-22)` — unknown option bits.
204pub fn sys_waitpid(pid: i64, status_ptr: u64, options: u32) -> Result<u64, SyscallError> {
205    if options & !WNOHANG != 0 {
206        return Err(SyscallError::InvalidArgument);
207    }
208    let wnohang = options & WNOHANG != 0;
209
210    let parent_id = current_task_id().ok_or(SyscallError::Fault)?;
211
212    // Build child filter.
213    //   pid > 0  → wait for that specific child
214    //   pid == -1 → wait for any child
215    //   pid == 0  → process-group semantics (not supported)
216    //   pid < -1  → wait for group |pid| (not supported)
217    let target: Option<TaskId> = if pid > 0 {
218        match get_task_id_by_pid(pid as u32) {
219            Some(t) => Some(t),
220            None => return Err(SyscallError::NoChildren),
221        }
222    } else if pid == -1 {
223        None // any child
224    } else {
225        // pid == 0 or pid < -1: process-group wait — not implemented.
226        return Err(SyscallError::InvalidArgument);
227    };
228
229    // ── Non-blocking fast path ────────────────────────────────────────────
230    if wnohang {
231        return match try_wait_child(parent_id, target) {
232            WaitChildResult::Reaped { pid, status, .. } => {
233                write_wstatus(status_ptr, status)?;
234                log::debug!("waitpid(WNOHANG): reaped pid={} status={}", pid, status);
235                Ok(pid as u64)
236            }
237            WaitChildResult::NoChildren => Err(SyscallError::NoChildren),
238            WaitChildResult::StillRunning => Ok(0), // no zombie yet
239        };
240    }
241
242    // ── Blocking path ─────────────────────────────────────────────────────
243    let (_child, child_pid, status) = wait_blocking(parent_id, target)?;
244    write_wstatus(status_ptr, status)?;
245    log::debug!("waitpid: reaped pid={} status={}", child_pid, status);
246    Ok(child_pid)
247}
248
249/// SYS_PROC_WAIT (311): Plan 9-style wait — any child, writes full Waitmsg.
250///
251/// Arguments:
252///   - `waitmsg_ptr`: userspace pointer to a `Waitmsg` struct (80 bytes).
253///                    Pass `0` to discard.
254///
255/// Returns the child task ID on success.
256///
257/// Errors: `-ECHILD`, `-EINTR`.
258pub fn sys_wait(waitmsg_ptr: u64) -> Result<u64, SyscallError> {
259    let parent_id = current_task_id().ok_or(SyscallError::Fault)?;
260    let (_child, child_pid, status) = wait_blocking(parent_id, None)?;
261
262    if waitmsg_ptr != 0 {
263        let wmsg = Waitmsg::new(child_pid, status);
264        // SAFETY: Waitmsg is repr(C) and fully initialised above.
265        let bytes = unsafe {
266            core::slice::from_raw_parts(
267                &wmsg as *const Waitmsg as *const u8,
268                core::mem::size_of::<Waitmsg>(),
269            )
270        };
271        write_user_with_cow(waitmsg_ptr, bytes)?;
272    }
273
274    log::debug!("sys_wait: reaped pid={} exit_code={}", child_pid, status);
275    Ok(child_pid)
276}
277
278/// SYS_PROC_GETPID (308): return the current task's ID.
279pub fn sys_getpid() -> Result<u64, SyscallError> {
280    super::process::sys_getpid()
281}
282
283/// SYS_PROC_GETPPID (309): return the parent task's ID, or 0 if none.
284pub fn sys_getppid() -> Result<u64, SyscallError> {
285    super::process::sys_getppid()
286}
287
288// ─── Internal helpers ─────────────────────────────────────────────────────────
289
290/// Write the Linux-encoded wait status to a nullable userspace pointer.
291fn write_wstatus(status_ptr: u64, exit_code: i32) -> Result<(), SyscallError> {
292    if status_ptr != 0 {
293        let wstatus = encode_wstatus(exit_code);
294        write_user_with_cow(status_ptr, &wstatus.to_ne_bytes())?;
295    }
296    Ok(())
297}
298
299/// Performs the resolve cow for range operation.
300fn resolve_cow_for_range(ptr: u64, len: usize) -> Result<(), SyscallError> {
301    if len == 0 {
302        return Ok(());
303    }
304    let task = current_task_clone().ok_or(SyscallError::Fault)?;
305    let address_space = unsafe { &*task.process.address_space.get() };
306    let start = ptr & !0xfff;
307    let end = (ptr + (len as u64).saturating_sub(1)) & !0xfff;
308    let mut page = start;
309    loop {
310        crate::syscall::fork::handle_cow_fault(page, address_space)
311            .map_err(|_| SyscallError::Fault)?;
312        if page == end {
313            break;
314        }
315        page = page.saturating_add(4096);
316    }
317    Ok(())
318}
319
320/// Writes user with cow.
321fn write_user_with_cow(ptr: u64, data: &[u8]) -> Result<(), SyscallError> {
322    match UserSliceWrite::new(ptr, data.len()) {
323        Ok(user) => {
324            user.copy_from(data);
325            Ok(())
326        }
327        Err(crate::memory::UserSliceError::PermissionDenied) => {
328            resolve_cow_for_range(ptr, data.len())?;
329            let user = UserSliceWrite::new(ptr, data.len())?;
330            user.copy_from(data);
331            Ok(())
332        }
333        Err(e) => Err(e.into()),
334    }
335}