Skip to main content

strat9_kernel/syscall/
process.rs

1//! Process and thread management syscalls.
2//!
3//! Implements PID/TID retrieval per the Strat9-OS ABI.
4
5use super::{error::SyscallError, SyscallFrame};
6use crate::process::{
7    block_current_task, create_session, current_pgid, current_task_clone, current_task_id,
8    current_tid, get_parent_id, get_parent_pid, get_pgid_by_pid, get_sid_by_pid,
9    get_task_id_by_tid,
10    scheduler::add_task_with_parent,
11    set_process_group,
12    task::{CpuContext, ExtendedState, KernelStack, SyncUnsafeCell, Task},
13    WaitChildResult,
14};
15use alloc::{boxed::Box, sync::Arc};
16use core::{mem::offset_of, sync::atomic::Ordering};
17
/// Bootstrap state for a newly created user thread.
///
/// An instance is boxed by `sys_thread_create`, handed to the child through
/// its initial kernel stack frame in `build_user_thread_task`, and consumed
/// by `thread_child_start`. `#[repr(C)]` keeps the field layout fixed because
/// `thread_iret_from_ctx` reads the fields from assembly through the
/// `THREAD_OFF_*` byte offsets.
#[repr(C)]
#[derive(Clone, Copy)]
struct ThreadUserContext {
    /// User-mode instruction pointer the thread starts at (pushed as RIP).
    entry: u64,
    /// Initial user-mode stack pointer (pushed as RSP).
    stack_top: u64,
    /// Value loaded into `rdi` before entering user mode.
    arg0: u64,
    /// Code segment selector for the `iretq` frame.
    user_cs: u64,
    /// RFLAGS image for the `iretq` frame.
    user_rflags: u64,
    /// Stack segment selector for the `iretq` frame.
    user_ss: u64,
}
28
// Byte offsets of each `ThreadUserContext` field, computed at compile time so
// the assembly in `thread_iret_from_ctx` stays in sync with the struct layout.
const THREAD_OFF_ENTRY: usize = offset_of!(ThreadUserContext, entry);
const THREAD_OFF_STACK_TOP: usize = offset_of!(ThreadUserContext, stack_top);
const THREAD_OFF_ARG0: usize = offset_of!(ThreadUserContext, arg0);
const THREAD_OFF_USER_CS: usize = offset_of!(ThreadUserContext, user_cs);
const THREAD_OFF_USER_RFLAGS: usize = offset_of!(ThreadUserContext, user_rflags);
const THREAD_OFF_USER_SS: usize = offset_of!(ThreadUserContext, user_ss);
35
36/// Performs the thread child start operation.
37extern "C" fn thread_child_start(ctx_ptr: u64) -> ! {
38    // SAFETY: `ctx_ptr` is allocated with Box::into_raw in `build_user_thread_task`
39    // and passed as immutable bootstrap data for this task only.
40    let boxed = unsafe { Box::from_raw(ctx_ptr as *mut ThreadUserContext) };
41    let ctx = *boxed;
42    // SAFETY: Assembly routine performs an iretq into userspace with validated context.
43    unsafe { thread_iret_from_ctx(&ctx as *const ThreadUserContext) }
44}
45
46/// Performs the thread iret from ctx operation.
47#[unsafe(naked)]
48unsafe extern "C" fn thread_iret_from_ctx(_ctx: *const ThreadUserContext) -> ! {
49    core::arch::naked_asm!(
50        "mov rsi, rdi",
51        // Build iret frame: SS, RSP, RFLAGS, CS, RIP
52        "mov r8, [rsi + {off_user_ss}]",
53        "push r8",
54        "mov r8, [rsi + {off_stack_top}]",
55        "push r8",
56        "mov r8, [rsi + {off_user_rflags}]",
57        "push r8",
58        "mov r8, [rsi + {off_user_cs}]",
59        "push r8",
60        "mov r8, [rsi + {off_entry}]",
61        "push r8",
62        // Argument convention for userspace entry: rdi = arg0
63        "mov rdi, [rsi + {off_arg0}]",
64        // Child thread returns 0 if entry routine ever reads rax.
65        "xor rax, rax",
66        "iretq",
67        off_entry = const THREAD_OFF_ENTRY,
68        off_stack_top = const THREAD_OFF_STACK_TOP,
69        off_arg0 = const THREAD_OFF_ARG0,
70        off_user_cs = const THREAD_OFF_USER_CS,
71        off_user_rflags = const THREAD_OFF_USER_RFLAGS,
72        off_user_ss = const THREAD_OFF_USER_SS,
73    );
74}
75
76/// Performs the build user thread task operation.
77fn build_user_thread_task(
78    parent: &Arc<Task>,
79    bootstrap_ctx: Box<ThreadUserContext>,
80    tls_base: u64,
81) -> Result<Arc<Task>, SyscallError> {
82    let kernel_stack =
83        KernelStack::allocate(Task::DEFAULT_STACK_SIZE).map_err(|_| SyscallError::OutOfMemory)?;
84    let context = CpuContext::new(thread_child_start as *const () as u64, &kernel_stack);
85    let (pid, tid, _) = Task::allocate_process_ids();
86
87    let parent_fpu = unsafe { &*parent.fpu_state.get() };
88    let mut child_fpu = ExtendedState::new();
89    child_fpu.copy_from(parent_fpu);
90
91    let task = Arc::new(Task {
92        id: crate::process::TaskId::new(),
93        pid,
94        tid,
95        tgid: parent.tgid,
96        pgid: core::sync::atomic::AtomicU32::new(parent.pgid.load(Ordering::Relaxed)),
97        sid: core::sync::atomic::AtomicU32::new(parent.sid.load(Ordering::Relaxed)),
98        uid: core::sync::atomic::AtomicU32::new(parent.uid.load(Ordering::Relaxed)),
99        euid: core::sync::atomic::AtomicU32::new(parent.euid.load(Ordering::Relaxed)),
100        gid: core::sync::atomic::AtomicU32::new(parent.gid.load(Ordering::Relaxed)),
101        egid: core::sync::atomic::AtomicU32::new(parent.egid.load(Ordering::Relaxed)),
102        state: SyncUnsafeCell::new(crate::process::TaskState::Ready),
103        priority: parent.priority,
104        context: SyncUnsafeCell::new(context),
105        kernel_stack,
106        user_stack: None,
107        name: "user-thread",
108        process: parent.process.clone(),
109        pending_signals: crate::process::signal::SignalSet::new(),
110        blocked_signals: parent.blocked_signals.clone(),
111        signal_stack: SyncUnsafeCell::new(None),
112        itimers: crate::process::timer::ITimers::new(),
113        wake_pending: core::sync::atomic::AtomicBool::new(false),
114        wake_deadline_ns: core::sync::atomic::AtomicU64::new(0),
115        trampoline_entry: core::sync::atomic::AtomicU64::new(0),
116        trampoline_stack_top: core::sync::atomic::AtomicU64::new(0),
117        trampoline_arg0: core::sync::atomic::AtomicU64::new(0),
118        ticks: core::sync::atomic::AtomicU64::new(0),
119        sched_policy: SyncUnsafeCell::new(parent.sched_policy()),
120        vruntime: core::sync::atomic::AtomicU64::new(parent.vruntime()),
121        clear_child_tid: core::sync::atomic::AtomicU64::new(0),
122        user_fs_base: core::sync::atomic::AtomicU64::new(tls_base),
123        fpu_state: SyncUnsafeCell::new(child_fpu),
124        xcr0_mask: core::sync::atomic::AtomicU64::new(parent.xcr0_mask.load(Ordering::Relaxed)),
125    });
126
127    // CpuContext initial stack layout: r15, r14, r13(arg), r12(entry), rbp, rbx, ret
128    // Seed r13 with bootstrap context pointer for `thread_child_start`.
129    unsafe {
130        let ctx = &mut *task.context.get();
131        let frame = ctx.saved_rsp as *mut u64;
132        *frame.add(2) = Box::into_raw(bootstrap_ctx) as u64;
133    }
134
135    Ok(task)
136}
137
138/// SYS_GETPID (311): Return current process ID.
139///
140/// In Strat9, each task has a unique ID, so getpid returns the TaskId.
141pub fn sys_getpid() -> Result<u64, SyscallError> {
142    current_task_clone()
143        .map(|task| task.tgid as u64)
144        .ok_or(SyscallError::Fault)
145}
146
147/// SYS_GETTID (312): Return current thread ID.
148///
149/// In the current single-threaded silo model, TID == PID.
150pub fn sys_gettid() -> Result<u64, SyscallError> {
151    current_tid()
152        .map(|tid| tid as u64)
153        .ok_or(SyscallError::Fault)
154}
155
156/// SYS_THREAD_CREATE (341): create a userspace thread sharing current process resources.
157pub fn sys_thread_create(
158    frame: &SyscallFrame,
159    entry: u64,
160    stack_top: u64,
161    arg0: u64,
162    flags: u64,
163    tls_base: u64,
164) -> Result<u64, SyscallError> {
165    const USER_TOP_EXCLUSIVE: u64 = 0x0000_8000_0000_0000;
166
167    if flags != 0 {
168        return Err(SyscallError::InvalidArgument);
169    }
170
171    if entry == 0
172        || stack_top == 0
173        || entry >= USER_TOP_EXCLUSIVE
174        || stack_top >= USER_TOP_EXCLUSIVE
175        || (stack_top & 0xF) != 0
176    {
177        return Err(SyscallError::InvalidArgument);
178    }
179
180    let parent = current_task_clone().ok_or(SyscallError::Fault)?;
181    if parent.is_kernel() {
182        return Err(SyscallError::PermissionDenied);
183    }
184
185    let user_ctx = Box::new(ThreadUserContext {
186        entry,
187        stack_top,
188        arg0,
189        user_cs: frame.iret_cs,
190        user_rflags: frame.iret_rflags | (1 << 9),
191        user_ss: frame.iret_ss,
192    });
193
194    let child = build_user_thread_task(&parent, user_ctx, tls_base)?;
195    let tid = child.tid as u64;
196    add_task_with_parent(child, parent.id);
197    Ok(tid)
198}
199
200/// SYS_THREAD_JOIN (342): wait for a thread created by the current task.
201pub fn sys_thread_join(tid: u64, status_ptr: u64, flags: u64) -> Result<u64, SyscallError> {
202    if flags != 0 {
203        return Err(SyscallError::InvalidArgument);
204    }
205
206    let wait_tid = u32::try_from(tid).map_err(|_| SyscallError::InvalidArgument)?;
207    let current = current_task_clone().ok_or(SyscallError::Fault)?;
208    if wait_tid == current.tid {
209        return Err(SyscallError::InvalidArgument);
210    }
211
212    let parent_id = current_task_id().ok_or(SyscallError::Fault)?;
213    let child_id = get_task_id_by_tid(wait_tid).ok_or(SyscallError::NotFound)?;
214    if get_parent_id(child_id) != Some(parent_id) {
215        return Err(SyscallError::NotFound);
216    }
217
218    loop {
219        match crate::process::try_wait_child(parent_id, Some(child_id)) {
220            WaitChildResult::Reaped { status, .. } => {
221                if status_ptr != 0 {
222                    let out = crate::memory::UserSliceWrite::new(status_ptr, 4)
223                        .map_err(|_| SyscallError::Fault)?;
224                    out.copy_from(&(status as i32).to_ne_bytes());
225                }
226                return Ok(wait_tid as u64);
227            }
228            WaitChildResult::NoChildren => return Err(SyscallError::NotFound),
229            WaitChildResult::StillRunning => block_current_task(),
230        }
231    }
232}
233
234/// SYS_THREAD_EXIT (343): exit only the current thread.
235pub fn sys_thread_exit(exit_code: u64) -> Result<u64, SyscallError> {
236    let code = i32::try_from(exit_code).map_err(|_| SyscallError::InvalidArgument)?;
237    crate::process::scheduler::exit_current_task(code)
238}
239
240/// SYS_PROC_GETPPID/SYS_GETPPID (309): Return parent process ID.
241pub fn sys_getppid() -> Result<u64, SyscallError> {
242    let child = current_task_id().ok_or(SyscallError::Fault)?;
243    Ok(get_parent_pid(child).map(|p| p as u64).unwrap_or(0))
244}
245
246/// SYS_GETPGID (318): Return process group id for `pid` (`0` = caller).
247pub fn sys_getpgid(pid: i64) -> Result<u64, SyscallError> {
248    if pid < 0 {
249        return Err(SyscallError::InvalidArgument);
250    }
251    if pid == 0 {
252        return current_pgid()
253            .map(|pgid| pgid as u64)
254            .ok_or(SyscallError::Fault);
255    }
256    get_pgid_by_pid(pid as u32)
257        .map(|pgid| pgid as u64)
258        .ok_or(SyscallError::NotFound)
259}
260
261/// POSIX getpgrp wrapper (equivalent to getpgid(0)).
262pub fn sys_getpgrp() -> Result<u64, SyscallError> {
263    current_pgid()
264        .map(|pgid| pgid as u64)
265        .ok_or(SyscallError::Fault)
266}
267
268/// SYS_GETSID (332): Return session id for `pid` (`0` = caller).
269pub fn sys_getsid(pid: i64) -> Result<u64, SyscallError> {
270    if pid < 0 {
271        return Err(SyscallError::InvalidArgument);
272    }
273    if pid == 0 {
274        return crate::process::current_sid()
275            .map(|sid| sid as u64)
276            .ok_or(SyscallError::Fault);
277    }
278    get_sid_by_pid(pid as u32)
279        .map(|sid| sid as u64)
280        .ok_or(SyscallError::NotFound)
281}
282
283/// SYS_SETPGID (317): set process group id.
284pub fn sys_setpgid(pid: i64, pgid: i64) -> Result<u64, SyscallError> {
285    if pid < 0 || pgid < 0 {
286        return Err(SyscallError::InvalidArgument);
287    }
288    let caller = current_task_id().ok_or(SyscallError::Fault)?;
289    let target_pid = if pid == 0 { None } else { Some(pid as u32) };
290    let new_pgid = if pgid == 0 { None } else { Some(pgid as u32) };
291    let final_pgid = set_process_group(caller, target_pid, new_pgid)?;
292    Ok(final_pgid as u64)
293}
294
295/// SYS_SETSID (319): create a new session.
296pub fn sys_setsid() -> Result<u64, SyscallError> {
297    let caller = current_task_id().ok_or(SyscallError::Fault)?;
298    create_session(caller).map(|sid| sid as u64)
299}
300
301// ─── Credentials ─────────────────────────────────────────────────────────────
302
303/// SYS_GETUID (335): Return real user id.
304pub fn sys_getuid() -> Result<u64, SyscallError> {
305    let task = current_task_clone().ok_or(SyscallError::Fault)?;
306    Ok(task.uid.load(Ordering::Relaxed) as u64)
307}
308
309/// SYS_GETEUID (336): Return effective user id.
310pub fn sys_geteuid() -> Result<u64, SyscallError> {
311    let task = current_task_clone().ok_or(SyscallError::Fault)?;
312    Ok(task.euid.load(Ordering::Relaxed) as u64)
313}
314
315/// SYS_GETGID (337): Return real group id.
316pub fn sys_getgid() -> Result<u64, SyscallError> {
317    let task = current_task_clone().ok_or(SyscallError::Fault)?;
318    Ok(task.gid.load(Ordering::Relaxed) as u64)
319}
320
321/// SYS_GETEGID (338): Return effective group id.
322pub fn sys_getegid() -> Result<u64, SyscallError> {
323    let task = current_task_clone().ok_or(SyscallError::Fault)?;
324    Ok(task.egid.load(Ordering::Relaxed) as u64)
325}
326
327/// SYS_SETUID (339): Set real and effective user id (simplified: no capabilities check).
328pub fn sys_setuid(uid: u64) -> Result<u64, SyscallError> {
329    if uid > u32::MAX as u64 {
330        return Err(SyscallError::InvalidArgument);
331    }
332    let task = current_task_clone().ok_or(SyscallError::Fault)?;
333    // Privileged (uid==0) can set anything; unprivileged can only set to current uid/euid.
334    let euid = task.euid.load(Ordering::Relaxed);
335    let cur_uid = task.uid.load(Ordering::Relaxed);
336    if euid != 0 && uid as u32 != cur_uid && uid as u32 != euid {
337        return Err(SyscallError::PermissionDenied);
338    }
339    task.uid.store(uid as u32, Ordering::Relaxed);
340    task.euid.store(uid as u32, Ordering::Relaxed);
341    Ok(0)
342}
343
344/// SYS_SETGID (340): Set real and effective group id (simplified).
345pub fn sys_setgid(gid: u64) -> Result<u64, SyscallError> {
346    if gid > u32::MAX as u64 {
347        return Err(SyscallError::InvalidArgument);
348    }
349    let task = current_task_clone().ok_or(SyscallError::Fault)?;
350    let euid = task.euid.load(Ordering::Relaxed);
351    let cur_gid = task.gid.load(Ordering::Relaxed);
352    let egid = task.egid.load(Ordering::Relaxed);
353    if euid != 0 && gid as u32 != cur_gid && gid as u32 != egid {
354        return Err(SyscallError::PermissionDenied);
355    }
356    task.gid.store(gid as u32, Ordering::Relaxed);
357    task.egid.store(gid as u32, Ordering::Relaxed);
358    Ok(0)
359}
360
361// ─── Thread lifecycle helpers ─────────────────────────────────────────────────
362
363/// SYS_SET_TID_ADDRESS (333): Store `tidptr` in the task; return current TID.
364///
365/// The kernel will write 0 to `tidptr` and call futex_wake when the thread
366/// exits. This is the mechanism used by pthreads for thread join.
367pub fn sys_set_tid_address(tidptr: u64) -> Result<u64, SyscallError> {
368    let task = current_task_clone().ok_or(SyscallError::Fault)?;
369    task.clear_child_tid.store(tidptr, Ordering::Relaxed);
370    Ok(task.tid as u64)
371}
372
373/// SYS_EXIT_GROUP (334): Exit all threads in the thread group.
374///
375/// In the current single-threaded model this is identical to SYS_PROC_EXIT.
376/// When multi-threading is added, this must kill every task sharing the same TGID.
377pub fn sys_exit_group(exit_code: u64) -> Result<u64, SyscallError> {
378    // Diverges — never returns.
379    crate::process::scheduler::exit_current_task(exit_code as i32)
380}
381
382// ─── Architecture-specific ────────────────────────────────────────────────────
383
// x86_64 arch_prctl operation codes (Linux-compatible).

/// `arch_prctl` code: set the GS base to the supplied address.
const ARCH_SET_GS: u64 = 0x1001;
/// `arch_prctl` code: set the FS base to the supplied address.
const ARCH_SET_FS: u64 = 0x1002;
/// `arch_prctl` code: store the FS base at the supplied user address.
const ARCH_GET_FS: u64 = 0x1003;
/// `arch_prctl` code: store the GS base at the supplied user address.
const ARCH_GET_GS: u64 = 0x1004;

// MSR addresses for FS/GS base.

/// Model-specific register holding the FS segment base (IA32_FS_BASE).
const MSR_FS_BASE: u32 = 0xC000_0100;
/// Model-specific register holding the GS segment base (IA32_GS_BASE).
const MSR_GS_BASE: u32 = 0xC000_0101;
393
394/// SYS_ARCH_PRCTL (350): Architecture-specific process settings.
395///
396/// Supported operations:
397/// - `ARCH_SET_FS` (0x1002): Set user-space FS.base (Thread Local Storage).
398/// - `ARCH_GET_FS` (0x1003): Read current FS.base into *arg.
399pub fn sys_arch_prctl(code: u64, addr: u64) -> Result<u64, SyscallError> {
400    let task = current_task_clone().ok_or(SyscallError::Fault)?;
401    match code {
402        ARCH_SET_FS => {
403            // Store in task struct (so it survives context switches).
404            task.user_fs_base.store(addr, Ordering::Relaxed);
405            // Write to MSR immediately — we are the current task.
406            unsafe { wrmsr(MSR_FS_BASE, addr) };
407            Ok(0)
408        }
409        ARCH_GET_FS => {
410            let base = task.user_fs_base.load(Ordering::Relaxed);
411            // Write the 8-byte value back to the provided user pointer.
412            use crate::memory::UserSliceWrite;
413            let out = UserSliceWrite::new(addr, 8).map_err(|_| SyscallError::Fault)?;
414            out.copy_from(&base.to_ne_bytes());
415            Ok(0)
416        }
417        ARCH_SET_GS => {
418            // GS slot not separately stored for now.
419            unsafe { wrmsr(MSR_GS_BASE, addr) };
420            Ok(0)
421        }
422        ARCH_GET_GS => {
423            let base = unsafe { rdmsr(MSR_GS_BASE) };
424            use crate::memory::UserSliceWrite;
425            let out = UserSliceWrite::new(addr, 8).map_err(|_| SyscallError::Fault)?;
426            out.copy_from(&base.to_ne_bytes());
427            Ok(0)
428        }
429        _ => Err(SyscallError::InvalidArgument),
430    }
431}
432
/// Write a 64-bit value to a model-specific register.
///
/// The value is split into two 32-bit halves as the `wrmsr` instruction
/// requires: the low half in `eax`, the high half in `edx`, and the register
/// index in `ecx`.
///
/// # Safety
/// Must only be called with valid MSR addresses. Misuse causes a #GP.
#[inline]
unsafe fn wrmsr(msr: u32, value: u64) {
    let high = (value >> 32) as u32;
    let low = (value & 0xFFFF_FFFF) as u32;
    unsafe {
        core::arch::asm!(
            "wrmsr",
            in("ecx") msr,
            in("eax") low,
            in("edx") high,
            options(nostack, preserves_flags),
        );
    }
}
451
/// Read a 64-bit value from a model-specific register.
///
/// `rdmsr` takes the register index in `ecx` and returns the low half in
/// `eax` and the high half in `edx`; the two halves are recombined here.
///
/// # Safety
/// Must only be called with valid MSR addresses.
#[inline]
unsafe fn rdmsr(msr: u32) -> u64 {
    let (low, high): (u32, u32);
    unsafe {
        core::arch::asm!(
            "rdmsr",
            in("ecx") msr,
            out("eax") low,
            out("edx") high,
            options(nostack, preserves_flags),
        );
    }
    (u64::from(high) << 32) | u64::from(low)
}
471
472// ─── tgkill ───────────────────────────────────────────────────────────────────
473
474/// SYS_TGKILL (352): Send a signal to a specific thread in a thread group.
475///
476/// In the current single-threaded model, tgid and tid both map to a single
477/// task (pid == tid == tgid). We verify both match before delivering.
478pub fn sys_tgkill(tgid: u64, tid: u64, signum: u64) -> Result<u64, SyscallError> {
479    use crate::process::{get_task_by_pid, send_signal, Signal};
480
481    // Sanity check.
482    if signum as u32 >= 64 {
483        return Err(SyscallError::InvalidArgument);
484    }
485
486    // Resolve tgid → task.
487    let task = get_task_by_pid(tgid as u32).ok_or(SyscallError::NotFound)?;
488
489    // Verify the tid matches (single-threaded: task.tid == task.pid).
490    if task.tid as u64 != tid && task.pid as u64 != tid {
491        return Err(SyscallError::NotFound);
492    }
493
494    if signum == 0 {
495        return Ok(0); // existence check only
496    }
497
498    let sig = Signal::from_u32(signum as u32).ok_or(SyscallError::InvalidArgument)?;
499    send_signal(task.id, sig)?;
500    Ok(0)
501}