Skip to main content

strat9_kernel/syscall/
process.rs

1//! Process and thread management syscalls.
2//!
3//! Implements PID/TID retrieval per the Strat9-OS ABI.
4
5use super::{error::SyscallError, SyscallFrame};
6use crate::process::{
7    block_current_task, create_session, current_pgid, current_task_clone, current_task_id,
8    current_tid, get_child_task_id_by_tid, get_parent_pid, get_pgid_by_pid, get_sid_by_pid,
9    get_task_ids_in_tgid, kill_task,
10    scheduler::add_task_with_parent,
11    set_process_group,
12    task::{CpuContext, ExtendedState, KernelStack, SyncUnsafeCell, Task},
13    WaitChildResult,
14};
15use alloc::{boxed::Box, sync::Arc};
16use core::{mem::offset_of, sync::atomic::Ordering};
17
/// Bootstrap register state handed to a newly created user thread.
///
/// The layout is `#[repr(C)]` so that the `THREAD_OFF_*` byte offsets below
/// stay in sync with the field order; the hand-written assembly in
/// `thread_iret_from_ctx` reads the fields through those offsets.
#[derive(Clone, Copy)]
#[repr(C)]
struct ThreadUserContext {
    /// User-mode instruction pointer the thread starts executing at.
    entry: u64,
    /// Initial user-mode stack pointer.
    stack_top: u64,
    /// Value passed to the entry routine in `rdi`.
    arg0: u64,
    /// Code segment selector used for the `iretq` frame.
    user_cs: u64,
    /// RFLAGS image used for the `iretq` frame.
    user_rflags: u64,
    /// Stack segment selector used for the `iretq` frame.
    user_ss: u64,
}

/// Byte offset of `ThreadUserContext::entry`.
const THREAD_OFF_ENTRY: usize = offset_of!(ThreadUserContext, entry);
/// Byte offset of `ThreadUserContext::stack_top`.
const THREAD_OFF_STACK_TOP: usize = offset_of!(ThreadUserContext, stack_top);
/// Byte offset of `ThreadUserContext::arg0`.
const THREAD_OFF_ARG0: usize = offset_of!(ThreadUserContext, arg0);
/// Byte offset of `ThreadUserContext::user_cs`.
const THREAD_OFF_USER_CS: usize = offset_of!(ThreadUserContext, user_cs);
/// Byte offset of `ThreadUserContext::user_rflags`.
const THREAD_OFF_USER_RFLAGS: usize = offset_of!(ThreadUserContext, user_rflags);
/// Byte offset of `ThreadUserContext::user_ss`.
const THREAD_OFF_USER_SS: usize = offset_of!(ThreadUserContext, user_ss);
35
36/// Performs the thread child start operation.
37extern "C" fn thread_child_start(ctx_ptr: u64) -> ! {
38    // SAFETY: `ctx_ptr` is allocated with Box::into_raw in `build_user_thread_task`
39    // and passed as immutable bootstrap data for this task only.
40    let boxed = unsafe { Box::from_raw(ctx_ptr as *mut ThreadUserContext) };
41    let ctx = *boxed;
42    // SAFETY: Assembly routine performs an iretq into userspace with validated context.
43    unsafe { thread_iret_from_ctx(&ctx as *const ThreadUserContext) }
44}
45
46/// Performs the thread iret from ctx operation.
47#[unsafe(naked)]
48unsafe extern "C" fn thread_iret_from_ctx(_ctx: *const ThreadUserContext) -> ! {
49    core::arch::naked_asm!(
50        // Mask IRQs before touching GS. The user RFLAGS frame re-enables IF.
51        "cli",
52        "mov rsi, rdi",
53        // Build iret frame: SS, RSP, RFLAGS, CS, RIP
54        "mov r8, [rsi + {off_user_ss}]",
55        "push r8",
56        "mov r8, [rsi + {off_stack_top}]",
57        "push r8",
58        "mov r8, [rsi + {off_user_rflags}]",
59        "push r8",
60        "mov r8, [rsi + {off_user_cs}]",
61        "push r8",
62        "mov r8, [rsi + {off_entry}]",
63        "push r8",
64        // Argument convention for userspace entry: rdi = arg0
65        "mov rdi, [rsi + {off_arg0}]",
66        // Child thread returns 0 if entry routine ever reads rax.
67        "xor rax, rax",
68        "swapgs",
69        "iretq",
70        off_entry = const THREAD_OFF_ENTRY,
71        off_stack_top = const THREAD_OFF_STACK_TOP,
72        off_arg0 = const THREAD_OFF_ARG0,
73        off_user_cs = const THREAD_OFF_USER_CS,
74        off_user_rflags = const THREAD_OFF_USER_RFLAGS,
75        off_user_ss = const THREAD_OFF_USER_SS,
76    );
77}
78
79/// Performs the build user thread task operation.
80fn build_user_thread_task(
81    parent: &Arc<Task>,
82    bootstrap_ctx: Box<ThreadUserContext>,
83    tls_base: u64,
84) -> Result<Arc<Task>, SyscallError> {
85    let kernel_stack =
86        KernelStack::allocate(Task::DEFAULT_STACK_SIZE).map_err(|_| SyscallError::OutOfMemory)?;
87    let context = CpuContext::new(thread_child_start as *const () as u64, &kernel_stack);
88    let (pid, tid, _) = Task::allocate_process_ids();
89
90    let parent_fpu = unsafe { &*parent.fpu_state.get() };
91    let mut child_fpu = ExtendedState::new();
92    child_fpu.copy_from(parent_fpu);
93    let interrupt_frame = crate::syscall::SyscallFrame {
94        r15: 0,
95        r14: 0,
96        r13: 0,
97        r12: 0,
98        rbp: 0,
99        rbx: 0,
100        r11: bootstrap_ctx.user_rflags,
101        r10: 0,
102        r9: 0,
103        r8: 0,
104        rsi: 0,
105        rdi: bootstrap_ctx.arg0,
106        rdx: 0,
107        rcx: bootstrap_ctx.entry,
108        rax: 0,
109        iret_rip: bootstrap_ctx.entry,
110        iret_cs: bootstrap_ctx.user_cs,
111        iret_rflags: bootstrap_ctx.user_rflags,
112        iret_rsp: bootstrap_ctx.stack_top,
113        iret_ss: bootstrap_ctx.user_ss,
114    };
115
116    let task = Arc::new(Task {
117        id: crate::process::TaskId::new(),
118        pid,
119        tid,
120        tgid: parent.tgid,
121        pgid: core::sync::atomic::AtomicU32::new(parent.pgid.load(Ordering::Relaxed)),
122        sid: core::sync::atomic::AtomicU32::new(parent.sid.load(Ordering::Relaxed)),
123        uid: core::sync::atomic::AtomicU32::new(parent.uid.load(Ordering::Relaxed)),
124        euid: core::sync::atomic::AtomicU32::new(parent.euid.load(Ordering::Relaxed)),
125        gid: core::sync::atomic::AtomicU32::new(parent.gid.load(Ordering::Relaxed)),
126        egid: core::sync::atomic::AtomicU32::new(parent.egid.load(Ordering::Relaxed)),
127        state: core::sync::atomic::AtomicU8::new(crate::process::TaskState::Ready as u8),
128        priority: parent.priority,
129        context: SyncUnsafeCell::new(context),
130        resume_kind: SyncUnsafeCell::new(crate::process::task::ResumeKind::RetFrame),
131        interrupt_rsp: core::sync::atomic::AtomicU64::new(0),
132        kernel_stack,
133        user_stack: None,
134        name: "user-thread",
135        process: parent.process.clone(),
136        pending_signals: crate::process::signal::SignalSet::new(),
137        blocked_signals: parent.blocked_signals.clone(),
138        irq_signal_delivery_blocked: core::sync::atomic::AtomicBool::new(false),
139        signal_stack: SyncUnsafeCell::new(None),
140        itimers: crate::process::timer::ITimers::new(),
141        wake_pending: core::sync::atomic::AtomicBool::new(false),
142        wake_deadline_ns: core::sync::atomic::AtomicU64::new(0),
143        trampoline_entry: core::sync::atomic::AtomicU64::new(0),
144        trampoline_stack_top: core::sync::atomic::AtomicU64::new(0),
145        trampoline_arg0: core::sync::atomic::AtomicU64::new(0),
146        ticks: core::sync::atomic::AtomicU64::new(0),
147        sched_policy: SyncUnsafeCell::new(parent.sched_policy()),
148        home_cpu: core::sync::atomic::AtomicUsize::new(usize::MAX),
149        vruntime: core::sync::atomic::AtomicU64::new(parent.vruntime()),
150        fair_rq_generation: core::sync::atomic::AtomicU64::new(0),
151        fair_on_rq: core::sync::atomic::AtomicBool::new(false),
152        clear_child_tid: core::sync::atomic::AtomicU64::new(0),
153        user_fs_base: core::sync::atomic::AtomicU64::new(tls_base),
154        fpu_state: SyncUnsafeCell::new(child_fpu),
155        xcr0_mask: core::sync::atomic::AtomicU64::new(parent.xcr0_mask.load(Ordering::Relaxed)),
156        rt_link: intrusive_collections::LinkedListLink::new(),
157    });
158
159    // CpuContext initial stack layout: r15, r14, r13(arg), r12(entry), rbp, rbx, ret
160    // Seed r13 with bootstrap context pointer for `thread_child_start`.
161    unsafe {
162        let ctx = &mut *task.context.get();
163        let frame = ctx.saved_rsp as *mut u64;
164        *frame.add(2) = Box::into_raw(bootstrap_ctx) as u64;
165    }
166
167    task.seed_interrupt_frame(interrupt_frame);
168
169    Ok(task)
170}
171
172/// SYS_GETPID (311): Return current process ID.
173///
174/// In Strat9, each task has a unique ID, so getpid returns the TaskId.
175pub fn sys_getpid() -> Result<u64, SyscallError> {
176    current_task_clone()
177        .map(|task| task.tgid as u64)
178        .ok_or(SyscallError::Fault)
179}
180
181/// SYS_GETTID (312): Return current thread ID.
182///
183/// In the current single-threaded silo model, TID == PID.
184pub fn sys_gettid() -> Result<u64, SyscallError> {
185    current_tid()
186        .map(|tid| tid as u64)
187        .ok_or(SyscallError::Fault)
188}
189
190/// SYS_THREAD_CREATE (341): create a userspace thread sharing current process resources.
191pub fn sys_thread_create(
192    frame: &SyscallFrame,
193    entry: u64,
194    stack_top: u64,
195    arg0: u64,
196    flags: u64,
197    tls_base: u64,
198) -> Result<u64, SyscallError> {
199    const USER_TOP_EXCLUSIVE: u64 = 0x0000_8000_0000_0000;
200
201    if flags != 0 {
202        return Err(SyscallError::InvalidArgument);
203    }
204
205    if entry == 0
206        || stack_top == 0
207        || entry >= USER_TOP_EXCLUSIVE
208        || stack_top >= USER_TOP_EXCLUSIVE
209        || (stack_top & 0xF) != 0
210    {
211        return Err(SyscallError::InvalidArgument);
212    }
213
214    let parent = current_task_clone().ok_or(SyscallError::Fault)?;
215    if parent.is_kernel() {
216        return Err(SyscallError::PermissionDenied);
217    }
218
219    let user_ctx = Box::new(ThreadUserContext {
220        entry,
221        stack_top,
222        arg0,
223        user_cs: frame.iret_cs,
224        user_rflags: frame.iret_rflags | (1 << 9),
225        user_ss: frame.iret_ss,
226    });
227
228    let child = build_user_thread_task(&parent, user_ctx, tls_base)?;
229    let tid = child.tid as u64;
230    add_task_with_parent(child, parent.id);
231    Ok(tid)
232}
233
234/// SYS_THREAD_JOIN (342): wait for a thread created by the current task.
235pub fn sys_thread_join(tid: u64, status_ptr: u64, flags: u64) -> Result<u64, SyscallError> {
236    if flags != 0 {
237        return Err(SyscallError::InvalidArgument);
238    }
239
240    let wait_tid = u32::try_from(tid).map_err(|_| SyscallError::InvalidArgument)?;
241    let current = current_task_clone().ok_or(SyscallError::Fault)?;
242    if wait_tid == current.tid {
243        return Err(SyscallError::InvalidArgument);
244    }
245
246    let parent_id = current_task_id().ok_or(SyscallError::Fault)?;
247    let child_id = get_child_task_id_by_tid(parent_id, wait_tid).ok_or(SyscallError::NotFound)?;
248
249    loop {
250        match crate::process::try_wait_child(parent_id, Some(child_id)) {
251            WaitChildResult::Reaped { status, .. } => {
252                if status_ptr != 0 {
253                    let out = crate::memory::UserSliceWrite::new(status_ptr, 4)
254                        .map_err(|_| SyscallError::Fault)?;
255                    out.copy_from(&(status as i32).to_ne_bytes());
256                }
257                return Ok(wait_tid as u64);
258            }
259            WaitChildResult::NoChildren => return Err(SyscallError::NotFound),
260            WaitChildResult::StillRunning => block_current_task(),
261        }
262    }
263}
264
265/// SYS_THREAD_EXIT (343): exit only the current thread.
266pub fn sys_thread_exit(exit_code: u64) -> Result<u64, SyscallError> {
267    let code = i32::try_from(exit_code).map_err(|_| SyscallError::InvalidArgument)?;
268    crate::process::scheduler::exit_current_task(code)
269}
270
271/// SYS_PROC_GETPPID/SYS_GETPPID (309): Return parent process ID.
272pub fn sys_getppid() -> Result<u64, SyscallError> {
273    let child = current_task_id().ok_or(SyscallError::Fault)?;
274    Ok(get_parent_pid(child).map(|p| p as u64).unwrap_or(0))
275}
276
277/// SYS_GETPGID (318): Return process group id for `pid` (`0` = caller).
278pub fn sys_getpgid(pid: i64) -> Result<u64, SyscallError> {
279    if pid < 0 {
280        return Err(SyscallError::InvalidArgument);
281    }
282    if pid == 0 {
283        return current_pgid()
284            .map(|pgid| pgid as u64)
285            .ok_or(SyscallError::Fault);
286    }
287    get_pgid_by_pid(pid as u32)
288        .map(|pgid| pgid as u64)
289        .ok_or(SyscallError::NotFound)
290}
291
292/// POSIX getpgrp wrapper (equivalent to getpgid(0)).
293pub fn sys_getpgrp() -> Result<u64, SyscallError> {
294    current_pgid()
295        .map(|pgid| pgid as u64)
296        .ok_or(SyscallError::Fault)
297}
298
299/// SYS_GETSID (332): Return session id for `pid` (`0` = caller).
300pub fn sys_getsid(pid: i64) -> Result<u64, SyscallError> {
301    if pid < 0 {
302        return Err(SyscallError::InvalidArgument);
303    }
304    if pid == 0 {
305        return crate::process::current_sid()
306            .map(|sid| sid as u64)
307            .ok_or(SyscallError::Fault);
308    }
309    get_sid_by_pid(pid as u32)
310        .map(|sid| sid as u64)
311        .ok_or(SyscallError::NotFound)
312}
313
314/// SYS_SETPGID (317): set process group id.
315pub fn sys_setpgid(pid: i64, pgid: i64) -> Result<u64, SyscallError> {
316    if pid < 0 || pgid < 0 {
317        return Err(SyscallError::InvalidArgument);
318    }
319    let caller = current_task_id().ok_or(SyscallError::Fault)?;
320    let target_pid = if pid == 0 { None } else { Some(pid as u32) };
321    let new_pgid = if pgid == 0 { None } else { Some(pgid as u32) };
322    let final_pgid = set_process_group(caller, target_pid, new_pgid)?;
323    Ok(final_pgid as u64)
324}
325
326/// SYS_SETSID (319): create a new session.
327pub fn sys_setsid() -> Result<u64, SyscallError> {
328    let caller = current_task_id().ok_or(SyscallError::Fault)?;
329    create_session(caller).map(|sid| sid as u64)
330}
331
332// ========== Credentials ==============================
333
334/// SYS_GETUID (335): Return real user id.
335pub fn sys_getuid() -> Result<u64, SyscallError> {
336    let task = current_task_clone().ok_or(SyscallError::Fault)?;
337    Ok(task.uid.load(Ordering::Relaxed) as u64)
338}
339
340/// SYS_GETEUID (336): Return effective user id.
341pub fn sys_geteuid() -> Result<u64, SyscallError> {
342    let task = current_task_clone().ok_or(SyscallError::Fault)?;
343    Ok(task.euid.load(Ordering::Relaxed) as u64)
344}
345
346/// SYS_GETGID (337): Return real group id.
347pub fn sys_getgid() -> Result<u64, SyscallError> {
348    let task = current_task_clone().ok_or(SyscallError::Fault)?;
349    Ok(task.gid.load(Ordering::Relaxed) as u64)
350}
351
352/// SYS_GETEGID (338): Return effective group id.
353pub fn sys_getegid() -> Result<u64, SyscallError> {
354    let task = current_task_clone().ok_or(SyscallError::Fault)?;
355    Ok(task.egid.load(Ordering::Relaxed) as u64)
356}
357
358/// SYS_SETUID (339): Set real and effective user id (simplified: no capabilities check).
359pub fn sys_setuid(uid: u64) -> Result<u64, SyscallError> {
360    if uid > u32::MAX as u64 {
361        return Err(SyscallError::InvalidArgument);
362    }
363    let task = current_task_clone().ok_or(SyscallError::Fault)?;
364    // Privileged (uid==0) can set anything; unprivileged can only set to current uid/euid.
365    let euid = task.euid.load(Ordering::Relaxed);
366    let cur_uid = task.uid.load(Ordering::Relaxed);
367    if euid != 0 && uid as u32 != cur_uid && uid as u32 != euid {
368        return Err(SyscallError::PermissionDenied);
369    }
370    task.uid.store(uid as u32, Ordering::Relaxed);
371    task.euid.store(uid as u32, Ordering::Relaxed);
372    Ok(0)
373}
374
375/// SYS_SETGID (340): Set real and effective group id (simplified).
376pub fn sys_setgid(gid: u64) -> Result<u64, SyscallError> {
377    if gid > u32::MAX as u64 {
378        return Err(SyscallError::InvalidArgument);
379    }
380    let task = current_task_clone().ok_or(SyscallError::Fault)?;
381    let euid = task.euid.load(Ordering::Relaxed);
382    let cur_gid = task.gid.load(Ordering::Relaxed);
383    let egid = task.egid.load(Ordering::Relaxed);
384    if euid != 0 && gid as u32 != cur_gid && gid as u32 != egid {
385        return Err(SyscallError::PermissionDenied);
386    }
387    task.gid.store(gid as u32, Ordering::Relaxed);
388    task.egid.store(gid as u32, Ordering::Relaxed);
389    Ok(0)
390}
391
392// ========== Thread lifecycle helpers ================================================================================================================================================================
393
394/// SYS_SET_TID_ADDRESS (333): Store `tidptr` in the task; return current TID.
395///
396/// The kernel will write 0 to `tidptr` and call futex_wake when the thread
397/// exits. This is the mechanism used by pthreads for thread join.
398pub fn sys_set_tid_address(tidptr: u64) -> Result<u64, SyscallError> {
399    let task = current_task_clone().ok_or(SyscallError::Fault)?;
400    task.clear_child_tid.store(tidptr, Ordering::Relaxed);
401    Ok(task.tid as u64)
402}
403
404/// SYS_EXIT_GROUP (334): Exit all threads in the thread group.
405pub fn sys_exit_group(exit_code: u64) -> Result<u64, SyscallError> {
406    let current = current_task_clone().ok_or(SyscallError::Fault)?;
407    for sibling_id in get_task_ids_in_tgid(current.tgid) {
408        if sibling_id != current.id {
409            let _ = kill_task(sibling_id);
410        }
411    }
412
413    // Diverges : never returns.
414    crate::process::scheduler::exit_current_task(exit_code as i32)
415}
416
417// ========== Architecture-specific ==========================================================================================================================================================================
418
// x86_64 arch_prctl operation codes (Linux-compatible).

/// `arch_prctl` code: set the GS base.
const ARCH_SET_GS: u64 = 0x1001;
/// `arch_prctl` code: set the FS base.
const ARCH_SET_FS: u64 = 0x1002;
/// `arch_prctl` code: read the FS base.
const ARCH_GET_FS: u64 = 0x1003;
/// `arch_prctl` code: read the GS base.
const ARCH_GET_GS: u64 = 0x1004;

// MSR addresses for FS/GS base.

/// Model-specific register holding the FS segment base.
const MSR_FS_BASE: u32 = 0xC000_0100;
/// Model-specific register holding the GS segment base.
const MSR_GS_BASE: u32 = 0xC000_0101;
428
429/// SYS_ARCH_PRCTL (350): Architecture-specific process settings.
430///
431/// Supported operations:
432/// - `ARCH_SET_FS` (0x1002): Set user-space FS.base (Thread Local Storage).
433/// - `ARCH_GET_FS` (0x1003): Read current FS.base into *arg.
434pub fn sys_arch_prctl(code: u64, addr: u64) -> Result<u64, SyscallError> {
435    let task = current_task_clone().ok_or(SyscallError::Fault)?;
436    match code {
437        ARCH_SET_FS => {
438            // Store in task struct (so it survives context switches).
439            task.user_fs_base.store(addr, Ordering::Relaxed);
440            // Write to MSR immediately : we are the current task.
441            unsafe { wrmsr(MSR_FS_BASE, addr) };
442            Ok(0)
443        }
444        ARCH_GET_FS => {
445            let base = task.user_fs_base.load(Ordering::Relaxed);
446            // Write the 8-byte value back to the provided user pointer.
447            use crate::memory::UserSliceWrite;
448            let out = UserSliceWrite::new(addr, 8).map_err(|_| SyscallError::Fault)?;
449            out.copy_from(&base.to_ne_bytes());
450            Ok(0)
451        }
452        ARCH_SET_GS => {
453            // GS slot not separately stored for now.
454            unsafe { wrmsr(MSR_GS_BASE, addr) };
455            Ok(0)
456        }
457        ARCH_GET_GS => {
458            let base = unsafe { rdmsr(MSR_GS_BASE) };
459            use crate::memory::UserSliceWrite;
460            let out = UserSliceWrite::new(addr, 8).map_err(|_| SyscallError::Fault)?;
461            out.copy_from(&base.to_ne_bytes());
462            Ok(0)
463        }
464        _ => Err(SyscallError::InvalidArgument),
465    }
466}
467
/// Write a 64-bit value to an MSR.
///
/// The value is split into its low and high halves, which `wrmsr` expects in
/// `eax` and `edx`; the register index goes in `ecx`.
///
/// # Safety
/// Must only be called with valid MSR addresses. Misuse causes a #GP.
#[inline]
unsafe fn wrmsr(msr: u32, value: u64) {
    let (low_half, high_half) = (value as u32, (value >> 32) as u32);
    // SAFETY: the caller guarantees `msr` names a writable MSR.
    unsafe {
        core::arch::asm!(
            "wrmsr",
            in("ecx") msr,
            in("eax") low_half,
            in("edx") high_half,
            options(nostack, preserves_flags),
        );
    }
}
486
/// Read a 64-bit value from an MSR.
///
/// `rdmsr` returns the register in two halves (`eax` low, `edx` high), which
/// are recombined here.
///
/// # Safety
/// Must only be called with valid MSR addresses.
#[inline]
unsafe fn rdmsr(msr: u32) -> u64 {
    let (low_half, high_half): (u32, u32);
    // SAFETY: the caller guarantees `msr` names a readable MSR.
    unsafe {
        core::arch::asm!(
            "rdmsr",
            in("ecx") msr,
            out("eax") low_half,
            out("edx") high_half,
            options(nostack, preserves_flags),
        );
    }
    (u64::from(high_half) << 32) | u64::from(low_half)
}
506
507// ========== tgkill ==================================================
508
509/// SYS_TGKILL (352): Send a signal to a specific thread in a thread group.
510///
511/// In the current single-threaded model, tgid and tid both map to a single
512/// task (pid == tid == tgid). We verify both match before delivering.
513pub fn sys_tgkill(tgid: u64, tid: u64, signum: u64) -> Result<u64, SyscallError> {
514    use crate::process::{get_task_by_pid, send_signal, Signal};
515
516    // Sanity check.
517    if signum as u32 >= 64 {
518        return Err(SyscallError::InvalidArgument);
519    }
520
521    // Resolve tgid → task.
522    let task = get_task_by_pid(tgid as u32).ok_or(SyscallError::NotFound)?;
523
524    // Verify the tid matches (single-threaded: task.tid == task.pid).
525    if task.tid as u64 != tid && task.pid as u64 != tid {
526        return Err(SyscallError::NotFound);
527    }
528
529    if signum == 0 {
530        return Ok(0); // existence check only
531    }
532
533    let sig = Signal::from_u32(signum as u32).ok_or(SyscallError::InvalidArgument)?;
534    send_signal(task.id, sig)?;
535    Ok(0)
536}