1use crate::{
27 memory::{resolve_handle, AddressSpace, EffectiveMapping, VmaPageSize},
28 process::{
29 current_task_clone,
30 scheduler::add_task_with_parent,
31 signal::{SigActionData, SigStack, SignalSet},
32 task::{CpuContext, KernelStack, Pid, SyncUnsafeCell, Task},
33 TaskId, TaskState,
34 },
35 syscall::{error::SyscallError, SyscallFrame},
36};
37use alloc::{boxed::Box, sync::Arc};
38use core::{
39 mem::offset_of,
40 sync::atomic::{AtomicBool, AtomicU32, AtomicU64, AtomicUsize, Ordering},
41};
42use x86_64::structures::paging::mapper::TranslateResult;
/// Result of a successful `fork`: identifies the newly created child.
pub struct ForkResult {
    /// PID assigned to the child task (this is what the parent's `fork` returns).
    pub child_pid: Pid,
}
47
48#[inline]
50fn local_invlpg(vaddr: u64) {
51 crate::arch::x86_64::tlb::local_page(x86_64::VirtAddr::new(vaddr));
54}
55
/// Snapshot of the parent's trap-time register state, used to bootstrap the
/// child's first entry into user mode after `fork`.
///
/// `#[repr(C)]` plus the `OFF_*` `offset_of!` constants below expose the field
/// layout to the naked assembly in `fork_iret_from_ctx`; do not reorder or
/// insert fields without updating that routine.
#[repr(C)]
#[derive(Clone, Copy)]
struct ForkUserContext {
    // General-purpose registers as captured from the parent's syscall frame.
    r15: u64,
    r14: u64,
    r13: u64,
    r12: u64,
    rbp: u64,
    rbx: u64,
    r11: u64,
    r10: u64,
    r9: u64,
    r8: u64,
    rsi: u64,
    rdi: u64,
    rdx: u64,
    rcx: u64,
    // User-mode `iretq` frame: where and how the child resumes execution.
    user_rip: u64,
    user_cs: u64,
    user_rflags: u64,
    user_rsp: u64,
    user_ss: u64,
}
79
// Byte offsets of each `ForkUserContext` field, resolved at compile time so
// the naked assembly in `fork_iret_from_ctx` can address the struct without
// hard-coded magic numbers.
const OFF_R15: usize = offset_of!(ForkUserContext, r15);
const OFF_R14: usize = offset_of!(ForkUserContext, r14);
const OFF_R13: usize = offset_of!(ForkUserContext, r13);
const OFF_R12: usize = offset_of!(ForkUserContext, r12);
const OFF_RBP: usize = offset_of!(ForkUserContext, rbp);
const OFF_RBX: usize = offset_of!(ForkUserContext, rbx);
const OFF_R11: usize = offset_of!(ForkUserContext, r11);
const OFF_R10: usize = offset_of!(ForkUserContext, r10);
const OFF_R9: usize = offset_of!(ForkUserContext, r9);
const OFF_R8: usize = offset_of!(ForkUserContext, r8);
const OFF_RSI: usize = offset_of!(ForkUserContext, rsi);
const OFF_RDI: usize = offset_of!(ForkUserContext, rdi);
const OFF_RDX: usize = offset_of!(ForkUserContext, rdx);
const OFF_RCX: usize = offset_of!(ForkUserContext, rcx);
const OFF_USER_RIP: usize = offset_of!(ForkUserContext, user_rip);
const OFF_USER_CS: usize = offset_of!(ForkUserContext, user_cs);
const OFF_USER_RFLAGS: usize = offset_of!(ForkUserContext, user_rflags);
const OFF_USER_RSP: usize = offset_of!(ForkUserContext, user_rsp);
const OFF_USER_SS: usize = offset_of!(ForkUserContext, user_ss);
99
100extern "C" fn fork_child_start(ctx_ptr: u64) -> ! {
102 let boxed = unsafe { Box::from_raw(ctx_ptr as *mut ForkUserContext) };
103 let ctx = *boxed;
104 unsafe { fork_iret_from_ctx(&ctx as *const ForkUserContext) }
105}
106
/// Performs the final drop from kernel mode to user mode for a forked child.
///
/// `_ctx` (in `rdi`, per the C ABI) points at the `ForkUserContext` snapshot
/// of the parent's trap-time registers. The routine builds an `iretq` frame
/// from the user `ss`/`rsp`/`rflags`/`cs`/`rip` fields, restores the
/// general-purpose registers from the struct, forces `rax = 0` (fork's return
/// value as seen by the child), executes `swapgs`, and `iretq`s into user
/// mode. Never returns.
#[unsafe(naked)]
unsafe extern "C" fn fork_iret_from_ctx(_ctx: *const ForkUserContext) -> ! {
    core::arch::naked_asm!(
        // No interrupts while the stack holds a half-built iretq frame.
        "cli",
        // Keep the struct base in rsi; rdi itself is restored from the
        // struct near the end.
        "mov rsi, rdi",
        // Push the 5-word iretq frame in the order the CPU pops it:
        // ss, rsp, rflags, cs, rip (rip ends up on top of the stack).
        "mov r8, [rsi + {off_user_ss}]",
        "push r8",
        "mov r8, [rsi + {off_user_rsp}]",
        "push r8",
        "mov r8, [rsi + {off_user_rflags}]",
        "push r8",
        "mov r8, [rsi + {off_user_cs}]",
        "push r8",
        "mov r8, [rsi + {off_user_rip}]",
        "push r8",
        // Restore the general-purpose registers from the snapshot.
        "mov r15, [rsi + {off_r15}]",
        "mov r14, [rsi + {off_r14}]",
        "mov r13, [rsi + {off_r13}]",
        "mov r12, [rsi + {off_r12}]",
        "mov rbp, [rsi + {off_rbp}]",
        "mov rbx, [rsi + {off_rbx}]",
        "mov r11, [rsi + {off_r11}]",
        "mov r10, [rsi + {off_r10}]",
        "mov r9, [rsi + {off_r9}]",
        "mov r8, [rsi + {off_r8}]",
        "mov rdx, [rsi + {off_rdx}]",
        "mov rcx, [rsi + {off_rcx}]",
        "mov rdi, [rsi + {off_rdi}]",
        // The child observes fork() returning 0.
        "mov rax, 0",
        // rsi is reloaded last because it is the base pointer for every load
        // above.
        "mov rsi, [rsi + {off_rsi}]",
        // NOTE(review): assumes the CPU is currently on the kernel GS base,
        // so one swapgs restores the user GS base — confirm against the
        // syscall/interrupt entry path.
        "swapgs",
        "iretq",
        off_r15 = const OFF_R15,
        off_r14 = const OFF_R14,
        off_r13 = const OFF_R13,
        off_r12 = const OFF_R12,
        off_rbp = const OFF_RBP,
        off_rbx = const OFF_RBX,
        off_r11 = const OFF_R11,
        off_r10 = const OFF_R10,
        off_r9 = const OFF_R9,
        off_r8 = const OFF_R8,
        off_rsi = const OFF_RSI,
        off_rdi = const OFF_RDI,
        off_rdx = const OFF_RDX,
        off_rcx = const OFF_RCX,
        off_user_rip = const OFF_USER_RIP,
        off_user_cs = const OFF_USER_CS,
        off_user_rflags = const OFF_USER_RFLAGS,
        off_user_rsp = const OFF_USER_RSP,
        off_user_ss = const OFF_USER_SS,
    );
}
167
/// Constructs the child `Task` for `fork`.
///
/// * `parent` – the forking task; credentials, fd table, signal state,
///   scheduling parameters, cwd/umask and FPU state are copied from it.
/// * `child_as` – the child's (already COW-cloned) address space.
/// * `bootstrap_ctx` – heap-allocated register snapshot consumed by
///   `fork_child_start` on the child's first scheduling.
///
/// Returns the ready-to-schedule child task, or `OutOfMemory` if the kernel
/// stack allocation fails.
fn build_child_task(
    parent: &Arc<Task>,
    child_as: Arc<AddressSpace>,
    bootstrap_ctx: Box<ForkUserContext>,
) -> Result<Arc<Task>, SyscallError> {
    // Fresh kernel stack; the child first runs `fork_child_start`.
    let kernel_stack =
        KernelStack::allocate(Task::DEFAULT_STACK_SIZE).map_err(|_| SyscallError::OutOfMemory)?;
    let context = CpuContext::new(fork_child_start as *const () as u64, &kernel_stack);

    // Snapshot inherited per-process/per-task state from the parent.
    // NOTE(review): these raw-cell reads assume the parent cannot mutate this
    // state concurrently while it is itself executing fork — confirm.
    let parent_caps = unsafe { (&*parent.process.capabilities.get()).clone() };
    let parent_fd = unsafe { (&*parent.process.fd_table.get()).clone_for_fork() };
    let parent_blocked = parent.blocked_signals.clone();
    let parent_actions: [SigActionData; 64] = unsafe { *parent.process.signal_actions.get() };
    let parent_sigstack: Option<SigStack> = unsafe { *parent.signal_stack.get() };
    // Mirror of the parent's trap frame with rax forced to 0 — the value
    // fork() returns in the child.
    let interrupt_frame = crate::syscall::SyscallFrame {
        r15: bootstrap_ctx.r15,
        r14: bootstrap_ctx.r14,
        r13: bootstrap_ctx.r13,
        r12: bootstrap_ctx.r12,
        rbp: bootstrap_ctx.rbp,
        rbx: bootstrap_ctx.rbx,
        r11: bootstrap_ctx.r11,
        r10: bootstrap_ctx.r10,
        r9: bootstrap_ctx.r9,
        r8: bootstrap_ctx.r8,
        rsi: bootstrap_ctx.rsi,
        rdi: bootstrap_ctx.rdi,
        rdx: bootstrap_ctx.rdx,
        rcx: bootstrap_ctx.rcx,
        rax: 0,
        iret_rip: bootstrap_ctx.user_rip,
        iret_cs: bootstrap_ctx.user_cs,
        iret_rflags: bootstrap_ctx.user_rflags,
        iret_rsp: bootstrap_ctx.user_rsp,
        iret_ss: bootstrap_ctx.user_ss,
    };

    // New identity for the child; the address space records its owner PID.
    let (pid, tid, tgid) = Task::allocate_process_ids();
    child_as.set_owner_pid(pid);
    let task = Arc::new(Task {
        id: TaskId::new(),
        pid,
        tid,
        tgid,
        // Process group, session and credentials are inherited from the parent.
        pgid: AtomicU32::new(parent.pgid.load(Ordering::Relaxed)),
        sid: AtomicU32::new(parent.sid.load(Ordering::Relaxed)),
        uid: AtomicU32::new(parent.uid.load(Ordering::Relaxed)),
        euid: AtomicU32::new(parent.euid.load(Ordering::Relaxed)),
        gid: AtomicU32::new(parent.gid.load(Ordering::Relaxed)),
        egid: AtomicU32::new(parent.egid.load(Ordering::Relaxed)),
        state: core::sync::atomic::AtomicU8::new(TaskState::Ready as u8),
        priority: parent.priority,
        context: SyncUnsafeCell::new(context),
        resume_kind: SyncUnsafeCell::new(crate::process::task::ResumeKind::RetFrame),
        interrupt_rsp: AtomicU64::new(0),
        kernel_stack,
        user_stack: None,

        name: "fork-child",
        // Per-process state: COW address space plus copies of the parent's
        // fd table, capabilities, signal actions, brk/mmap hints, cwd, umask.
        process: alloc::sync::Arc::new(crate::process::process::Process {
            pid,
            address_space: crate::process::task::SyncUnsafeCell::new(child_as),
            address_space_lock: crate::sync::SpinLock::new(()),
            fd_table: crate::process::task::SyncUnsafeCell::new(parent_fd),
            capabilities: crate::process::task::SyncUnsafeCell::new(parent_caps),
            signal_actions: crate::process::task::SyncUnsafeCell::new(parent_actions),
            brk: core::sync::atomic::AtomicU64::new(
                parent
                    .process
                    .brk
                    .load(core::sync::atomic::Ordering::Relaxed),
            ),
            mmap_hint: core::sync::atomic::AtomicU64::new(
                parent
                    .process
                    .mmap_hint
                    .load(core::sync::atomic::Ordering::Relaxed),
            ),
            cwd: crate::process::task::SyncUnsafeCell::new(
                unsafe { &*parent.process.cwd.get() }.clone(),
            ),
            cwd_fd: core::sync::atomic::AtomicU64::new(
                parent
                    .process
                    .cwd_fd
                    .load(core::sync::atomic::Ordering::Relaxed),
            ),
            umask: core::sync::atomic::AtomicU32::new(
                parent
                    .process
                    .umask
                    .load(core::sync::atomic::Ordering::Relaxed),
            ),
        }),
        // Pending signals start empty; the blocked mask and alt stack are
        // inherited; timers are fresh.
        pending_signals: SignalSet::new(),
        blocked_signals: parent_blocked,
        irq_signal_delivery_blocked: AtomicBool::new(false),
        signal_stack: SyncUnsafeCell::new(parent_sigstack),
        itimers: crate::process::timer::ITimers::new(),
        wake_pending: AtomicBool::new(false),
        wake_deadline_ns: AtomicU64::new(0),
        trampoline_entry: AtomicU64::new(0),
        trampoline_stack_top: AtomicU64::new(0),
        trampoline_arg0: AtomicU64::new(0),
        ticks: AtomicU64::new(0),
        // Scheduling: inherit policy and vruntime; no home CPU assigned yet.
        sched_policy: SyncUnsafeCell::new(parent.sched_policy()),
        home_cpu: AtomicUsize::new(usize::MAX),
        vruntime: AtomicU64::new(parent.vruntime()),
        fair_rq_generation: AtomicU64::new(0),
        fair_on_rq: AtomicBool::new(false),
        clear_child_tid: AtomicU64::new(0),
        user_fs_base: AtomicU64::new(0),
        // Duplicate the parent's extended (FPU/SSE/AVX) register state.
        fpu_state: {
            let parent_fpu = unsafe { &*parent.fpu_state.get() };
            let mut child_fpu = crate::process::task::ExtendedState::new();
            child_fpu.copy_from(parent_fpu);
            SyncUnsafeCell::new(child_fpu)
        },
        xcr0_mask: AtomicU64::new(parent.xcr0_mask.load(core::sync::atomic::Ordering::Relaxed)),
        rt_link: intrusive_collections::LinkedListLink::new(),
    });

    // Stash the raw bootstrap pointer in the saved switch frame so it arrives
    // as `fork_child_start`'s first argument; ownership transfers to the child.
    // NOTE(review): assumes `CpuContext::new` lays out the frame such that
    // `saved_rsp + 2*8` is the slot the context-switch code feeds into the
    // entry function's argument register — confirm against CpuContext.
    unsafe {
        let ctx = &mut *task.context.get();
        let frame = ctx.saved_rsp as *mut u64;
        *frame.add(2) = Box::into_raw(bootstrap_ctx) as u64;
    }

    task.seed_interrupt_frame(interrupt_frame);

    Ok(task)
}
309
310pub fn sys_fork(frame: &SyscallFrame) -> Result<ForkResult, SyscallError> {
312 let parent = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
313
314 if parent.is_kernel() {
316 log::warn!("fork: attempt to fork kernel thread '{}'", parent.name);
317 return Err(SyscallError::PermissionDenied);
318 }
319
320 let parent_as = parent.process.address_space_arc();
325
326 if !parent_as.has_user_mappings() {
328 log::warn!(
329 "fork: attempt to fork task '{}' with no user mappings",
330 parent.name
331 );
332 return Err(SyscallError::InvalidArgument);
333 }
334
335 let child_as = parent_as
336 .clone_cow()
337 .map_err(|_| SyscallError::OutOfMemory)?;
338
339 let child_user_ctx = Box::new(ForkUserContext {
340 r15: frame.r15,
341 r14: frame.r14,
342 r13: frame.r13,
343 r12: frame.r12,
344 rbp: frame.rbp,
345 rbx: frame.rbx,
346 r11: frame.r11,
347 r10: frame.r10,
348 r9: frame.r9,
349 r8: frame.r8,
350 rsi: frame.rsi,
351 rdi: frame.rdi,
352 rdx: frame.rdx,
353 rcx: frame.rcx,
354 user_rip: frame.iret_rip,
355 user_cs: frame.iret_cs,
356 user_rflags: frame.iret_rflags,
357 user_rsp: frame.iret_rsp,
358 user_ss: frame.iret_ss,
359 });
360
361 let child_task = build_child_task(&parent, child_as, child_user_ctx)?;
362 let child_pid = child_task.pid;
363 add_task_with_parent(child_task, parent.id);
364
365 Ok(ForkResult { child_pid })
366}
367
/// Resolves a write fault on a copy-on-write page at `virt_addr`.
///
/// If this address space is the sole owner of the backing frame (refcount 1),
/// the page is made writable in place. Otherwise the frame is copied to a
/// freshly allocated frame (4 KiB or 2 MiB to match the VMA), the page is
/// remapped to the copy, and the effective-mapping table is updated. On any
/// failure the original mapping is restored best-effort and the new frame is
/// freed.
///
/// Returns `Ok(())` on success, or a static error string describing the
/// failure (not mapped, not COW, OOM, or a page-table operation failing).
pub fn handle_cow_fault(virt_addr: u64, address_space: &AddressSpace) -> Result<(), &'static str> {
    use crate::memory::paging::BuddyFrameAllocator;
    use x86_64::{
        structures::paging::{Mapper, Page, PageTableFlags, Size2MiB, Size4KiB, Translate},
        VirtAddr,
    };

    // Locate the tracked mapping covering the faulting address.
    let mapping = address_space
        .effective_mapping_containing(virt_addr)
        .ok_or("Page not mapped")?;
    let page_start = mapping.start;
    let page = Page::<Size4KiB>::containing_address(VirtAddr::new(page_start));

    let mut mapper = unsafe { address_space.mapper() };

    // Read the current physical frame and flags from the live page tables.
    let (phys_frame_addr, flags) = match mapper.translate(VirtAddr::new(page_start)) {
        TranslateResult::Mapped {
            frame,
            offset: _,
            flags,
        } => (frame.start_address(), flags),
        _ => return Err("Page not mapped"),
    };

    // Software-defined bit 9 marks a page as copy-on-write.
    const COW_BIT: PageTableFlags = PageTableFlags::BIT_9;

    if !flags.contains(COW_BIT) {
        return Err("Not a COW page");
    }

    let old_handle = mapping.handle;
    let refcount = crate::memory::cow::handle_get_refcount(old_handle);

    // Sole owner: no copy needed — flip the page writable and clear the COW
    // bit in place.
    // NOTE(review): there is a window between reading the refcount and
    // updating the flags; presumably callers serialize COW faults per
    // address space (e.g. via address_space_lock) — confirm.
    if refcount == 1 {
        let new_flags = (flags | PageTableFlags::WRITABLE) & !COW_BIT;

        unsafe {
            match mapping.page_size {
                VmaPageSize::Small => mapper
                    .update_flags(page, new_flags)
                    .map_err(|_| "Failed to update 4K flags")?
                    .flush(),
                VmaPageSize::Huge => mapper
                    .update_flags(
                        Page::<Size2MiB>::containing_address(VirtAddr::new(page_start)),
                        new_flags | PageTableFlags::HUGE_PAGE,
                    )
                    .map_err(|_| "Failed to update 2M flags")?
                    .flush(),
            }
        }
        // Keep the tracked flags in sync with the live page-table entry.
        let tracked_flags = match mapping.page_size {
            VmaPageSize::Small => new_flags,
            VmaPageSize::Huge => new_flags | PageTableFlags::HUGE_PAGE,
        };
        let _ = address_space.update_effective_mapping_flags(page_start, tracked_flags);
        local_invlpg(virt_addr);
        return Ok(());
    }

    // Shared frame: allocate a private copy (order 9 = 2 MiB for huge pages).
    let mut frame_allocator = BuddyFrameAllocator;
    let order = match mapping.page_size {
        VmaPageSize::Small => 0,
        VmaPageSize::Huge => 9,
    };
    let copy_bytes = mapping.page_size.bytes() as usize;
    let new_frame = crate::sync::with_irqs_disabled(|token| {
        if order == 0 {
            crate::memory::allocate_frame(token)
        } else {
            crate::memory::allocate_phys_contiguous(token, order)
        }
    })
    .map_err(|_| "OOM during COW copy")?;

    // Copy the page contents through the physical-memory direct map.
    unsafe {
        let src = crate::memory::phys_to_virt(phys_frame_addr.as_u64()) as *const u8;
        let dst = crate::memory::phys_to_virt(new_frame.start_address.as_u64()) as *mut u8;
        core::ptr::copy_nonoverlapping(src, dst, copy_bytes);
    }

    // The private copy is writable and no longer COW.
    let new_flags = (flags | PageTableFlags::WRITABLE) & !COW_BIT;
    let tracked_flags = match mapping.page_size {
        VmaPageSize::Small => new_flags,
        VmaPageSize::Huge => new_flags | PageTableFlags::HUGE_PAGE,
    };
    let new_handle = resolve_handle(new_frame.start_address);

    // Swap the page-table entry from the shared frame to the private copy.
    let remap_res: Result<(), &'static str> = match mapping.page_size {
        VmaPageSize::Small => {
            let old_unmapped = mapper
                .unmap(page)
                .map_err(|_| "Failed to unmap old 4K COW frame")?
                .0;
            debug_assert_eq!(old_unmapped.start_address(), phys_frame_addr);
            unsafe {
                mapper.map_to(
                    page,
                    x86_64::structures::paging::PhysFrame::<Size4KiB>::containing_address(
                        new_frame.start_address,
                    ),
                    new_flags,
                    &mut frame_allocator,
                )
            }
            .map(|flush| flush.flush())
            .map_err(|_| "Failed to map new 4K COW frame")
        }
        VmaPageSize::Huge => {
            let huge_page = Page::<Size2MiB>::containing_address(VirtAddr::new(page_start));
            let old_unmapped = mapper
                .unmap(huge_page)
                .map_err(|_| "Failed to unmap old 2M COW frame")?
                .0;
            debug_assert_eq!(old_unmapped.start_address(), phys_frame_addr);
            unsafe {
                mapper.map_to(
                    huge_page,
                    x86_64::structures::paging::PhysFrame::<Size2MiB>::containing_address(
                        new_frame.start_address,
                    ),
                    tracked_flags,
                    &mut frame_allocator,
                )
            }
            .map(|flush| flush.flush())
            .map_err(|_| "Failed to map new 2M COW frame")
        }
    };
    // Remap failed: best-effort restore of the original shared mapping, then
    // release the unused private frame.
    if remap_res.is_err() {
        match mapping.page_size {
            VmaPageSize::Small => unsafe {
                let _ = mapper.map_to(
                    page,
                    x86_64::structures::paging::PhysFrame::<Size4KiB>::containing_address(
                        phys_frame_addr,
                    ),
                    flags,
                    &mut frame_allocator,
                );
            },
            VmaPageSize::Huge => unsafe {
                let huge_page = Page::<Size2MiB>::containing_address(VirtAddr::new(page_start));
                let _ = mapper.map_to(
                    huge_page,
                    x86_64::structures::paging::PhysFrame::<Size2MiB>::containing_address(
                        phys_frame_addr,
                    ),
                    flags,
                    &mut frame_allocator,
                );
            },
        }
        crate::sync::with_irqs_disabled(|token| {
            if order == 0 {
                crate::memory::free_frame(token, new_frame);
            } else {
                crate::memory::free_phys_contiguous(token, new_frame, order);
            }
        });
        return Err(remap_res.err().unwrap_or("Failed to map new COW frame"));
    }

    // Start tracking the private frame's handle.
    // NOTE(review): the old handle's refcount is read above but never
    // decremented on this path — presumably `register_effective_mapping`
    // (replacing the tracked entry) or address-space teardown releases the
    // old reference; confirm, otherwise shared frames leak a refcount here.
    crate::memory::cow::handle_init_ref(new_handle);

    // Record the new mapping; on failure, roll the page tables back to the
    // shared frame and free the private copy.
    if address_space
        .register_effective_mapping(EffectiveMapping {
            start: page_start,
            cap_id: mapping.cap_id,
            handle: new_handle,
            flags: tracked_flags,
            page_size: mapping.page_size,
        })
        .is_err()
    {
        match mapping.page_size {
            VmaPageSize::Small => {
                let _ = mapper.unmap(page);
                let _ = unsafe {
                    mapper.map_to(
                        page,
                        x86_64::structures::paging::PhysFrame::<Size4KiB>::containing_address(
                            phys_frame_addr,
                        ),
                        flags,
                        &mut frame_allocator,
                    )
                }
                .map(|flush| flush.flush());
            }
            VmaPageSize::Huge => {
                let huge_page = Page::<Size2MiB>::containing_address(VirtAddr::new(page_start));
                let _ = mapper.unmap(huge_page);
                let _ = unsafe {
                    mapper.map_to(
                        huge_page,
                        x86_64::structures::paging::PhysFrame::<Size2MiB>::containing_address(
                            phys_frame_addr,
                        ),
                        flags,
                        &mut frame_allocator,
                    )
                }
                .map(|flush| flush.flush());
            }
        }
        crate::sync::with_irqs_disabled(|token| {
            if order == 0 {
                crate::memory::free_frame(token, new_frame);
            } else {
                crate::memory::free_phys_contiguous(token, new_frame, order);
            }
        });
        return Err("Failed to track new COW mapping");
    }

    // Drop the stale TLB entry for the faulting address on this CPU.
    local_invlpg(virt_addr);

    Ok(())
}