1use crate::{
5 memory::{AddressSpace, UserSliceRead, VmaFlags, VmaPageSize, VmaType},
6 process::{
7 current_task_clone,
8 elf::{load_elf_image, LoadedElfInfo, USER_STACK_BASE, USER_STACK_PAGES, USER_STACK_TOP},
9 get_task_ids_in_tgid,
10 },
11 syscall::{error::SyscallError, SyscallFrame},
12 vfs,
13};
14use alloc::vec::Vec;
15
// ELF auxiliary-vector entry types. `setup_user_stack` pushes `(type, value)`
// pairs built from these onto the initial user stack; the numeric values are
// the standard `AT_*` identifiers that user-space loaders and libcs look up.

/// Terminator entry; written last (with value 0) to mark the end of the vector.
const AT_NULL: u64 = 0;
/// Paired with `LoadedElfInfo::phdr_vaddr` (address of the program headers).
const AT_PHDR: u64 = 3;
/// Paired with `LoadedElfInfo::phent` (size of one program-header entry).
const AT_PHENT: u64 = 4;
/// Paired with `LoadedElfInfo::phnum` (number of program headers).
const AT_PHNUM: u64 = 5;
/// Page size reported to user space; this file always reports 4096.
const AT_PAGESZ: u64 = 6;
/// Paired with `LoadedElfInfo::interp_base`; only emitted when that is `Some`.
const AT_BASE: u64 = 7;
/// Paired with `LoadedElfInfo::program_entry`.
const AT_ENTRY: u64 = 9;
/// Address of the 16 seed bytes placed on the stack for user space.
const AT_RANDOM: u64 = 25;
/// Address of the NUL-terminated executable path copied onto the stack;
/// only emitted when the path is non-empty.
const AT_EXECFN: u64 = 31;
25
26fn read_exec_image(path: &str) -> Result<Option<Vec<u8>>, SyscallError> {
28 if crate::vfs::get_initfs_file_bytes(path).is_some() {
29 return Ok(None);
30 }
31
32 let fd = vfs::open(path, vfs::OpenFlags::READ)?;
33
34 const MAX_EXEC_SIZE: usize = 64 * 1024 * 1024;
35 let mut elf_data = Vec::new();
36 let mut buf = [0u8; 4096];
37 loop {
38 match vfs::read(fd, &mut buf) {
39 Ok(n) => {
40 if n == 0 {
41 break;
42 }
43 if elf_data.len() + n > MAX_EXEC_SIZE {
44 let _ = vfs::close(fd);
45 return Err(SyscallError::OutOfMemory);
46 }
47 elf_data.extend_from_slice(&buf[..n]);
48 }
49 Err(e) => {
50 let _ = vfs::close(fd);
51 return Err(e);
52 }
53 }
54 }
55 let _ = vfs::close(fd);
56
57 Ok(Some(elf_data))
58}
59
60pub fn sys_execve(
75 frame: &mut SyscallFrame,
76 path_ptr: u64,
77 argv_ptr: u64,
78 envp_ptr: u64,
79) -> Result<u64, SyscallError> {
80 let current = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
81
82 if get_task_ids_in_tgid(current.tgid).len() > 1 {
86 return Err(SyscallError::NotSupported);
87 }
88
89 let mut path_buf = [0u8; 4096];
90 let path_slice = UserSliceRead::new(path_ptr, 4096).map_err(|_| SyscallError::Fault)?;
91
92 let mut len = 0;
93
94 loop {
95 if len >= 4096 {
96 return Err(SyscallError::ArgumentListTooLong);
97 } let b = path_slice.read_u8(len).map_err(|_| SyscallError::Fault)?;
99 if b == 0 {
100 break;
101 }
102 path_buf[len] = b;
103 len += 1;
104 }
105 let path_str =
106 core::str::from_utf8(&path_buf[..len]).map_err(|_| SyscallError::InvalidArgument)?;
107
108 let owned_elf_data = read_exec_image(path_str)?;
109 let elf_data = owned_elf_data
110 .as_deref()
111 .or_else(|| crate::vfs::get_initfs_file_bytes(path_str))
112 .ok_or(SyscallError::NotFound)?;
113
114 if elf_data.len() < 4 {
115 return Err(SyscallError::ExecFormatError);
116 }
117
118 let new_as = AddressSpace::new_user().map_err(|_| SyscallError::OutOfMemory)?;
119 let new_as_arc = alloc::sync::Arc::new(new_as);
120 new_as_arc.set_owner_pid(current.pid);
121
122 let load_info =
123 load_elf_image(elf_data, &new_as_arc).map_err(|_| SyscallError::ExecFormatError)?;
124
125 let stack_flags = VmaFlags {
126 readable: true,
127 writable: true,
128 executable: false,
129 user_accessible: true,
130 };
131 new_as_arc
132 .map_region(
133 USER_STACK_BASE,
134 USER_STACK_PAGES,
135 stack_flags,
136 VmaType::Stack,
137 VmaPageSize::Small,
138 )
139 .map_err(|_| SyscallError::OutOfMemory)?;
140
141 let sp = setup_user_stack(
142 &new_as_arc,
143 argv_ptr,
144 envp_ptr,
145 &load_info,
146 path_str.as_bytes(),
147 )?;
148
149 let mut new_fs_base = 0u64;
151 if load_info.tls_memsz > 0 {
152 let tls_align = core::cmp::max(load_info.tls_align, 8).next_power_of_two();
153 let aligned_memsz = (load_info.tls_memsz + tls_align - 1) & !(tls_align - 1);
154 let total_size = aligned_memsz + 8;
155 let n_pages = ((total_size + 4095) / 4096) as usize;
156 let tls_flags = VmaFlags {
157 readable: true,
158 writable: true,
159 executable: false,
160 user_accessible: true,
161 };
162 let tls_base = new_as_arc
163 .find_free_vma_range(0x7FFF_E000_0000, n_pages, VmaPageSize::Small)
164 .ok_or(SyscallError::OutOfMemory)?;
165 new_as_arc
166 .map_region(
167 tls_base,
168 n_pages,
169 tls_flags,
170 VmaType::Anonymous,
171 VmaPageSize::Small,
172 )
173 .map_err(|_| SyscallError::OutOfMemory)?;
174 if load_info.tls_filesz > 0 && load_info.tls_vaddr != 0 {
175 let src_vaddr = load_info.tls_vaddr;
176 let mut off = 0u64;
177 let mut tmp = [0u8; 256];
178 while off < load_info.tls_filesz {
179 let chunk = core::cmp::min(256, (load_info.tls_filesz - off) as usize);
180 crate::process::elf::read_user_mapped_bytes_pub(
181 &new_as_arc,
182 src_vaddr + off,
183 &mut tmp[..chunk],
184 )
185 .map_err(|_| SyscallError::Fault)?;
186 crate::process::elf::write_user_mapped_bytes_pub(
187 &new_as_arc,
188 tls_base + off,
189 &tmp[..chunk],
190 )
191 .map_err(|_| SyscallError::Fault)?;
192 off += chunk as u64;
193 }
194 }
195 let tp = tls_base + aligned_memsz;
196 crate::process::elf::write_user_u64_pub(&new_as_arc, tp, tp)
197 .map_err(|_| SyscallError::Fault)?;
198 new_fs_base = tp;
199 }
200
201 unsafe {
206 let fd_table = &mut *current.process.fd_table.get();
207 fd_table.close_cloexec();
208 }
209
210 current.reset_signals();
212
213 unsafe {
215 *current.signal_stack.get() = None;
216 }
217
218 current
220 .clear_child_tid
221 .store(0, core::sync::atomic::Ordering::Relaxed);
222 current
223 .user_fs_base
224 .store(new_fs_base, core::sync::atomic::Ordering::Relaxed);
225
226 current
228 .process
229 .brk
230 .store(0, core::sync::atomic::Ordering::Relaxed);
231 current
232 .process
233 .mmap_hint
234 .store(0x0000_0000_6000_0000, core::sync::atomic::Ordering::Relaxed);
235 unsafe {
237 let lo = new_fs_base as u32;
238 let hi = (new_fs_base >> 32) as u32;
239 core::arch::asm!(
240 "mov ecx, 0xC0000100", "wrmsr",
242 in("eax") lo,
243 in("edx") hi,
244 options(nostack, preserves_flags),
245 );
246 }
247
248 let old_as = current.process.replace_address_space(new_as_arc.clone());
249
250 unsafe {
251 current.process.address_space_arc().switch_to();
252 }
253
254 frame.iret_rip = load_info.runtime_entry;
255 frame.iret_rsp = sp;
256 frame.iret_rflags = 0x200; frame.rdi = 0;
259 frame.rsi = 0;
260 frame.rdx = 0;
261 frame.rcx = 0;
262 frame.r8 = 0;
263 frame.r9 = 0;
264 frame.r10 = 0;
265 frame.r11 = 0;
266 frame.rbx = 0;
267 frame.rbp = 0;
268 frame.r12 = 0;
269 frame.r13 = 0;
270 frame.r14 = 0;
271 frame.r15 = 0;
272 frame.rax = 0;
273
274 drop(old_as);
276
277 Ok(0)
278}
279
280fn setup_user_stack(
282 new_as: &AddressSpace,
283 argv_ptr: u64,
284 envp_ptr: u64,
285 elf_info: &LoadedElfInfo,
286 exec_path: &[u8],
287) -> Result<u64, SyscallError> {
288 let args = read_string_array(argv_ptr)?;
289 let envs = read_string_array(envp_ptr)?;
290
291 let mut sp = USER_STACK_TOP;
292 let mut str_ptrs: Vec<u64> = Vec::with_capacity(args.len()); let mut env_ptrs: Vec<u64> = Vec::with_capacity(envs.len()); for env in envs.iter().rev() {
302 let len = (env.len() + 1) as u64;
303 sp -= len;
304 write_bytes_to_as(new_as, sp, env)?;
305 write_bytes_to_as(new_as, sp + env.len() as u64, &[0])?;
306 env_ptrs.push(sp);
307 }
308 env_ptrs.reverse();
312
313 for arg in args.iter().rev() {
315 let len = (arg.len() + 1) as u64;
316 sp -= len;
317 write_bytes_to_as(new_as, sp, arg)?;
318 write_bytes_to_as(new_as, sp + arg.len() as u64, &[0])?;
319 str_ptrs.push(sp);
320 }
321 str_ptrs.reverse();
322
323 let mut execfn_ptr = 0u64;
325 if !exec_path.is_empty() {
326 let len = (exec_path.len() + 1) as u64;
327 sp -= len;
328 write_bytes_to_as(new_as, sp, exec_path)?;
329 write_bytes_to_as(new_as, sp + exec_path.len() as u64, &[0])?;
330 execfn_ptr = sp;
331 }
332
333 sp -= 16;
335 let rand_ptr = sp;
336 let seed = generate_aux_random_seed();
337 write_bytes_to_as(new_as, rand_ptr, &seed)?;
338
339 sp &= !0xF;
341
342 let size_ptr = 8u64;
344
345 let mut auxv: Vec<(u64, u64)> = Vec::with_capacity(10);
347 auxv.push((AT_PHDR, elf_info.phdr_vaddr));
348 auxv.push((AT_PHENT, elf_info.phent as u64));
349 auxv.push((AT_PHNUM, elf_info.phnum as u64));
350 auxv.push((AT_PAGESZ, 4096));
351 if let Some(base) = elf_info.interp_base {
352 auxv.push((AT_BASE, base));
353 }
354 auxv.push((AT_ENTRY, elf_info.program_entry));
355 auxv.push((AT_RANDOM, rand_ptr));
356 if execfn_ptr != 0 {
357 auxv.push((AT_EXECFN, execfn_ptr));
358 }
359
360 sp -= size_ptr;
362 write_u64_to_as(new_as, sp, 0)?;
363 sp -= size_ptr;
364 write_u64_to_as(new_as, sp, AT_NULL)?;
365 for &(key, val) in auxv.iter().rev() {
366 sp -= size_ptr;
367 write_u64_to_as(new_as, sp, val)?;
368 sp -= size_ptr;
369 write_u64_to_as(new_as, sp, key)?;
370 }
371
372 sp -= size_ptr;
378 write_u64_to_as(new_as, sp, 0)?; for &ptr in env_ptrs.iter().rev() {
381 sp -= size_ptr;
382 write_u64_to_as(new_as, sp, ptr)?;
383 }
384 sp -= size_ptr;
392 write_u64_to_as(new_as, sp, 0)?; for &ptr in str_ptrs.iter().rev() {
395 sp -= size_ptr;
396 write_u64_to_as(new_as, sp, ptr)?;
397 }
398 sp -= size_ptr;
402 write_u64_to_as(new_as, sp, args.len() as u64)?;
403
404 Ok(sp)
405}
406
407fn read_string_array(ptr: u64) -> Result<Vec<Vec<u8>>, SyscallError> {
409 let mut res = Vec::new();
410 if ptr == 0 {
411 return Ok(res);
412 }
413
414 let mut arr_off = 0;
415 loop {
416 let str_ptr = match UserSliceRead::new(ptr + arr_off, 8) {
418 Ok(slice) => match slice.read_u64(0) {
419 Ok(p) => p,
420 Err(_) => return Err(SyscallError::Fault),
421 },
422 Err(_) => return Err(SyscallError::Fault),
423 };
424
425 if str_ptr == 0 {
426 break;
427 }
428 if res.len() > 1024 {
429 return Err(SyscallError::ArgumentListTooLong);
430 }
431
432 let mut s = Vec::new();
433 let mut i = 0;
434 loop {
435 if i > 4096 {
436 return Err(SyscallError::ArgumentListTooLong);
437 }
438 let b = match UserSliceRead::new(str_ptr + i, 1) {
439 Ok(slice) => match slice.read_u8(0) {
440 Ok(byte) => byte,
441 Err(_) => return Err(SyscallError::Fault),
442 },
443 Err(_) => return Err(SyscallError::Fault),
444 };
445 if b == 0 {
446 break;
447 }
448 s.push(b);
449 i += 1;
450 }
451 res.push(s);
452 arr_off += 8;
453 }
454 Ok(res)
455}
456
457fn write_bytes_to_as(as_ref: &AddressSpace, vaddr: u64, data: &[u8]) -> Result<(), SyscallError> {
459 use x86_64::VirtAddr;
460 let mut written = 0;
461 while written < data.len() {
472 let curr_vaddr = vaddr + written as u64;
473 let page_offset = (curr_vaddr & 0xFFF) as usize;
474 let chunk_size = core::cmp::min(data.len() - written, 4096 - page_offset);
475
476 let phys = as_ref
479 .translate(VirtAddr::new(curr_vaddr))
480 .ok_or(SyscallError::Fault)?;
481 let virt = crate::memory::phys_to_virt(phys.as_u64()) as *mut u8;
482
483 unsafe {
484 core::ptr::copy_nonoverlapping(data.as_ptr().add(written), virt, chunk_size);
485 }
486 written += chunk_size;
487 }
488 Ok(())
489}
490
491fn write_u64_to_as(as_ref: &AddressSpace, vaddr: u64, val: u64) -> Result<(), SyscallError> {
493 let bytes = val.to_ne_bytes();
494 write_bytes_to_as(as_ref, vaddr, &bytes)
495}
496
497fn generate_aux_random_seed() -> [u8; 16] {
499 use x86_64::registers::control::Cr3;
500 let mut s = [0u8; 16];
501 let t = crate::process::scheduler::ticks();
502 let (cr3, _) = Cr3::read();
503 let x = t
504 ^ (cr3
505 .start_address()
506 .as_u64()
507 .wrapping_mul(0x9e37_79b9_7f4a_7c15));
508 s[..8].copy_from_slice(&x.to_le_bytes());
509 s[8..].copy_from_slice(&(x.rotate_left(17) ^ 0xa076_1d64_78bd_642f).to_le_bytes());
510 s
511}