use crate::{
    memory::address_space::{VmaFlags, VmaType},
    process::current_task_clone,
    syscall::error::SyscallError,
};
use core::sync::atomic::Ordering;
use strat9_abi::data::MemoryRegionInfo as MemoryRegionInfoAbi;
use x86_64::VirtAddr;

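// User address-space layout: the heap (brk) grows upward from BRK_BASE,
// anonymous mmap allocations are placed from MMAP_BASE, and USER_SPACE_END
// marks the top of the lower canonical half on x86_64.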
pub const BRK_BASE: u64 = 0x0000_0000_2000_0000;
pub const MMAP_BASE: u64 = 0x0000_0000_6000_0000;
const USER_SPACE_END: u64 = 0x0000_8000_0000_0000;

const PROT_READ: u32 = 1 << 0;
const PROT_WRITE: u32 = 1 << 1;
const PROT_EXEC: u32 = 1 << 2;

const MAP_SHARED: u32 = 1 << 0;
const MAP_PRIVATE: u32 = 1 << 1;
const MAP_FIXED: u32 = 1 << 4;
const MAP_ANONYMOUS: u32 = 1 << 5;
const MAP_HUGETLB: u32 = 1 << 11;
const MAP_FIXED_NOREPLACE: u32 = 1 << 20;
const MREMAP_MAYMOVE: u64 = 1 << 0;

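/// Round `addr` up to the next 4 KiB boundary.
///
/// Uses wrapping arithmetic, so an `addr` near `u64::MAX` wraps around to 0;
/// callers treat an aligned length of 0 as invalid.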
#[inline]
fn page_align_up(addr: u64) -> u64 {
    (addr.wrapping_add(4095)) & !4095u64
}

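/// Round `addr` up to the next 2 MiB huge-page boundary, with the same
/// wrap-to-zero behavior as `page_align_up`.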
#[inline]
fn huge_page_align_up(addr: u64) -> u64 {
    (addr.wrapping_add((2 * 1024 * 1024) - 1)) & !((2 * 1024 * 1024) - 1)
}

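/// Translate user-facing `PROT_*` bits into kernel VMA flags. Mappings
/// created through these syscalls are always user-accessible.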
fn prot_to_vma_flags(prot: u32) -> VmaFlags {
    VmaFlags {
        readable: prot & PROT_READ != 0,
        writable: prot & PROT_WRITE != 0,
        executable: prot & PROT_EXEC != 0,
        user_accessible: true,
    }
}

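/// Inverse of `prot_to_vma_flags`: recover the `PROT_*` bits from VMA flags.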
fn vma_flags_to_prot(flags: VmaFlags) -> u32 {
    (if flags.readable { PROT_READ } else { 0 })
        | (if flags.writable { PROT_WRITE } else { 0 })
        | (if flags.executable { PROT_EXEC } else { 0 })
}

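/// `mmap`: create a new mapping in the calling task's address space.
///
/// Anonymous mappings are only reserved here and populated lazily on first
/// fault. File-backed mappings must be `MAP_PRIVATE` (`MAP_SHARED` is not
/// yet supported); their pages are mapped eagerly and the file contents are
/// copied in at map time, so later changes to the file are not visible
/// through the mapping.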
pub fn sys_mmap(
    addr: u64,
    len: u64,
    prot: u32,
    flags: u32,
    fd_raw: u64,
    offset: u64,
) -> Result<u64, SyscallError> {
    if len == 0 {
        return Err(SyscallError::InvalidArgument);
    }

    let known_flags =
        MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED_NOREPLACE;
    if flags & !known_flags != 0 {
        return Err(SyscallError::InvalidArgument);
    }

    let is_huge = flags & MAP_HUGETLB != 0;
    let page_size = if is_huge {
        crate::memory::address_space::VmaPageSize::Huge
    } else {
        crate::memory::address_space::VmaPageSize::Small
    };
    let page_bytes = page_size.bytes();

    if flags & MAP_ANONYMOUS == 0 {
        let fd = fd_raw as u32;
        let file_offset = offset;

        let is_private = flags & MAP_PRIVATE != 0;
        let is_shared = flags & MAP_SHARED != 0;
        if is_private == is_shared {
            return Err(SyscallError::InvalidArgument);
        }
        if !is_private {
            log::warn!("sys_mmap: file-backed MAP_SHARED not yet supported");
            return Err(SyscallError::NotImplemented);
        }
        if prot & !(PROT_READ | PROT_WRITE | PROT_EXEC) != 0 {
            return Err(SyscallError::InvalidArgument);
        }

        let len_aligned = if is_huge {
            huge_page_align_up(len)
        } else {
            page_align_up(len)
        };
        if len_aligned == 0 {
            return Err(SyscallError::InvalidArgument);
        }
        let n_pages = (len_aligned / page_bytes) as usize;

        let task = current_task_clone().ok_or(SyscallError::Fault)?;
        let open_file = {
            let fd_table = unsafe { &*task.process.fd_table.get() };
            fd_table.get(fd)?
        };
        let addr_space = task.process.address_space_arc();

        let target = if flags & MAP_FIXED != 0 {
            if addr % page_bytes != 0 || addr == 0 {
                return Err(SyscallError::InvalidArgument);
            }
            if addr.saturating_add(len_aligned) > USER_SPACE_END {
                return Err(SyscallError::InvalidArgument);
            }
            if flags & MAP_FIXED_NOREPLACE != 0 {
                if addr_space.has_mapping_in_range(addr, len_aligned) {
                    return Err(SyscallError::AlreadyExists);
                }
            } else {
                addr_space
                    .unmap_range(addr, len_aligned)
                    .map_err(|_| SyscallError::InvalidArgument)?;
            }
            addr
        } else {
            let hint = if addr != 0 {
                addr
            } else {
                task.process.mmap_hint.load(Ordering::Relaxed)
            };
            addr_space
                .find_free_vma_range(hint, n_pages, page_size)
                .or_else(|| addr_space.find_free_vma_range(MMAP_BASE, n_pages, page_size))
                .ok_or(SyscallError::OutOfMemory)?
        };

        let vma_flags = prot_to_vma_flags(prot);
        addr_space
            .map_region(target, n_pages, vma_flags, VmaType::Anonymous, page_size)
            .map_err(|_| SyscallError::OutOfMemory)?;

        let read_len = len as usize;
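        // Eagerly copy the file contents into the new mapping through the
        // higher-half direct map: translate each user page to its physical
        // frame and write via the kernel alias, 4 KiB at a time.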
        let mut kbuf = [0u8; 4096];
        let mut file_off = file_offset;
        let mut dst_off = 0usize;
        while dst_off < read_len {
            let chunk = core::cmp::min(4096, read_len - dst_off);
            let n = open_file.pread(file_off, &mut kbuf[..chunk]).unwrap_or(0);
            if n == 0 {
                break;
            }
            let mut written = 0;
            while written < n {
                let vaddr = target + (dst_off + written) as u64;
                let page_off = (vaddr & 0xFFF) as usize;
                let to_write = core::cmp::min(n - written, 4096 - page_off);
                let phys = addr_space
                    .translate(VirtAddr::new(vaddr))
                    .ok_or(SyscallError::Fault)?;
                let hhdm_ptr = crate::memory::phys_to_virt(phys.as_u64()) as *mut u8;
                unsafe {
                    core::ptr::copy_nonoverlapping(kbuf.as_ptr().add(written), hhdm_ptr, to_write);
                }
                written += to_write;
            }
            file_off += n as u64;
            dst_off += n;
        }

        if flags & MAP_FIXED == 0 {
            let new_hint = target.saturating_add(len_aligned);
            let _ = task
                .process
                .mmap_hint
                .fetch_max(new_hint, Ordering::Relaxed);
        }

        log::trace!(
            "sys_mmap: file-backed {:#x}..{:#x} (fd={}, off={:#x})",
            target,
            target + len_aligned,
            fd,
            file_offset,
        );
        return Ok(target);
    }

    let is_private = flags & MAP_PRIVATE != 0;
    let is_shared = flags & MAP_SHARED != 0;
    if is_private == is_shared {
        return Err(SyscallError::InvalidArgument);
    }

    if offset != 0 {
        return Err(SyscallError::InvalidArgument);
    }

    if prot & !(PROT_READ | PROT_WRITE | PROT_EXEC) != 0 {
        return Err(SyscallError::InvalidArgument);
    }

    let len_aligned = if is_huge {
        huge_page_align_up(len)
    } else {
        page_align_up(len)
    };
    if len_aligned == 0 {
        return Err(SyscallError::InvalidArgument);
    }
    let n_pages = (len_aligned / page_bytes) as usize;

    let task = current_task_clone().ok_or(SyscallError::Fault)?;
    let addr_space = task.process.address_space_arc();

    let target = if flags & MAP_FIXED != 0 {
        if addr % page_bytes != 0 || addr == 0 {
            return Err(SyscallError::InvalidArgument);
        }
        if addr.saturating_add(len_aligned) > USER_SPACE_END {
            return Err(SyscallError::InvalidArgument);
        }
        if flags & MAP_FIXED_NOREPLACE != 0 {
            if addr_space.has_mapping_in_range(addr, len_aligned) {
                return Err(SyscallError::AlreadyExists);
            }
        } else {
            addr_space
                .unmap_range(addr, len_aligned)
                .map_err(|_| SyscallError::InvalidArgument)?;
        }
        addr
    } else {
        let hint = if addr != 0 {
            addr
        } else {
            task.process.mmap_hint.load(Ordering::Relaxed)
        };

        addr_space
            .find_free_vma_range(hint, n_pages, page_size)
            .or_else(|| addr_space.find_free_vma_range(MMAP_BASE, n_pages, page_size))
            .ok_or(SyscallError::OutOfMemory)?
    };

    let vma_flags = prot_to_vma_flags(prot);
    addr_space
        .reserve_region(target, n_pages, vma_flags, VmaType::Anonymous, page_size)
        .map_err(|_| SyscallError::OutOfMemory)?;

    if flags & MAP_FIXED == 0 {
        let new_hint = target.saturating_add(len_aligned);
        let _ = task
            .process
            .mmap_hint
            .fetch_max(new_hint, Ordering::Relaxed);
    }

    log::trace!(
        "sys_mmap: mapped {:#x}..{:#x} ({} pages, prot={:#x}, flags={:#x})",
        target,
        target + len_aligned,
        n_pages,
        prot,
        flags,
    );

    Ok(target)
}

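/// `munmap`: remove all mappings in `addr..addr + len`. `addr` must be
/// non-null and 4 KiB-aligned; `len` is rounded up to whole 4 KiB pages.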
pub fn sys_munmap(addr: u64, len: u64) -> Result<u64, SyscallError> {
    if addr == 0 || addr & 0xFFF != 0 {
        return Err(SyscallError::InvalidArgument);
    }
    if len == 0 {
        return Err(SyscallError::InvalidArgument);
    }

    let len_aligned = page_align_up(len);
    if len_aligned == 0 {
        return Err(SyscallError::InvalidArgument);
    }
    if addr.saturating_add(len_aligned) > USER_SPACE_END {
        return Err(SyscallError::InvalidArgument);
    }

    let task = current_task_clone().ok_or(SyscallError::Fault)?;
    task.process
        .address_space_arc()
        .unmap_range(addr, len_aligned)
        .map_err(|_| SyscallError::InvalidArgument)?;

    log::trace!(
        "sys_munmap: unmapped {:#x}..{:#x}",
        addr,
        addr + len_aligned
    );

    Ok(0)
}

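/// `mremap`: resize the mapping that starts at `old_addr`.
///
/// The request must cover the whole tracked region. Shrinking unmaps the
/// tail in place; growing extends in place when the range past the mapping
/// is free. Otherwise, with `MREMAP_MAYMOVE`, the region is relocated, but
/// only while it has no present pages (moving populated pages is not yet
/// supported, so that case reports out-of-memory).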
pub fn sys_mremap(
    old_addr: u64,
    old_size: u64,
    new_size: u64,
    flags: u64,
) -> Result<u64, SyscallError> {
    if old_size == 0 || new_size == 0 {
        return Err(SyscallError::InvalidArgument);
    }
    if flags & !MREMAP_MAYMOVE != 0 {
        return Err(SyscallError::InvalidArgument);
    }

    let task = current_task_clone().ok_or(SyscallError::Fault)?;
    let addr_space = task.process.address_space_arc();
    let vma = addr_space
        .region_by_start(old_addr)
        .ok_or(SyscallError::Fault)?;

    let page_bytes = vma.page_size.bytes();
    if old_addr % page_bytes != 0 {
        return Err(SyscallError::InvalidArgument);
    }

    let old_len_aligned = if vma.page_size == crate::memory::address_space::VmaPageSize::Huge {
        huge_page_align_up(old_size)
    } else {
        page_align_up(old_size)
    };
    let new_len_aligned = if vma.page_size == crate::memory::address_space::VmaPageSize::Huge {
        huge_page_align_up(new_size)
    } else {
        page_align_up(new_size)
    };
    if old_len_aligned == 0 || new_len_aligned == 0 {
        return Err(SyscallError::InvalidArgument);
    }

    let tracked_len = (vma.page_count as u64)
        .checked_mul(page_bytes)
        .ok_or(SyscallError::InvalidArgument)?;
    if old_len_aligned != tracked_len {
        return Err(SyscallError::InvalidArgument);
    }

    if new_len_aligned == old_len_aligned {
        return Ok(old_addr);
    }

    if new_len_aligned < old_len_aligned {
        let tail_addr = old_addr
            .checked_add(new_len_aligned)
            .ok_or(SyscallError::InvalidArgument)?;
        let tail_len = old_len_aligned - new_len_aligned;
        addr_space
            .unmap_range(tail_addr, tail_len)
            .map_err(|_| SyscallError::InvalidArgument)?;
        return Ok(old_addr);
    }

    let grow_len = new_len_aligned - old_len_aligned;
    let grow_start = old_addr
        .checked_add(old_len_aligned)
        .ok_or(SyscallError::InvalidArgument)?;

    if !addr_space.has_mapping_in_range(grow_start, grow_len) {
        let grow_pages = (grow_len / page_bytes) as usize;
        addr_space
            .reserve_region(
                grow_start,
                grow_pages,
                vma.flags,
                vma.vma_type,
                vma.page_size,
            )
            .map_err(|_| SyscallError::OutOfMemory)?;
        return Ok(old_addr);
    }

    if flags & MREMAP_MAYMOVE == 0 {
        return Err(SyscallError::OutOfMemory);
    }

    let has_present_pages = addr_space
        .any_mapped_in_range(old_addr, old_len_aligned, vma.page_size)
        .map_err(|_| SyscallError::InvalidArgument)?;
    if has_present_pages {
        return Err(SyscallError::OutOfMemory);
    }

    let new_pages = (new_len_aligned / page_bytes) as usize;
    let new_addr = addr_space
        .find_free_vma_range(MMAP_BASE, new_pages, vma.page_size)
        .ok_or(SyscallError::OutOfMemory)?;

    addr_space
        .unmap_range(old_addr, old_len_aligned)
        .map_err(|_| SyscallError::InvalidArgument)?;
    addr_space
        .reserve_region(new_addr, new_pages, vma.flags, vma.vma_type, vma.page_size)
        .map_err(|_| SyscallError::OutOfMemory)?;
    Ok(new_addr)
}

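/// `mprotect`: change the protection of the pages in `addr..addr + len`.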
pub fn sys_mprotect(addr: u64, len: u64, prot: u64) -> Result<u64, SyscallError> {
    if len == 0 || addr == 0 || addr & 0xFFF != 0 {
        return Err(SyscallError::InvalidArgument);
    }
    let prot_u32 = u32::try_from(prot).map_err(|_| SyscallError::InvalidArgument)?;
    if prot_u32 & !(PROT_READ | PROT_WRITE | PROT_EXEC) != 0 {
        return Err(SyscallError::InvalidArgument);
    }

    let len_aligned = page_align_up(len);
    if len_aligned == 0 {
        return Err(SyscallError::InvalidArgument);
    }
    if addr.saturating_add(len_aligned) > USER_SPACE_END {
        return Err(SyscallError::InvalidArgument);
    }

    let task = current_task_clone().ok_or(SyscallError::Fault)?;
    let addr_space = task.process.address_space_arc();
    let flags = prot_to_vma_flags(prot_u32);

    addr_space
        .protect_range(addr, len_aligned, flags)
        .map_err(|_| SyscallError::InvalidArgument)?;

    Ok(0)
}

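/// Export the memory region at `addr` through the global region registry and
/// install a full-permission `MemoryRegion` capability for it in the calling
/// process. Returns the new capability handle.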
pub fn sys_mem_region_export(addr: u64) -> Result<u64, SyscallError> {
    let task = current_task_clone().ok_or(SyscallError::Fault)?;
    let address_space = task.process.address_space_arc();
    let handle_cap = crate::capability::CapId::new();
    let resource_id = crate::memory::memory_region_registry()
        .export_region(&address_space, addr, handle_cap)
        .map_err(|error| match error {
            crate::memory::RegionCapError::InvalidRegion
            | crate::memory::RegionCapError::IncompleteRegion
            | crate::memory::RegionCapError::InvalidAddress => SyscallError::InvalidArgument,
            crate::memory::RegionCapError::PermissionDenied => SyscallError::PermissionDenied,
            crate::memory::RegionCapError::OutOfMemory => SyscallError::OutOfMemory,
            crate::memory::RegionCapError::InconsistentState => SyscallError::IoError,
            crate::memory::RegionCapError::NotFound => SyscallError::NotFound,
        })?;

    let cap = crate::capability::Capability {
        id: handle_cap,
        resource_type: crate::capability::ResourceType::MemoryRegion,
        permissions: crate::capability::CapPermissions {
            read: true,
            write: true,
            execute: true,
            grant: true,
            revoke: true,
        },
        resource: resource_id as usize,
    };
    let cap_id = unsafe { (&mut *task.process.capabilities.get()).insert(cap) };
    Ok(cap_id.as_u64())
}

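/// Map a previously exported memory region into the calling process, with
/// VMA flags derived from the capability's permissions. The chosen base
/// address is written to `out_ptr`; the return value is the region size.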
pub fn sys_mem_region_map(handle: u64, addr_hint: u64, out_ptr: u64) -> Result<u64, SyscallError> {
    crate::silo::enforce_cap_for_current_task(handle)?;
    if out_ptr == 0 {
        return Err(SyscallError::Fault);
    }

    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
    let caps = unsafe { &*task.process.capabilities.get() };
    let cap = caps
        .get(crate::capability::CapId::from_raw(handle))
        .ok_or(SyscallError::BadHandle)?;
    if cap.resource_type != crate::capability::ResourceType::MemoryRegion {
        return Err(SyscallError::BadHandle);
    }

    let requested_flags = VmaFlags {
        readable: cap.permissions.read,
        writable: cap.permissions.write,
        executable: cap.permissions.execute,
        user_accessible: true,
    };
    let address_space = task.process.address_space_arc();
    let (base, size) = crate::memory::memory_region_registry()
        .map_region(
            cap.resource as u64,
            &address_space,
            addr_hint,
            requested_flags,
        )
        .map_err(|error| match error {
            crate::memory::RegionCapError::NotFound => SyscallError::NotFound,
            crate::memory::RegionCapError::InvalidRegion
            | crate::memory::RegionCapError::IncompleteRegion
            | crate::memory::RegionCapError::InvalidAddress => SyscallError::InvalidArgument,
            crate::memory::RegionCapError::PermissionDenied => SyscallError::PermissionDenied,
            crate::memory::RegionCapError::OutOfMemory => SyscallError::OutOfMemory,
            crate::memory::RegionCapError::InconsistentState => SyscallError::IoError,
        })?;

    let user = crate::memory::UserSliceWrite::new(out_ptr, core::mem::size_of::<u64>())?;
    user.copy_from(&base.to_ne_bytes());
    Ok(size)
}

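/// Query an exported memory region, writing its size, page size, and
/// `PROT_*`-encoded flags to `out_ptr` as a `MemoryRegionInfo` record.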
pub fn sys_mem_region_info(handle: u64, out_ptr: u64) -> Result<u64, SyscallError> {
    crate::silo::enforce_cap_for_current_task(handle)?;
    if out_ptr == 0 {
        return Err(SyscallError::Fault);
    }

    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
    let caps = unsafe { &*task.process.capabilities.get() };
    let cap = caps
        .get(crate::capability::CapId::from_raw(handle))
        .ok_or(SyscallError::BadHandle)?;
    if cap.resource_type != crate::capability::ResourceType::MemoryRegion {
        return Err(SyscallError::BadHandle);
    }

    let info = crate::memory::memory_region_registry()
        .info(cap.resource as u64)
        .ok_or(SyscallError::NotFound)?;
    let abi = MemoryRegionInfoAbi {
        size: info.size,
        page_size: info.page_size.bytes(),
        flags: vma_flags_to_prot(info.flags),
        _reserved: 0,
    };
    let user =
        crate::memory::UserSliceWrite::new(out_ptr, core::mem::size_of::<MemoryRegionInfoAbi>())?;
    let bytes = unsafe {
        core::slice::from_raw_parts(
            &abi as *const MemoryRegionInfoAbi as *const u8,
            core::mem::size_of::<MemoryRegionInfoAbi>(),
        )
    };
    user.copy_from(bytes);
    Ok(0)
}

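/// `brk`: set the end of the process heap.
///
/// A request of 0 queries the current break. Following the classic `brk`
/// contract, out-of-range or failed requests return the current break
/// unchanged rather than an error. Growth only reserves pages (they are
/// faulted in lazily); shrinking unmaps the freed tail immediately.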
pub fn sys_brk(addr: u64) -> Result<u64, SyscallError> {
    let task = current_task_clone().ok_or(SyscallError::Fault)?;

    let current_brk = {
        let raw = task.process.brk.load(Ordering::Relaxed);
        if raw == 0 {
            task.process.brk.store(BRK_BASE, Ordering::Relaxed);
            BRK_BASE
        } else {
            raw
        }
    };

    if addr == 0 {
        return Ok(current_brk);
    }

    if addr < BRK_BASE || addr >= USER_SPACE_END {
        return Ok(current_brk);
    }

    let old_page_end = page_align_up(current_brk);
    let new_page_end = page_align_up(addr);

    if new_page_end > old_page_end {
        let n_pages = ((new_page_end - old_page_end) / 4096) as usize;
        let vma_flags = VmaFlags {
            readable: true,
            writable: true,
            executable: false,
            user_accessible: true,
        };
        if task
            .process
            .address_space_arc()
            .reserve_region(
                old_page_end,
                n_pages,
                vma_flags,
                VmaType::Anonymous,
                crate::memory::address_space::VmaPageSize::Small,
            )
            .is_err()
        {
            return Ok(current_brk);
        }
        log::trace!(
            "sys_brk: grow {:#x}..{:#x} ({} pages, lazy)",
            old_page_end,
            new_page_end,
            n_pages,
        );
    } else if new_page_end < old_page_end {
        let len = old_page_end - new_page_end;
        if task
            .process
            .address_space_arc()
            .unmap_range(new_page_end, len)
            .is_err()
        {
            return Ok(current_brk);
        }
        log::trace!(
            "sys_brk: shrink {:#x}..{:#x} (-{} pages)",
            new_page_end,
            old_page_end,
            len / 4096,
        );
    }
    task.process.brk.store(addr, Ordering::Relaxed);
    Ok(addr)
}