// strat9_kernel/syscall/mmap.rs

//! Memory-management syscall handlers: mmap, munmap, brk, and region handles.
//!
//! Implements:
//!  - [`sys_mmap`]   – map anonymous or private file-backed memory (SYS_MMAP = 100)
//!  - [`sys_munmap`] – unmap a virtual memory range (SYS_MUNMAP = 101)
//!  - [`sys_brk`]    – set / query the program break / heap top (SYS_BRK = 102)
//!  - [`sys_mremap`] – resize/remap an existing region (SYS_MREMAP = 103)
//!  - [`sys_mprotect`] – change page permissions (SYS_MPROTECT = 104)
//!  - [`sys_mem_region_export`] – export a region as a handle (SYS_MEM_REGION_EXPORT = 105)
//!  - [`sys_mem_region_map`] – map an exported region (SYS_MEM_REGION_MAP = 106)
//!  - [`sys_mem_region_info`] – query an exported region (SYS_MEM_REGION_INFO = 107)

use crate::{
    memory::address_space::{VmaFlags, VmaType},
    process::current_task_clone,
    syscall::error::SyscallError,
};
use core::sync::atomic::Ordering;
use strat9_abi::data::MemoryRegionInfo as MemoryRegionInfoAbi;
use x86_64::VirtAddr;
18
// ================================================================================
// Virtual address layout constants
// ================================================================================

/// Base virtual address for the heap (`brk`-managed region).
pub const BRK_BASE: u64 = 0x0000_0000_2000_0000; // 512 MiB

/// Initial hint address for anonymous `mmap` allocations.
pub const MMAP_BASE: u64 = 0x0000_0000_6000_0000; // 1.5 GiB

/// Exclusive upper bound of the canonical user-space address range
/// (lower canonical half on x86-64: bit 47 clear).
const USER_SPACE_END: u64 = 0x0000_8000_0000_0000;

// ================================================================================
// PROT flags (arg3 of mmap)
// ================================================================================
// Bit values match POSIX/Linux PROT_READ / PROT_WRITE / PROT_EXEC.

const PROT_READ: u32 = 1 << 0;
const PROT_WRITE: u32 = 1 << 1;
const PROT_EXEC: u32 = 1 << 2;

// ================================================================================
// MAP flags (arg4 of mmap)
// ================================================================================
// SHARED/PRIVATE/FIXED/ANONYMOUS and FIXED_NOREPLACE use the Linux bit values.

const MAP_SHARED: u32 = 1 << 0;
const MAP_PRIVATE: u32 = 1 << 1;
const MAP_FIXED: u32 = 1 << 4;
const MAP_ANONYMOUS: u32 = 1 << 5;
// NOTE(review): Linux defines MAP_HUGETLB as 0x40000 (1 << 18); this kernel
// uses bit 11 instead — userland (strat9_abi) headers must agree with this value.
const MAP_HUGETLB: u32 = 1 << 11;
const MAP_FIXED_NOREPLACE: u32 = 1 << 20; // Matches Linux's MAP_FIXED_NOREPLACE bit.

/// `mremap` flag: allow the kernel to relocate the mapping.
const MREMAP_MAYMOVE: u64 = 1 << 0;
52
// ================================================================================
// Helpers
// ================================================================================

/// Round `addr` up to the next multiple of `align`.
///
/// `align` must be a non-zero power of two.  Arithmetic wraps modulo 2^64:
/// an `addr` within `align - 1` of `u64::MAX` rounds to 0, which callers in
/// this module treat as an overflow signal (`len_aligned == 0` checks).
#[inline]
const fn align_up(addr: u64, align: u64) -> u64 {
    addr.wrapping_add(align - 1) & !(align - 1)
}

/// Round `addr` up to the nearest 4 KiB page boundary (0 on overflow).
#[inline]
const fn page_align_up(addr: u64) -> u64 {
    align_up(addr, 4096)
}

/// Round `addr` up to the nearest 2 MiB boundary (0 on overflow).
#[inline]
const fn huge_page_align_up(addr: u64) -> u64 {
    align_up(addr, 2 * 1024 * 1024)
}
68
69/// Convert POSIX protection flags to `VmaFlags`.
70fn prot_to_vma_flags(prot: u32) -> VmaFlags {
71    VmaFlags {
72        readable: prot & PROT_READ != 0,
73        writable: prot & PROT_WRITE != 0,
74        executable: prot & PROT_EXEC != 0,
75        user_accessible: true,
76    }
77}
78
79/// Convert `VmaFlags` into ABI protection bits.
80fn vma_flags_to_prot(flags: VmaFlags) -> u32 {
81    (if flags.readable { PROT_READ } else { 0 })
82        | (if flags.writable { PROT_WRITE } else { 0 })
83        | (if flags.executable { PROT_EXEC } else { 0 })
84}
85
// ================================================================================
// sys_mmap
// ================================================================================

/// SYS_MMAP (100): map virtual memory.
///
/// Supported modes:
/// - `MAP_ANONYMOUS` with exactly one of `MAP_PRIVATE` / `MAP_SHARED`
///   (equivalent here — no backing file): pages are lazily reserved and
///   zero-filled on first fault.
/// - File-backed `MAP_PRIVATE`: file contents are eagerly copied into fresh
///   anonymous pages.  File-backed `MAP_SHARED` returns `NotImplemented`.
///
/// `MAP_FIXED` replaces any overlapping mappings (Linux semantics);
/// `MAP_FIXED_NOREPLACE` fails with `AlreadyExists` instead.  `MAP_HUGETLB`
/// selects 2 MiB granularity for length rounding and placement.
///
/// Returns the mapped virtual address on success, or a negative error code.
pub fn sys_mmap(
    addr: u64,
    len: u64,
    prot: u32,
    flags: u32,
    fd_raw: u64,
    offset: u64,
) -> Result<u64, SyscallError> {
    // A zero-length mapping is never valid.
    if len == 0 {
        return Err(SyscallError::InvalidArgument);
    }

    // Reject any flag bits this kernel does not understand.
    let known_flags =
        MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED_NOREPLACE;
    if flags & !known_flags != 0 {
        return Err(SyscallError::InvalidArgument);
    }

    // Page granularity: 2 MiB when MAP_HUGETLB is set, else 4 KiB.
    let is_huge = flags & MAP_HUGETLB != 0;
    let page_size = if is_huge {
        crate::memory::address_space::VmaPageSize::Huge
    } else {
        crate::memory::address_space::VmaPageSize::Small
    };
    let page_bytes = page_size.bytes();

    // File-backed mappings: MAP_PRIVATE + fd → copy file data into anonymous pages.
    if flags & MAP_ANONYMOUS == 0 {
        let fd = fd_raw as u32;
        let file_offset = offset;

        // Exactly one of MAP_PRIVATE / MAP_SHARED must be set.
        let is_private = flags & MAP_PRIVATE != 0;
        let is_shared = flags & MAP_SHARED != 0;
        if is_private == is_shared {
            return Err(SyscallError::InvalidArgument);
        }
        // Shared file mappings would need page-cache write-back; unimplemented.
        if !is_private {
            log::warn!("sys_mmap: file-backed MAP_SHARED not yet supported");
            return Err(SyscallError::NotImplemented);
        }
        // Unknown PROT bits are rejected.
        if prot & !(PROT_READ | PROT_WRITE | PROT_EXEC) != 0 {
            return Err(SyscallError::InvalidArgument);
        }

        // Round the requested length up to page granularity; a near-u64::MAX
        // length wraps the alignment to 0, which signals overflow.
        let len_aligned = if is_huge {
            huge_page_align_up(len)
        } else {
            page_align_up(len)
        };
        if len_aligned == 0 {
            return Err(SyscallError::InvalidArgument);
        }
        let n_pages = (len_aligned / page_bytes) as usize;

        let task = current_task_clone().ok_or(SyscallError::Fault)?;
        // Resolve the fd before mutating the address space so a bad fd fails
        // without side effects.
        // SAFETY: the fd table is accessed from the owning task's syscall
        // context — TODO(review): confirm no concurrent mutation is possible.
        let open_file = {
            let fd_table = unsafe { &*task.process.fd_table.get() };
            fd_table.get(fd)?
        };
        let addr_space = task.process.address_space_arc();

        // Choose the target address (same placement policy as the anonymous
        // path further down).
        let target = if flags & MAP_FIXED != 0 {
            // MAP_FIXED: caller demands this exact, aligned, non-null address.
            if addr % page_bytes != 0 || addr == 0 {
                return Err(SyscallError::InvalidArgument);
            }
            if addr.saturating_add(len_aligned) > USER_SPACE_END {
                return Err(SyscallError::InvalidArgument);
            }
            if flags & MAP_FIXED_NOREPLACE != 0 {
                // MAP_FIXED_NOREPLACE: fail rather than clobber existing mappings.
                if addr_space.has_mapping_in_range(addr, len_aligned) {
                    return Err(SyscallError::AlreadyExists);
                }
            } else {
                // Linux MAP_FIXED semantics: silently unmap any overlap first.
                addr_space
                    .unmap_range(addr, len_aligned)
                    .map_err(|_| SyscallError::InvalidArgument)?;
            }
            addr
        } else {
            // Hint-based placement: caller hint first, else the per-process
            // bump hint; fall back to MMAP_BASE when the hint area is full.
            let hint = if addr != 0 {
                addr
            } else {
                task.process.mmap_hint.load(Ordering::Relaxed)
            };
            addr_space
                .find_free_vma_range(hint, n_pages, page_size)
                .or_else(|| addr_space.find_free_vma_range(MMAP_BASE, n_pages, page_size))
                .ok_or(SyscallError::OutOfMemory)?
        };

        // Eagerly map (not lazily reserve) so the file contents can be copied
        // into the backing frames immediately below.
        let vma_flags = prot_to_vma_flags(prot);
        addr_space
            .map_region(target, n_pages, vma_flags, VmaType::Anonymous, page_size)
            .map_err(|_| SyscallError::OutOfMemory)?;

        // Copy file content into the mapped pages via HHDM.  A short read
        // (EOF) leaves the remainder zero-filled, matching mmap-past-EOF
        // zero-fill behaviour within the mapping.
        let read_len = len as usize;
        let mut kbuf = [0u8; 4096];
        let mut file_off = file_offset;
        let mut dst_off = 0usize;
        while dst_off < read_len {
            let chunk = core::cmp::min(4096, read_len - dst_off);
            let n = open_file.pread(file_off, &mut kbuf[..chunk]).unwrap_or(0);
            if n == 0 {
                break; // EOF or read error: stop copying, keep the mapping.
            }
            // A chunk may straddle a 4 KiB physical page boundary; translate
            // and write one page-bounded piece at a time.
            let mut written = 0;
            while written < n {
                let vaddr = target + (dst_off + written) as u64;
                let page_off = (vaddr & 0xFFF) as usize;
                let to_write = core::cmp::min(n - written, 4096 - page_off);
                let phys = addr_space
                    .translate(VirtAddr::new(vaddr))
                    .ok_or(SyscallError::Fault)?;
                let hhdm_ptr = crate::memory::phys_to_virt(phys.as_u64()) as *mut u8;
                // SAFETY: `phys` was just returned by `translate` for a page
                // mapped above; `to_write` stays within both `kbuf` and the
                // current 4 KiB physical page.
                unsafe {
                    core::ptr::copy_nonoverlapping(kbuf.as_ptr().add(written), hhdm_ptr, to_write);
                }
                written += to_write;
            }
            file_off += n as u64;
            dst_off += n;
        }

        // Advance the bump hint past this mapping (non-fixed only).
        if flags & MAP_FIXED == 0 {
            let new_hint = target.saturating_add(len_aligned);
            let _ = task
                .process
                .mmap_hint
                .fetch_max(new_hint, Ordering::Relaxed);
        }

        log::trace!(
            "sys_mmap: file-backed {:#x}..{:#x} (fd={}, off={:#x})",
            target,
            target + len_aligned,
            fd,
            file_offset,
        );
        return Ok(target);
    }

    // ---- Anonymous path ----
    let is_private = flags & MAP_PRIVATE != 0;
    let is_shared = flags & MAP_SHARED != 0;
    // Exactly one of MAP_PRIVATE / MAP_SHARED.
    if is_private == is_shared {
        return Err(SyscallError::InvalidArgument);
    }

    // Anonymous mapping currently requires a zero offset.
    if offset != 0 {
        return Err(SyscallError::InvalidArgument);
    }

    // Reject unknown PROT bits.
    if prot & !(PROT_READ | PROT_WRITE | PROT_EXEC) != 0 {
        return Err(SyscallError::InvalidArgument);
    }

    // Round len up to a page boundary.  Overflow of len itself is caught here
    // (alignment wraps to 0 for near-u64::MAX lengths).
    let len_aligned = if is_huge {
        huge_page_align_up(len)
    } else {
        page_align_up(len)
    };
    if len_aligned == 0 {
        // len was so large that aligning it overflowed to 0.
        return Err(SyscallError::InvalidArgument);
    }
    let n_pages = (len_aligned / page_bytes) as usize;

    // Determine the target virtual address.
    let task = current_task_clone().ok_or(SyscallError::Fault)?;
    let addr_space = task.process.address_space_arc();

    let target = if flags & MAP_FIXED != 0 {
        // MAP_FIXED: the caller demands this exact page-aligned address.
        if addr % page_bytes != 0 || addr == 0 {
            return Err(SyscallError::InvalidArgument);
        }
        if addr.saturating_add(len_aligned) > USER_SPACE_END {
            return Err(SyscallError::InvalidArgument);
        }
        if flags & MAP_FIXED_NOREPLACE != 0 {
            // MAP_FIXED_NOREPLACE: fail if any mapping overlaps.
            if addr_space.has_mapping_in_range(addr, len_aligned) {
                return Err(SyscallError::AlreadyExists);
            }
        } else {
            // Linux MAP_FIXED semantics: unmap overlaps before remap.
            addr_space
                .unmap_range(addr, len_aligned)
                .map_err(|_| SyscallError::InvalidArgument)?;
        }
        addr
    } else {
        // Hint-based: use addr as a hint when non-zero, else use mmap_hint.
        let hint = if addr != 0 {
            addr
        } else {
            task.process.mmap_hint.load(Ordering::Relaxed)
        };

        // Try the hint first, then fall back to MMAP_BASE.
        addr_space
            .find_free_vma_range(hint, n_pages, page_size)
            .or_else(|| addr_space.find_free_vma_range(MMAP_BASE, n_pages, page_size))
            .ok_or(SyscallError::OutOfMemory)?
    };

    // Reserve the region lazily: frames are allocated on first page fault.
    let vma_flags = prot_to_vma_flags(prot);
    addr_space
        .reserve_region(target, n_pages, vma_flags, VmaType::Anonymous, page_size)
        .map_err(|_| SyscallError::OutOfMemory)?;

    // Advance mmap_hint past the new mapping (non-fixed only).
    if flags & MAP_FIXED == 0 {
        let new_hint = target.saturating_add(len_aligned);
        // Atomically advance: only update if it moves forward.
        let _ = task
            .process
            .mmap_hint
            .fetch_max(new_hint, Ordering::Relaxed);
    }

    log::trace!(
        "sys_mmap: mapped {:#x}..{:#x} ({} pages, prot={:#x}, flags={:#x})",
        target,
        target + len_aligned,
        n_pages,
        prot,
        flags,
    );

    Ok(target)
}
335
336// ================================================================================
337// sys_munmap
338// ================================================================================
339
340/// SYS_MUNMAP (101): unmap a virtual memory range.
341///
342/// `addr` must be page-aligned.  `len` is rounded up to a page boundary.
343/// Unmapping an address range that contains no mappings is silently ignored
344/// (POSIX behaviour).
345pub fn sys_munmap(addr: u64, len: u64) -> Result<u64, SyscallError> {
346    if addr == 0 || addr & 0xFFF != 0 {
347        return Err(SyscallError::InvalidArgument);
348    }
349    if len == 0 {
350        return Err(SyscallError::InvalidArgument);
351    }
352
353    let len_aligned = page_align_up(len);
354    if len_aligned == 0 {
355        return Err(SyscallError::InvalidArgument);
356    }
357    if addr.saturating_add(len_aligned) > USER_SPACE_END {
358        return Err(SyscallError::InvalidArgument);
359    }
360
361    let task = current_task_clone().ok_or(SyscallError::Fault)?;
362    task.process
363        .address_space_arc()
364        .unmap_range(addr, len_aligned)
365        .map_err(|_| SyscallError::InvalidArgument)?;
366
367    log::trace!(
368        "sys_munmap: unmapped {:#x}..{:#x}",
369        addr,
370        addr + len_aligned
371    );
372
373    Ok(0)
374}
375
/// SYS_MREMAP (103): resize an existing mapping.
///
/// Current support:
/// - Shrink in place.
/// - Grow in place when the following range is free.
/// - If `MREMAP_MAYMOVE` is set and growth in place fails, relocate only when
///   the source mapping is still fully lazy (no present pages yet), since
///   moving present pages would require copying frame contents.
///
/// `old_size` must describe the tracked VMA exactly; partial remaps fail.
pub fn sys_mremap(
    old_addr: u64,
    old_size: u64,
    new_size: u64,
    flags: u64,
) -> Result<u64, SyscallError> {
    if old_size == 0 || new_size == 0 {
        return Err(SyscallError::InvalidArgument);
    }
    // MREMAP_MAYMOVE is the only recognised flag.
    if flags & !MREMAP_MAYMOVE != 0 {
        return Err(SyscallError::InvalidArgument);
    }

    let task = current_task_clone().ok_or(SyscallError::Fault)?;
    let addr_space = task.process.address_space_arc();
    // `old_addr` must be the exact start address of a tracked VMA.
    let vma = addr_space
        .region_by_start(old_addr)
        .ok_or(SyscallError::Fault)?;

    // Alignment and rounding use the VMA's own granularity (4 KiB or 2 MiB).
    let page_bytes = vma.page_size.bytes();
    if old_addr % page_bytes != 0 {
        return Err(SyscallError::InvalidArgument);
    }

    let old_len_aligned = if vma.page_size == crate::memory::address_space::VmaPageSize::Huge {
        huge_page_align_up(old_size)
    } else {
        page_align_up(old_size)
    };
    let new_len_aligned = if vma.page_size == crate::memory::address_space::VmaPageSize::Huge {
        huge_page_align_up(new_size)
    } else {
        page_align_up(new_size)
    };
    // Alignment wrapping to 0 means the requested size overflowed u64.
    if old_len_aligned == 0 || new_len_aligned == 0 {
        return Err(SyscallError::InvalidArgument);
    }

    // The caller's old_size must cover the whole VMA, not a sub-range.
    let tracked_len = (vma.page_count as u64)
        .checked_mul(page_bytes)
        .ok_or(SyscallError::InvalidArgument)?;
    if old_len_aligned != tracked_len {
        return Err(SyscallError::InvalidArgument);
    }

    // Same size after rounding: nothing to do.
    if new_len_aligned == old_len_aligned {
        return Ok(old_addr);
    }

    // Shrink in place: drop the tail pages.
    if new_len_aligned < old_len_aligned {
        let tail_addr = old_addr
            .checked_add(new_len_aligned)
            .ok_or(SyscallError::InvalidArgument)?;
        let tail_len = old_len_aligned - new_len_aligned;
        addr_space
            .unmap_range(tail_addr, tail_len)
            .map_err(|_| SyscallError::InvalidArgument)?;
        return Ok(old_addr);
    }

    // Grow in place when the range immediately after the VMA is free.
    let grow_len = new_len_aligned - old_len_aligned;
    let grow_start = old_addr
        .checked_add(old_len_aligned)
        .ok_or(SyscallError::InvalidArgument)?;

    if !addr_space.has_mapping_in_range(grow_start, grow_len) {
        let grow_pages = (grow_len / page_bytes) as usize;
        addr_space
            .reserve_region(
                grow_start,
                grow_pages,
                vma.flags,
                vma.vma_type,
                vma.page_size,
            )
            .map_err(|_| SyscallError::OutOfMemory)?;
        return Ok(old_addr);
    }

    // In-place growth impossible; relocation requires MREMAP_MAYMOVE.
    if flags & MREMAP_MAYMOVE == 0 {
        return Err(SyscallError::OutOfMemory);
    }

    // Only a fully-lazy mapping can be relocated: with no present pages,
    // re-reserving at a new address is observationally equivalent to a move.
    let has_present_pages = addr_space
        .any_mapped_in_range(old_addr, old_len_aligned, vma.page_size)
        .map_err(|_| SyscallError::InvalidArgument)?;
    if has_present_pages {
        return Err(SyscallError::OutOfMemory);
    }

    let new_pages = (new_len_aligned / page_bytes) as usize;
    let new_addr = addr_space
        .find_free_vma_range(MMAP_BASE, new_pages, vma.page_size)
        .ok_or(SyscallError::OutOfMemory)?;

    // Swap the old reservation for a larger one at the new address.
    addr_space
        .unmap_range(old_addr, old_len_aligned)
        .map_err(|_| SyscallError::InvalidArgument)?;
    addr_space
        .reserve_region(new_addr, new_pages, vma.flags, vma.vma_type, vma.page_size)
        .map_err(|_| SyscallError::OutOfMemory)?;
    Ok(new_addr)
}
486
487/// SYS_MPROTECT (104): change permissions in an existing mapping range.
488pub fn sys_mprotect(addr: u64, len: u64, prot: u64) -> Result<u64, SyscallError> {
489    if len == 0 || addr == 0 || addr & 0xFFF != 0 {
490        return Err(SyscallError::InvalidArgument);
491    }
492    let prot_u32 = u32::try_from(prot).map_err(|_| SyscallError::InvalidArgument)?;
493    if prot_u32 & !(PROT_READ | PROT_WRITE | PROT_EXEC) != 0 {
494        return Err(SyscallError::InvalidArgument);
495    }
496
497    let len_aligned = page_align_up(len);
498    if len_aligned == 0 {
499        return Err(SyscallError::InvalidArgument);
500    }
501    if addr.saturating_add(len_aligned) > USER_SPACE_END {
502        return Err(SyscallError::InvalidArgument);
503    }
504
505    let task = current_task_clone().ok_or(SyscallError::Fault)?;
506    let addr_space = task.process.address_space_arc();
507    let flags = prot_to_vma_flags(prot_u32);
508
509    addr_space
510        .protect_range(addr, len_aligned, flags)
511        .map_err(|_| SyscallError::InvalidArgument)?;
512
513    Ok(0)
514}
515
516/// SYS_MEM_REGION_EXPORT (105): export a tracked region as a public handle.
517pub fn sys_mem_region_export(addr: u64) -> Result<u64, SyscallError> {
518    let task = current_task_clone().ok_or(SyscallError::Fault)?;
519    let address_space = task.process.address_space_arc();
520    let handle_cap = crate::capability::CapId::new();
521    let resource_id = crate::memory::memory_region_registry()
522        .export_region(&address_space, addr, handle_cap)
523        .map_err(|error| match error {
524            crate::memory::RegionCapError::InvalidRegion
525            | crate::memory::RegionCapError::IncompleteRegion
526            | crate::memory::RegionCapError::InvalidAddress => SyscallError::InvalidArgument,
527            crate::memory::RegionCapError::PermissionDenied => SyscallError::PermissionDenied,
528            crate::memory::RegionCapError::OutOfMemory => SyscallError::OutOfMemory,
529            crate::memory::RegionCapError::InconsistentState => SyscallError::IoError,
530            crate::memory::RegionCapError::NotFound => SyscallError::NotFound,
531        })?;
532
533    let cap = crate::capability::Capability {
534        id: handle_cap,
535        resource_type: crate::capability::ResourceType::MemoryRegion,
536        permissions: crate::capability::CapPermissions {
537            read: true,
538            write: true,
539            execute: true,
540            grant: true,
541            revoke: true,
542        },
543        resource: resource_id as usize,
544    };
545    let cap_id = unsafe { (&mut *task.process.capabilities.get()).insert(cap) };
546    Ok(cap_id.as_u64())
547}
548
/// SYS_MEM_REGION_MAP (106): map an exported region into the caller.
///
/// `handle` names a `MemoryRegion` capability; `addr_hint` is forwarded to
/// the registry's placement logic; the chosen base address is written to
/// `out_ptr` (a user pointer to a `u64`).  Returns the mapped size in bytes.
pub fn sys_mem_region_map(handle: u64, addr_hint: u64, out_ptr: u64) -> Result<u64, SyscallError> {
    // Silo policy check before any capability lookup.
    crate::silo::enforce_cap_for_current_task(handle)?;
    if out_ptr == 0 {
        return Err(SyscallError::Fault);
    }

    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
    // SAFETY: the capability table is only accessed from the owning task's
    // syscall context — TODO(review): confirm this invariant kernel-wide.
    let caps = unsafe { &*task.process.capabilities.get() };
    let cap = caps
        .get(crate::capability::CapId::from_raw(handle))
        .ok_or(SyscallError::BadHandle)?;
    if cap.resource_type != crate::capability::ResourceType::MemoryRegion {
        return Err(SyscallError::BadHandle);
    }

    // The mapping's page permissions mirror the capability's permissions.
    let requested_flags = VmaFlags {
        readable: cap.permissions.read,
        writable: cap.permissions.write,
        executable: cap.permissions.execute,
        user_accessible: true,
    };
    let address_space = task.process.address_space_arc();
    let (base, size) = crate::memory::memory_region_registry()
        .map_region(
            cap.resource as u64,
            &address_space,
            addr_hint,
            requested_flags,
        )
        .map_err(|error| match error {
            crate::memory::RegionCapError::NotFound => SyscallError::NotFound,
            crate::memory::RegionCapError::InvalidRegion
            | crate::memory::RegionCapError::IncompleteRegion
            | crate::memory::RegionCapError::InvalidAddress => SyscallError::InvalidArgument,
            crate::memory::RegionCapError::PermissionDenied => SyscallError::PermissionDenied,
            crate::memory::RegionCapError::OutOfMemory => SyscallError::OutOfMemory,
            crate::memory::RegionCapError::InconsistentState => SyscallError::IoError,
        })?;

    // Report the chosen base address back to user space.
    let user = crate::memory::UserSliceWrite::new(out_ptr, core::mem::size_of::<u64>())?;
    user.copy_from(&base.to_ne_bytes());
    Ok(size)
}
593
/// SYS_MEM_REGION_INFO (107): query metadata about an exported region.
///
/// Writes a `MemoryRegionInfo` ABI struct (size, page size, PROT-style
/// flags) to `out_ptr`.  Returns 0 on success.
pub fn sys_mem_region_info(handle: u64, out_ptr: u64) -> Result<u64, SyscallError> {
    // Silo policy check before any capability lookup.
    crate::silo::enforce_cap_for_current_task(handle)?;
    if out_ptr == 0 {
        return Err(SyscallError::Fault);
    }

    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
    // SAFETY: the capability table is only accessed from the owning task's
    // syscall context — TODO(review): confirm this invariant kernel-wide.
    let caps = unsafe { &*task.process.capabilities.get() };
    let cap = caps
        .get(crate::capability::CapId::from_raw(handle))
        .ok_or(SyscallError::BadHandle)?;
    if cap.resource_type != crate::capability::ResourceType::MemoryRegion {
        return Err(SyscallError::BadHandle);
    }

    let info = crate::memory::memory_region_registry()
        .info(cap.resource as u64)
        .ok_or(SyscallError::NotFound)?;
    // Re-encode into the stable ABI layout shared with user space.
    let abi = MemoryRegionInfoAbi {
        size: info.size,
        page_size: info.page_size.bytes(),
        flags: vma_flags_to_prot(info.flags),
        _reserved: 0,
    };
    let user =
        crate::memory::UserSliceWrite::new(out_ptr, core::mem::size_of::<MemoryRegionInfoAbi>())?;
    // SAFETY: `abi` is a live local; the slice covers exactly its in-memory
    // representation and only lives for the duration of the copy below.
    let bytes = unsafe {
        core::slice::from_raw_parts(
            &abi as *const MemoryRegionInfoAbi as *const u8,
            core::mem::size_of::<MemoryRegionInfoAbi>(),
        )
    };
    user.copy_from(bytes);
    Ok(0)
}
630
// ================================================================================
// sys_brk
// ================================================================================

/// SYS_BRK (102): set or query the program break (top of heap).
///
/// Calling convention (matches Linux):
///
/// | `addr`          | Behaviour                                              |
/// |-----------------|--------------------------------------------------------|
/// | `0`             | Query : return current break unchanged.                |
/// | `> current_brk` | Extend heap; new pages are zero-filled RW anonymous.   |
/// | `< current_brk` | Shrink heap; backing pages are freed.                  |
/// | `< BRK_BASE`    | Invalid : return current break unchanged (Linux compat).|
///
/// On any error (OOM, out-of-range) the **unchanged** break is returned rather
/// than a negative code : this is the Linux `brk(2)` contract.
pub fn sys_brk(addr: u64) -> Result<u64, SyscallError> {
    let task = current_task_clone().ok_or(SyscallError::Fault)?;

    // Lazy initialisation.
    // `task.process.brk == 0` means this task has never called brk.  The heap
    // starts empty at BRK_BASE; no pages are mapped yet.
    // NOTE(review): load-then-store is not a CAS; assumes brk is only touched
    // from this task's own syscall context — confirm.
    let current_brk = {
        let raw = task.process.brk.load(Ordering::Relaxed);
        if raw == 0 {
            task.process.brk.store(BRK_BASE, Ordering::Relaxed);
            BRK_BASE
        } else {
            raw
        }
    };

    // Query: brk(0) reports the current break without changing anything.
    if addr == 0 {
        return Ok(current_brk);
    }

    // Range checks.
    // Reject attempts to move the break below the heap base or into kernel AS.
    if addr < BRK_BASE || addr >= USER_SPACE_END {
        return Ok(current_brk); // return unchanged (Linux behaviour)
    }

    // Compute page-aligned extents.
    // The heap occupies [BRK_BASE, page_align_up(current_brk)).
    // Any bytes in the last partial page are already backed but not accounted
    // for in the page-end calculation : they stay mapped on shrink.
    let old_page_end = page_align_up(current_brk);
    let new_page_end = page_align_up(addr);

    if new_page_end > old_page_end {
        // Grow: lazily reserve [old_page_end, new_page_end) as RW anonymous
        // 4 KiB pages; frames are allocated on first fault.
        let n_pages = ((new_page_end - old_page_end) / 4096) as usize;
        let vma_flags = VmaFlags {
            readable: true,
            writable: true,
            executable: false,
            user_accessible: true,
        };
        if task
            .process
            .address_space_arc()
            .reserve_region(
                old_page_end,
                n_pages,
                vma_flags,
                VmaType::Anonymous,
                crate::memory::address_space::VmaPageSize::Small,
            )
            .is_err()
        {
            // OOM : return the unchanged break (Linux behaviour).
            return Ok(current_brk);
        }
        log::trace!(
            "sys_brk: grow {:#x}..{:#x} ({} pages, lazy)",
            old_page_end,
            new_page_end,
            n_pages,
        );
    } else if new_page_end < old_page_end {
        // Shrink: unmap [new_page_end, old_page_end), freeing backing frames.
        let len = old_page_end - new_page_end;
        if task
            .process
            .address_space_arc()
            .unmap_range(new_page_end, len)
            .is_err()
        {
            return Ok(current_brk);
        }
        log::trace!(
            "sys_brk: shrink {:#x}..{:#x} (-{} pages)",
            new_page_end,
            old_page_end,
            len / 4096,
        );
    }
    // If new_page_end == old_page_end, only the sub-page byte offset changed;
    // no page-table operations are needed.

    // Commit the new exact-byte program break.
    task.process.brk.store(addr, Ordering::Relaxed);
    Ok(addr)
}