strat9_kernel/memory/address_space.rs

//! Per-process address spaces for Strat9-OS.
//!
//! Each task owns an `AddressSpace` backed by a PML4 page table.
//! Kernel tasks share a single kernel address space. User tasks get a fresh
//! PML4 with the kernel half (entries 256..512) cloned from the kernel's table.
//!
//! The PML4 (Page Map Level 4) is the top-level page table in x86_64's 4-level
//! paging scheme. It contains 512 entries, each covering a 512 GiB region of the
//! virtual address space. By cloning the kernel half of the PML4, each user
//! address space shares the kernel mappings without duplicating them.
//! Source: https://wiki.osdev.org/Memory_Management
//!
//! x86_64 virtual address space layout:
//!   - PML4[0..256]   => user space (per-process, zeroed for new AS)
//!   - PML4[256..512] => kernel space (shared, cloned from kernel L4)
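//!
//! As an illustrative sketch (not part of this module's API), the PML4 entry used
//! for a canonical virtual address is selected by bits 39..=47:
//!
//! ```ignore
//! fn pml4_index(vaddr: u64) -> usize {
//!     ((vaddr >> 39) & 0x1FF) as usize
//! }
//!
//! // The first higher-half (kernel) address selects entry 256.
//! assert_eq!(pml4_index(0xFFFF_8000_0000_0000), 256);
//! // A typical low user address selects entry 0.
//! assert_eq!(pml4_index(0x0000_0000_0040_0000), 0);
//! ```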
use alloc::{collections::BTreeMap, sync::Arc, vec::Vec};
use core::sync::atomic::{AtomicU32, Ordering};

use spin::Once;
use x86_64::{
    registers::control::{Cr3, Cr3Flags},
    structures::paging::{
        mapper::TranslateResult, Mapper, OffsetPageTable, Page, PageTable, PageTableFlags,
        PhysFrame as X86PhysFrame, Size2MiB, Size4KiB, Translate,
    },
    PhysAddr, VirtAddr,
};

use crate::{
    capability::CapId,
    memory::{
        allocate_mapping_cap_id, mapping_index, paging::BuddyFrameAllocator, release_owned_block,
        resolve_handle, try_register_mapping_identity, unregister_mapping_identity, BlockHandle,
        MappingRef,
    },
    process::task::Pid,
    sync::SpinLock,
};
41
42/// Flags describing permissions for a virtual memory region.
43#[derive(Debug, Clone, Copy, PartialEq, Eq)]
44pub struct VmaFlags {
45    pub readable: bool,
46    pub writable: bool,
47    pub executable: bool,
48    pub user_accessible: bool,
49}
50
51impl VmaFlags {
52    /// Convert to x86_64 page table flags.
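    ///
    /// Illustrative sketch (not a compiled doc-test): a writable, non-executable,
    /// user-accessible VMA translates to
    /// `PRESENT | WRITABLE | NO_EXECUTE | USER_ACCESSIBLE`:
    ///
    /// ```ignore
    /// let flags = VmaFlags {
    ///     readable: true,
    ///     writable: true,
    ///     executable: false,
    ///     user_accessible: true,
    /// };
    /// assert_eq!(
    ///     flags.to_page_flags(),
    ///     PageTableFlags::PRESENT
    ///         | PageTableFlags::WRITABLE
    ///         | PageTableFlags::NO_EXECUTE
    ///         | PageTableFlags::USER_ACCESSIBLE
    /// );
    /// ```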
53    pub fn to_page_flags(self) -> PageTableFlags {
54        let mut flags = PageTableFlags::PRESENT;
55        if self.writable {
56            flags |= PageTableFlags::WRITABLE;
57        }
58        if !self.executable {
59            flags |= PageTableFlags::NO_EXECUTE;
60        }
61        if self.user_accessible {
62            flags |= PageTableFlags::USER_ACCESSIBLE;
63        }
64        flags
65    }
66}
67
68/// Type/purpose of a virtual memory region.
69#[derive(Debug, Clone, Copy, PartialEq, Eq)]
70pub enum VmaType {
71    /// Zero-filled anonymous memory (heap, mmap).
72    Anonymous,
73    /// Stack region (grows downward).
74    Stack,
75    /// Code/text segment (typically RX).
76    Code,
77    /// Kernel-internal mapping.
78    Kernel,
79}
80
81/// Supported page sizes for VMAs.
82#[derive(Debug, Clone, Copy, PartialEq, Eq)]
83pub enum VmaPageSize {
84    /// Standard 4 KiB page.
85    Small,
86    /// Huge 2 MiB page.
87    Huge,
88}
89
90impl VmaPageSize {
    /// Returns the size in bytes of one page of this size.
92    pub fn bytes(self) -> u64 {
93        match self {
94            VmaPageSize::Small => 4096,
95            VmaPageSize::Huge => 2 * 1024 * 1024,
96        }
97    }
98}
99
100/// A tracked virtual memory region within an address space.
101#[derive(Debug, Clone)]
102pub struct VirtualMemoryRegion {
103    /// Start virtual address (page-aligned).
104    pub start: u64,
105    /// Number of pages in this region (size depends on `page_size`).
106    pub page_count: usize,
107    /// Access permissions.
108    pub flags: VmaFlags,
109    /// Purpose of this region.
110    pub vma_type: VmaType,
111    /// Size of each page in this region.
112    pub page_size: VmaPageSize,
113}
114
115/// An effective mapping currently installed in the page tables.
116#[derive(Debug, Clone, Copy, PartialEq, Eq)]
117pub struct EffectiveMapping {
118    /// Start virtual address of the mapping.
119    pub start: u64,
120    /// Internal capability identifier associated with this mapping.
121    pub cap_id: CapId,
122    /// Physical block currently backing this mapping.
123    pub handle: BlockHandle,
124    /// Hardware page-table flags currently installed for this mapping.
125    pub flags: PageTableFlags,
126    /// Page size of the mapping.
127    pub page_size: VmaPageSize,
128}
129
130/// A per-process address space backed by a PML4 page table.
131///
132/// Kernel tasks share a single `AddressSpace` (the kernel AS).
133/// User tasks each get their own, with kernel entries (PML4[256..512]) cloned
134/// so that the kernel is always mapped regardless of which AS is active.
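///
/// Hypothetical usage sketch (the PID and addresses below are illustrative only):
///
/// ```ignore
/// let address_space = AddressSpace::new_user()?;
/// address_space.set_owner_pid(42);
/// address_space.map_region(
///     0x0000_0000_0040_0000,
///     4,
///     VmaFlags { readable: true, writable: true, executable: false, user_accessible: true },
///     VmaType::Anonymous,
///     VmaPageSize::Small,
/// )?;
/// ```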
135pub struct AddressSpace {
136    /// Physical address of the PML4 table (loaded into CR3).
137    cr3_phys: PhysAddr,
138    /// Virtual address of the PML4 table (via HHDM, for reading/modifying).
139    l4_table_virt: VirtAddr,
140    /// Whether this is the kernel address space (never freed).
141    is_kernel: bool,
142    /// Tracked virtual memory regions (key = start address).
143    regions: SpinLock<BTreeMap<u64, VirtualMemoryRegion>>,
144    /// Tracked effective mappings (key = mapping start address).
145    effective_mappings: SpinLock<BTreeMap<u64, EffectiveMapping>>,
146    /// Process identifier owning this address space, when bound to a process.
147    owner_pid: AtomicU32,
148}
149
150// SAFETY: AddressSpace is protected by the scheduler lock and per-task ownership.
151// The PML4 table is accessed through HHDM virtual addresses which are valid on all CPUs.
152unsafe impl Send for AddressSpace {}
153unsafe impl Sync for AddressSpace {}
154
155impl AddressSpace {
156    /// Create the kernel address space by wrapping the current (boot) CR3.
157    ///
158    /// # Safety
159    /// Must be called exactly once, during single-threaded init, after paging is initialized.
160    pub unsafe fn new_kernel() -> Self {
161        let (level_4_frame, _flags) = Cr3::read();
162        let cr3_phys = level_4_frame.start_address();
163        let l4_table_virt = VirtAddr::new(crate::memory::phys_to_virt(cr3_phys.as_u64()));
164
165        log::info!(
166            "Kernel address space initialized: CR3={:#x}",
167            cr3_phys.as_u64()
168        );
169
170        AddressSpace {
171            cr3_phys,
172            l4_table_virt,
173            is_kernel: true,
174            regions: SpinLock::new(BTreeMap::new()),
175            effective_mappings: SpinLock::new(BTreeMap::new()),
176            owner_pid: AtomicU32::new(0),
177        }
178    }
179
180    /// Create a new user address space with the kernel half cloned.
181    ///
182    /// Allocates a fresh PML4 frame, zeroes it, then copies entries 256..512
183    /// from the kernel PML4. This shares the kernel's L3/L2/L1 subtrees so
184    /// kernel mapping changes propagate automatically.
185    pub fn new_user() -> Result<Self, &'static str> {
186        // Allocate a frame for the new PML4 table.
187        let new_l4_phys =
188            crate::sync::with_irqs_disabled(|token| crate::memory::allocate_frame(token))
189                .map_err(|_| "Failed to allocate PML4 frame")?
190                .start_address;
191
192        let new_l4_virt = VirtAddr::new(crate::memory::phys_to_virt(new_l4_phys.as_u64()));
193
194        // Zero the entire table first (clears user-half entries 0..256).
195        // SAFETY: new_l4_virt points to a freshly allocated, HHDM-mapped frame.
196        unsafe {
197            core::ptr::write_bytes(new_l4_virt.as_mut_ptr::<u8>(), 0, 4096);
198        }
199
200        // Clone kernel entries (PML4[256..512]) from the kernel's L4 table.
201        let kernel_l4_phys = crate::memory::paging::kernel_l4_phys();
202        let kernel_l4_virt = VirtAddr::new(crate::memory::phys_to_virt(kernel_l4_phys.as_u64()));
203
204        // SAFETY: Both pointers are valid HHDM-mapped page tables. We only read
205        // from the kernel table and write to the freshly allocated table.
206        unsafe {
207            let kernel_l4 = &*(kernel_l4_virt.as_ptr::<PageTable>());
208            let new_l4 = &mut *(new_l4_virt.as_mut_ptr::<PageTable>());
209            for i in 256..512 {
210                new_l4[i] = kernel_l4[i].clone();
211            }
212        }
213
214        // ---------- LAPIC low-half mapping (HHDM=0 workaround) ----------
215        //
216        // When Limine provides a non-zero HHDM offset the LAPIC is mapped in
217        // PML4[256..512] (kernel half) and is already shared above.
218        //
219        // When HHDM=0 the LAPIC is identity-mapped at its physical address
220        // (0xFEE00000) in the low half (PML4[0]).  Every Ring-0 interrupt
221        // handler calls apic::eoi() which writes to this address.  If the
222        // handler fires while a user CR3 is active the write faults because
223        // PML4[0] is absent in the user page tables.
224        //
225        // Fix: map just the LAPIC 4KiB MMIO page into every new user AS using
226        // a fresh private L3/L2/L1 hierarchy (no sharing with the kernel's
227        // page table subtrees at the LAPIC virtual address).
228        {
229            let lapic_phys = crate::arch::x86_64::apic::lapic_phys();
230            if lapic_phys != 0 {
231                let lapic_virt = crate::memory::phys_to_virt(lapic_phys);
232                // Only needed when LAPIC is in the low half.
233                if lapic_virt < 0xFFFF_8000_0000_0000 {
234                    let phys_offset = VirtAddr::new(crate::memory::hhdm_offset());
235                    // SAFETY: new_l4_virt is the freshly allocated user PML4.
236                    let l4 = unsafe { &mut *new_l4_virt.as_mut_ptr::<PageTable>() };
237                    let mut mapper = unsafe { OffsetPageTable::new(l4, phys_offset) };
238                    let mut buddy = crate::memory::paging::BuddyFrameAllocator;
239                    let mmio_flags = PageTableFlags::PRESENT
240                        | PageTableFlags::WRITABLE
241                        | PageTableFlags::NO_CACHE;
242                    let lapic_page =
243                        Page::<Size4KiB>::containing_address(VirtAddr::new(lapic_virt));
244                    let lapic_frame =
245                        X86PhysFrame::<Size4KiB>::containing_address(PhysAddr::new(lapic_phys));
                    // `map_to` derives the parent-table flags from `mmio_flags`; since
                    // USER_ACCESSIBLE is not set, the intermediate tables stay
                    // supervisor-only and user code cannot reach LAPIC MMIO.
248                    match unsafe { mapper.map_to(lapic_page, lapic_frame, mmio_flags, &mut buddy) }
249                    {
250                        Ok(flush) => flush.flush(),
251                        Err(e) => {
252                            crate::serial_println!(
253                                "[as] WARN: failed to map LAPIC ({:#x}) in user AS: {:?}",
254                                lapic_phys,
255                                e
256                            );
257                        }
258                    }
259                }
260            }
261        }
262
263        log::debug!(
264            "User address space created: CR3={:#x} (kernel entries cloned from {:#x})",
265            new_l4_phys.as_u64(),
266            kernel_l4_phys.as_u64()
267        );
268
269        Ok(AddressSpace {
270            cr3_phys: new_l4_phys,
271            l4_table_virt: new_l4_virt,
272            is_kernel: false,
273            regions: SpinLock::new(BTreeMap::new()),
274            effective_mappings: SpinLock::new(BTreeMap::new()),
275            owner_pid: AtomicU32::new(0),
276        })
277    }
278
279    /// Registers an effective mapping in the address space tracking table.
280    pub fn register_effective_mapping(
281        &self,
282        mapping: EffectiveMapping,
283    ) -> Result<(), &'static str> {
284        let previous_at_start = self.effective_mapping_by_start(mapping.start);
285        if let Some(previous) = previous_at_start {
286            if previous.handle == mapping.handle && previous.cap_id == mapping.cap_id {
287                self.effective_mappings
288                    .lock()
289                    .insert(mapping.start, mapping);
290                if let Some(pid) = self.owner_pid() {
291                    mapping_index().unregister(mapping.cap_id, pid, VirtAddr::new(mapping.start));
292                    mapping_index().register(
293                        mapping.cap_id,
294                        MappingRef {
295                            pid,
296                            vaddr: VirtAddr::new(mapping.start),
297                            page_size: mapping.page_size,
298                        },
299                    );
300                }
301                return Ok(());
302            }
303        }
304
305        if let Err(error) = try_register_mapping_identity(mapping.handle, mapping.cap_id) {
306            if error != crate::memory::OwnerError::CapAlreadyPresent {
307                log::warn!(
308                    "memory: failed to register effective mapping identity cap={} block={:#x}/{} vaddr={:#x}: {:?}",
309                    mapping.cap_id.as_u64(),
310                    mapping.handle.base.as_u64(),
311                    mapping.handle.order,
312                    mapping.start,
313                    error
314                );
315                return Err("Failed to register effective mapping identity");
316            }
317        }
318
319        let replaced = self
320            .effective_mappings
321            .lock()
322            .insert(mapping.start, mapping);
323        if let Some(previous) = replaced {
324            if let Some(block) = unregister_mapping_identity(previous.handle, previous.cap_id) {
325                release_owned_block(block);
326            }
327            if let Some(pid) = self.owner_pid() {
328                mapping_index().unregister(previous.cap_id, pid, VirtAddr::new(previous.start));
329            }
330        }
331
332        if let Some(pid) = self.owner_pid() {
333            mapping_index().register(
334                mapping.cap_id,
335                MappingRef {
336                    pid,
337                    vaddr: VirtAddr::new(mapping.start),
338                    page_size: mapping.page_size,
339                },
340            );
341        }
342        Ok(())
343    }
344
345    /// Removes an effective mapping from the address space tracking table.
346    pub fn unregister_effective_mapping(&self, start: u64) -> Option<EffectiveMapping> {
347        let mapping = self.effective_mappings.lock().remove(&start);
348        if let Some(mapping) = mapping {
349            if let Some(block) = unregister_mapping_identity(mapping.handle, mapping.cap_id) {
350                release_owned_block(block);
351            }
352            if let Some(pid) = self.owner_pid() {
353                mapping_index().unregister(mapping.cap_id, pid, VirtAddr::new(mapping.start));
354            }
355            Some(mapping)
356        } else {
357            None
358        }
359    }
360
361    /// Updates the hardware flags recorded for an effective mapping.
362    pub fn update_effective_mapping_flags(&self, start: u64, flags: PageTableFlags) -> bool {
363        if let Some(mapping) = self.effective_mappings.lock().get_mut(&start) {
364            mapping.flags = flags;
365            true
366        } else {
367            false
368        }
369    }
370
371    /// Returns the effective mapping that starts exactly at `start`.
372    pub fn effective_mapping_by_start(&self, start: u64) -> Option<EffectiveMapping> {
373        self.effective_mappings.lock().get(&start).copied()
374    }
375
376    /// Unmaps the effective mapping that starts at `start`.
377    pub fn unmap_effective_mapping(&self, start: u64) -> Result<(), &'static str> {
378        let mapping = self
379            .effective_mapping_by_start(start)
380            .ok_or("Mapping not found")?;
381        self.unmap_range(start, mapping.page_size.bytes())
382    }
383
384    /// Returns the effective mapping covering `addr`, if any.
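    ///
    /// The lookup first masks `addr` down to a 4 KiB boundary and, if no small
    /// mapping starts there, retries with a 2 MiB boundary to cover huge pages.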
385    pub fn effective_mapping_containing(&self, addr: u64) -> Option<EffectiveMapping> {
386        let mappings = self.effective_mappings.lock();
387        if let Some(mapping) = mappings.get(&(addr & !(VmaPageSize::Small.bytes() - 1))) {
388            if mapping.page_size == VmaPageSize::Small {
389                return Some(*mapping);
390            }
391        }
392        mappings
393            .get(&(addr & !(VmaPageSize::Huge.bytes() - 1)))
394            .copied()
395    }
396
397    /// Binds this address space to the given process identifier.
398    pub fn set_owner_pid(&self, pid: Pid) {
399        let previous = self.owner_pid.swap(pid, Ordering::Relaxed);
400        let mappings: Vec<EffectiveMapping> = {
401            let guard = self.effective_mappings.lock();
402            guard.values().copied().collect()
403        };
404
405        if previous != 0 && previous != pid {
406            for mapping in mappings.iter().copied() {
407                mapping_index().unregister(mapping.cap_id, previous, VirtAddr::new(mapping.start));
408            }
409        }
410
411        if pid != 0 {
412            for mapping in mappings {
413                mapping_index().register(
414                    mapping.cap_id,
415                    MappingRef {
416                        pid,
417                        vaddr: VirtAddr::new(mapping.start),
418                        page_size: mapping.page_size,
419                    },
420                );
421            }
422        }
423    }
424
425    /// Returns the owning process identifier, if one has been assigned.
426    pub fn owner_pid(&self) -> Option<Pid> {
427        match self.owner_pid.load(Ordering::Relaxed) {
428            0 => None,
429            pid => Some(pid),
430        }
431    }
432
433    /// Construct a temporary `OffsetPageTable` mapper for this address space.
434    ///
435    /// # Safety
436    /// The caller must ensure exclusive access to the page tables (e.g. via
437    /// the scheduler lock or single-threaded context).
438    pub(crate) unsafe fn mapper(&self) -> OffsetPageTable<'_> {
439        let phys_offset = VirtAddr::new(crate::memory::hhdm_offset());
440        // SAFETY: l4_table_virt is the HHDM-mapped address of our PML4.
441        // The caller guarantees exclusive access.
442        unsafe {
443            OffsetPageTable::new(
444                &mut *self.l4_table_virt.as_mut_ptr::<PageTable>(),
445                phys_offset,
446            )
447        }
448    }
449
450    /// Reserve a contiguous region of virtual pages without allocating physical frames.
451    ///
452    /// The pages will be mapped lazily during page faults (Demand Paging).
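    ///
    /// Hypothetical usage sketch (the address and size below are illustrative only):
    ///
    /// ```ignore
    /// // Reserve a 16-page anonymous heap region; physical frames are only
    /// // allocated later, when `handle_fault` runs on first access.
    /// address_space.reserve_region(
    ///     0x0000_0000_1000_0000,
    ///     16,
    ///     VmaFlags {
    ///         readable: true,
    ///         writable: true,
    ///         executable: false,
    ///         user_accessible: true,
    ///     },
    ///     VmaType::Anonymous,
    ///     VmaPageSize::Small,
    /// )?;
    /// ```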
453    pub fn reserve_region(
454        &self,
455        start: u64,
456        page_count: usize,
457        flags: VmaFlags,
458        vma_type: VmaType,
459        page_size: VmaPageSize,
460    ) -> Result<(), &'static str> {
461        let page_bytes = page_size.bytes();
462        if page_count == 0 || start % page_bytes != 0 {
463            return Err("Invalid region arguments");
464        }
465        let len = (page_count as u64)
466            .checked_mul(page_bytes)
467            .ok_or("Region length overflow")?;
468        let end = start.checked_add(len).ok_or("Region end overflow")?;
469        const USER_SPACE_END: u64 = 0x0000_8000_0000_0000;
470        if end > USER_SPACE_END {
471            return Err("Region out of user-space range");
472        }
473
474        // Reject overlapping VMAs
475        {
476            let regions = self.regions.lock();
477            if regions.iter().any(|(&vma_start, vma)| {
478                let vma_end = vma_start
479                    .saturating_add((vma.page_count as u64).saturating_mul(vma.page_size.bytes()));
480                vma_start < end && vma_end > start
481            }) {
482                return Err("Region overlaps existing mapping");
483            }
484        }
485
486        // Enforce per-silo memory quota (best effort; non-silo tasks are ignored).
487        crate::silo::charge_current_task_memory(len).map_err(|_| "Silo memory quota exceeded")?;
488
489        // Track the region, attempting to merge with previous.
490        let mut regions = self.regions.lock();
491        let mut merged = false;
492
493        if let Some((&prev_start, prev_vma)) = regions.range(..start).next_back() {
494            let prev_end = prev_start + (prev_vma.page_count as u64) * prev_vma.page_size.bytes();
495            if prev_end == start
496                && prev_vma.flags == flags
497                && prev_vma.vma_type == vma_type
498                && prev_vma.page_size == page_size
499            {
500                let new_count = prev_vma
501                    .page_count
502                    .checked_add(page_count)
503                    .ok_or("Region page_count overflow")?;
504                let updated_vma = VirtualMemoryRegion {
505                    start: prev_start,
506                    page_count: new_count,
507                    flags,
508                    vma_type,
509                    page_size,
510                };
511                regions.insert(prev_start, updated_vma);
512                merged = true;
513            }
514        }
515
516        if !merged {
517            let region = VirtualMemoryRegion {
518                start,
519                page_count,
520                flags,
521                vma_type,
522                page_size,
523            };
524            regions.insert(start, region);
525        }
526
527        log::trace!(
528            "Reserved lazy region: {:#x} ({} pages, size={:?})",
529            start,
530            page_count,
531            page_size
532        );
533        Ok(())
534    }
535
536    /// Handle a page fault by checking if the address falls within a reserved VMA.
537    ///
    /// If it does, a frame of the VMA's page size is allocated, zeroed, and mapped.
539    pub fn handle_fault(&self, fault_addr: u64) -> Result<(), &'static str> {
540        use x86_64::structures::paging::mapper::MapToError;
541
542        // 1. Find the VMA covering this address
543        let vma = {
544            let regions = self.regions.lock();
545            let mut iter = regions.range(..=fault_addr);
546            let (&start, vma) = iter.next_back().ok_or("No VMA found for address")?;
547            let end = start + (vma.page_count as u64) * vma.page_size.bytes();
548            if fault_addr >= end {
549                return Err("Address outside VMA bounds");
550            }
551            vma.clone()
552        };
553
554        // Align fault address to the page size used by this VMA.
555        let page_bytes = vma.page_size.bytes();
556        let page_addr = fault_addr & !(page_bytes - 1);
557
        // 2. Only Anonymous/Stack/Code regions support demand paging for now
559        match vma.vma_type {
560            VmaType::Anonymous | VmaType::Stack | VmaType::Code => {}
561            _ => return Err("VMA type does not support demand paging"),
562        }
563
564        // 3. If already mapped (race/re-fault), treat as handled.
565        if self.translate(VirtAddr::new(page_addr)).is_some() {
566            return Ok(());
567        }
568
569        // 4. Allocate and map a single page of the required size.
570        //
571        // IMPORTANT: `allocate_frame` (order-0) now goes through
572        // `FrameAllocOptions::new()` which zeroes by default.  For order > 0
573        // (huge pages) we still need a manual zero via the HHDM.
574        //
575        // The zero MUST go through phys_to_virt (HHDM), NOT through the user
576        // virtual address, because the user address space is not necessarily
577        // the currently active CR3.  Writing through `page_addr as *mut u8`
578        // would either write into a different process's memory or fault.
579        let mut frame_allocator = crate::memory::paging::BuddyFrameAllocator;
580        let order = match vma.page_size {
581            VmaPageSize::Small => 0,
582            VmaPageSize::Huge => 9,
583        };
584
585        let frame = crate::sync::with_irqs_disabled(|token| {
586            if order == 0 {
587                crate::memory::allocate_frame(token)
588            } else {
589                let f = crate::memory::allocate_phys_contiguous(token, order)?;
590                // SAFETY: phys_to_virt gives a valid HHDM pointer for this
591                // frame; we have exclusive ownership from the buddy allocator.
592                unsafe {
593                    core::ptr::write_bytes(
594                        crate::memory::phys_to_virt(f.start_address.as_u64()) as *mut u8,
595                        0,
596                        page_bytes as usize,
597                    );
598                }
599                Ok(f)
600            }
601        })
602        .map_err(|_| "OOM during demand paging")?;
603
604        let mut page_flags = vma.flags.to_page_flags();
605
606        // SAFETY: We own the address space.
607        unsafe {
608            let mut mapper = self.mapper();
609            match vma.page_size {
610                VmaPageSize::Small => {
611                    let page =
612                        Page::<Size4KiB>::from_start_address(VirtAddr::new(page_addr)).unwrap();
613                    let phys_frame =
614                        x86_64::structures::paging::PhysFrame::<Size4KiB>::containing_address(
615                            frame.start_address,
616                        );
617                    match mapper.map_to(page, phys_frame, page_flags, &mut frame_allocator) {
618                        Ok(flush) => {
619                            flush.flush();
620                        }
621                        Err(MapToError::PageAlreadyMapped(_)) => {
622                            crate::sync::with_irqs_disabled(|token| {
623                                crate::memory::free_phys_contiguous(token, frame, order);
624                            });
625                            return Ok(());
626                        }
627                        Err(_) => {
628                            crate::sync::with_irqs_disabled(|token| {
629                                crate::memory::free_phys_contiguous(token, frame, order);
630                            });
631                            return Err("Failed to map demand page (4K)");
632                        }
633                    }
634                }
635                VmaPageSize::Huge => {
636                    let page =
637                        Page::<Size2MiB>::from_start_address(VirtAddr::new(page_addr)).unwrap();
638                    let phys_frame =
639                        x86_64::structures::paging::PhysFrame::<Size2MiB>::containing_address(
640                            frame.start_address,
641                        );
642                    page_flags |= PageTableFlags::HUGE_PAGE;
643                    match mapper.map_to(page, phys_frame, page_flags, &mut frame_allocator) {
644                        Ok(flush) => {
645                            flush.flush();
646                        }
647                        Err(MapToError::PageAlreadyMapped(_)) => {
648                            crate::sync::with_irqs_disabled(|token| {
649                                crate::memory::free_phys_contiguous(token, frame, order);
650                            });
651                            return Ok(());
652                        }
653                        Err(_) => {
654                            crate::sync::with_irqs_disabled(|token| {
655                                crate::memory::free_phys_contiguous(token, frame, order);
656                            });
657                            return Err("Failed to map demand page (2M)");
658                        }
659                    }
660                }
661            }
662        }
663
664        if self
665            .register_effective_mapping(EffectiveMapping {
666                start: page_addr,
667                cap_id: allocate_mapping_cap_id(),
668                handle: resolve_handle(frame.start_address),
669                flags: page_flags,
670                page_size: vma.page_size,
671            })
672            .is_err()
673        {
674            unsafe {
675                let mut mapper = self.mapper();
676                match vma.page_size {
677                    VmaPageSize::Small => {
678                        let page =
679                            Page::<Size4KiB>::from_start_address(VirtAddr::new(page_addr)).unwrap();
680                        if let Ok((_, flush)) = mapper.unmap(page) {
681                            flush.flush();
682                        }
683                    }
684                    VmaPageSize::Huge => {
685                        let page =
686                            Page::<Size2MiB>::from_start_address(VirtAddr::new(page_addr)).unwrap();
687                        if let Ok((_, flush)) = mapper.unmap(page) {
688                            flush.flush();
689                        }
690                    }
691                }
692            }
693            crate::sync::with_irqs_disabled(|token| {
694                crate::memory::free_phys_contiguous(token, frame, order);
695            });
696            return Err("Failed to track demand page mapping");
697        }
698
699        // Initialize COW refcount.
700        //
701        // Order-0 frames come from FrameAllocOptions which stamps refcount=1
        // via CAS(REFCOUNT_UNUSED → 1): the frame is already "sole owner".
703        // Huge pages (order > 0) are raw-allocated with REFCOUNT_UNUSED still
704        // in the metadata; initialise explicitly to 1 here.
705        //
706        // Do NOT call frame_inc_ref for fresh allocations: that would push the
707        // count to 2, breaking the COW semantics (refcount==1 means sole owner).
708        // frame_inc_ref is correct only when sharing an existing frame (fork).
709        if order != 0 {
710            crate::memory::cow::handle_init_ref(resolve_handle(frame.start_address));
711        }
712
713        Ok(())
714    }
715
716    /// Map a contiguous region of pages backed by newly allocated physical frames.
717    ///
718    /// Frames are allocated from the buddy allocator and zero-filled.
719    /// The region is tracked in the VMA list.
720    pub fn map_region(
721        &self,
722        start: u64,
723        page_count: usize,
724        flags: VmaFlags,
725        vma_type: VmaType,
726        page_size: VmaPageSize,
727    ) -> Result<(), &'static str> {
728        let page_bytes = page_size.bytes();
729        if page_count == 0 || start % page_bytes != 0 {
730            return Err("Invalid region arguments");
731        }
732        let len = (page_count as u64)
733            .checked_mul(page_bytes)
734            .ok_or("Region length overflow")?;
735        let end = start.checked_add(len).ok_or("Region end overflow")?;
736        const USER_SPACE_END: u64 = 0x0000_8000_0000_0000;
737        if end > USER_SPACE_END {
738            return Err("Region out of user-space range");
739        }
740
741        // Reject overlapping VMAs early
742        {
743            let regions = self.regions.lock();
744            if regions.iter().any(|(&vma_start, vma)| {
745                let vma_end = vma_start
746                    .saturating_add((vma.page_count as u64).saturating_mul(vma.page_size.bytes()));
747                vma_start < end && vma_end > start
748            }) {
749                return Err("Region overlaps existing mapping");
750            }
751        }
752
753        // Enforce per-silo memory quota for eagerly mapped regions.
754        crate::silo::charge_current_task_memory(len).map_err(|_| "Silo memory quota exceeded")?;
755
756        let page_flags = flags.to_page_flags();
757        let mut frame_allocator = BuddyFrameAllocator;
758
759        // SAFETY: we have logical ownership of this address space.
760        let mut mapper = unsafe { self.mapper() };
761        let mut mapped_pages = 0usize;
762
763        for i in 0..page_count {
764            let page_addr = start
765                .checked_add((i as u64).saturating_mul(page_bytes))
766                .ok_or("Page address overflow")?;
767
768            // Allocate a physical frame of appropriate size.
769            //
770            // order-0 frames go through FrameAllocOptions (zeroed + metadata
771            // stamped).  order > 0 (huge pages) are zeroed manually via HHDM.
772            let order = match page_size {
773                VmaPageSize::Small => 0,
774                VmaPageSize::Huge => 9,
775            };
776
777            let frame = crate::sync::with_irqs_disabled(|token| {
778                if order == 0 {
779                    crate::memory::allocate_frame(token)
780                } else {
781                    let f = crate::memory::allocate_phys_contiguous(token, order)?;
782                    unsafe {
783                        let virt = crate::memory::phys_to_virt(f.start_address.as_u64());
784                        core::ptr::write_bytes(virt as *mut u8, 0, page_bytes as usize);
785                    }
786                    Ok(f)
787                }
788            })
789            .map_err(|_| "Failed to allocate frame")?;
790
791            // Map the page.
792            let map_ok = match page_size {
793                VmaPageSize::Small => {
794                    use x86_64::structures::paging::Size4KiB;
795                    let page = Page::<Size4KiB>::from_start_address(VirtAddr::new(page_addr))
796                        .map_err(|_| "Map 4K: invalid page address")?;
797                    let phys_frame =
798                        x86_64::structures::paging::PhysFrame::<Size4KiB>::containing_address(
799                            frame.start_address,
800                        );
801                    unsafe {
802                        mapper
803                            .map_to(page, phys_frame, page_flags, &mut frame_allocator)
804                            .map(|flush| flush.flush())
805                            .is_ok()
806                    }
807                }
808                VmaPageSize::Huge => {
809                    use x86_64::structures::paging::Size2MiB;
810                    let page = Page::<Size2MiB>::from_start_address(VirtAddr::new(page_addr))
811                        .map_err(|_| "Map 2M: invalid page address")?;
812                    let phys_frame =
813                        x86_64::structures::paging::PhysFrame::<Size2MiB>::containing_address(
814                            frame.start_address,
815                        );
816                    let mut huge_flags = page_flags;
817                    huge_flags |= PageTableFlags::HUGE_PAGE;
818                    unsafe {
819                        mapper
820                            .map_to(page, phys_frame, huge_flags, &mut frame_allocator)
821                            .map(|flush| flush.flush())
822                            .is_ok()
823                    }
824                }
825            };
826
827            if !map_ok {
828                log::error!(
829                    "map_region: map_to failed at page {} vaddr={:#x} size={:?}",
830                    i,
831                    page_addr,
832                    page_size
833                );
834                // Free frame for this page that failed to map.
835                crate::sync::with_irqs_disabled(|token| {
836                    crate::memory::free_phys_contiguous(token, frame, order);
837                });
838
839                // Roll back already mapped pages to keep state consistent.
840                for j in (0..mapped_pages).rev() {
841                    let rb_addr = start + (j as u64) * page_bytes;
842                    match page_size {
843                        VmaPageSize::Small => {
844                            use x86_64::structures::paging::Size4KiB;
845                            let rb_page =
846                                Page::<Size4KiB>::from_start_address(VirtAddr::new(rb_addr))
847                                    .map_err(|_| "Rollback: invalid 4K page address")?;
848                            if let Ok((_, rb_flush)) = mapper.unmap(rb_page) {
849                                rb_flush.flush();
850                                let _ = self.unregister_effective_mapping(rb_addr);
851                            }
852                        }
853                        VmaPageSize::Huge => {
854                            use x86_64::structures::paging::Size2MiB;
855                            let rb_page =
856                                Page::<Size2MiB>::from_start_address(VirtAddr::new(rb_addr))
857                                    .map_err(|_| "Rollback: invalid 2M page address")?;
858                            if let Ok((_, rb_flush)) = mapper.unmap(rb_page) {
859                                rb_flush.flush();
860                                let _ = self.unregister_effective_mapping(rb_addr);
861                            }
862                        }
863                    }
864                }
865
866                crate::silo::release_current_task_memory(len);
867                return Err("Failed to map page");
868            }
869
            // Initialize COW refcount (same logic as in `handle_fault` above).
871            let effective_flags = match page_size {
872                VmaPageSize::Small => page_flags,
873                VmaPageSize::Huge => page_flags | PageTableFlags::HUGE_PAGE,
874            };
875            if self
876                .register_effective_mapping(EffectiveMapping {
877                    start: page_addr,
878                    cap_id: allocate_mapping_cap_id(),
879                    handle: resolve_handle(frame.start_address),
880                    flags: effective_flags,
881                    page_size,
882                })
883                .is_err()
884            {
885                match page_size {
886                    VmaPageSize::Small => {
887                        use x86_64::structures::paging::Size4KiB;
888                        let page = Page::<Size4KiB>::from_start_address(VirtAddr::new(page_addr))
889                            .map_err(|_| "Rollback: invalid 4K page address")?;
890                        if let Ok((_, flush)) = mapper.unmap(page) {
891                            flush.flush();
892                        }
893                    }
894                    VmaPageSize::Huge => {
895                        use x86_64::structures::paging::Size2MiB;
896                        let page = Page::<Size2MiB>::from_start_address(VirtAddr::new(page_addr))
897                            .map_err(|_| "Rollback: invalid 2M page address")?;
898                        if let Ok((_, flush)) = mapper.unmap(page) {
899                            flush.flush();
900                        }
901                    }
902                }
903                crate::sync::with_irqs_disabled(|token| {
904                    crate::memory::free_phys_contiguous(token, frame, order);
905                });
906                for j in (0..mapped_pages).rev() {
907                    let rb_addr = start + (j as u64) * page_bytes;
908                    match page_size {
909                        VmaPageSize::Small => {
910                            use x86_64::structures::paging::Size4KiB;
911                            let rb_page =
912                                Page::<Size4KiB>::from_start_address(VirtAddr::new(rb_addr))
913                                    .map_err(|_| "Rollback: invalid 4K page address")?;
914                            if let Ok((_, rb_flush)) = mapper.unmap(rb_page) {
915                                rb_flush.flush();
916                                let _ = self.unregister_effective_mapping(rb_addr);
917                            }
918                        }
919                        VmaPageSize::Huge => {
920                            use x86_64::structures::paging::Size2MiB;
921                            let rb_page =
922                                Page::<Size2MiB>::from_start_address(VirtAddr::new(rb_addr))
923                                    .map_err(|_| "Rollback: invalid 2M page address")?;
924                            if let Ok((_, rb_flush)) = mapper.unmap(rb_page) {
925                                rb_flush.flush();
926                                let _ = self.unregister_effective_mapping(rb_addr);
927                            }
928                        }
929                    }
930                }
931                crate::silo::release_current_task_memory(len);
932                return Err("Failed to track mapped region page");
933            }
934
935            mapped_pages += 1;
936        }
937
938        // Track the region
939        let mut regions = self.regions.lock();
940        let region = VirtualMemoryRegion {
941            start,
942            page_count,
943            flags,
944            vma_type,
945            page_size,
946        };
947        regions.insert(start, region);
948
949        let end = start + (page_count as u64) * page_bytes;
950        crate::trace_mem!(
951            crate::trace::category::MEM_MAP,
952            crate::trace::TraceKind::MemMap,
953            page_size.bytes(),
954            crate::trace::TraceTaskCtx {
955                task_id: 0,
956                pid: 0,
957                tid: 0,
958                cr3: self.cr3_phys.as_u64(),
959            },
960            0,
961            start,
962            end,
963            page_count as u64
964        );
965
966        Ok(())
967    }
968
    /// Maps already-allocated shared physical frames as consecutive 4 KiB pages starting at `start`.
970    pub fn map_shared_frames(
971        &self,
972        start: u64,
973        frame_phys_addrs: &[u64],
974        flags: VmaFlags,
975        vma_type: VmaType,
976    ) -> Result<(), &'static str> {
977        self.map_shared_frames_with_cap_ids(start, frame_phys_addrs, None, flags, vma_type)
978    }
979
980    /// Maps shared physical blocks with optional stable mapping identities.
981    pub fn map_shared_handles_with_cap_ids(
982        &self,
983        start: u64,
984        handles: &[BlockHandle],
985        mapping_cap_ids: Option<&[CapId]>,
986        flags: VmaFlags,
987        vma_type: VmaType,
988        page_size: VmaPageSize,
989    ) -> Result<(), &'static str> {
990        let page_count = handles.len();
991        let page_bytes = page_size.bytes();
992        if page_count == 0 || start % page_bytes != 0 {
993            return Err("Invalid shared region arguments");
994        }
995        if mapping_cap_ids.is_some_and(|cap_ids| cap_ids.len() != page_count) {
996            return Err("Shared mapping identity count mismatch");
997        }
998        let len = (page_count as u64)
999            .checked_mul(page_bytes)
1000            .ok_or("Shared region length overflow")?;
1001        let end = start.checked_add(len).ok_or("Shared region end overflow")?;
1002        const USER_SPACE_END: u64 = 0x0000_8000_0000_0000;
1003        if end > USER_SPACE_END {
1004            return Err("Shared region out of user-space range");
1005        }
1006
1007        {
1008            let regions = self.regions.lock();
1009            if regions.iter().any(|(&vma_start, vma)| {
1010                let vma_end = vma_start
1011                    .saturating_add((vma.page_count as u64).saturating_mul(vma.page_size.bytes()));
1012                vma_start < end && vma_end > start
1013            }) {
1014                return Err("Shared region overlaps existing mapping");
1015            }
1016        }
1017
1018        let mut page_flags = flags.to_page_flags();
1019        if page_size == VmaPageSize::Huge {
1020            page_flags |= PageTableFlags::HUGE_PAGE;
1021        }
1022        let mut frame_allocator = BuddyFrameAllocator;
1023        let mut mapper = unsafe { self.mapper() };
1024        let mut mapped_pages = 0usize;
1025
1026        for (index, handle) in handles.iter().copied().enumerate() {
1027            let page_addr = start
1028                .checked_add((index as u64) * page_bytes)
1029                .ok_or("Shared page address overflow")?;
1030
1031            let map_ok = match page_size {
1032                VmaPageSize::Small => {
1033                    let page = Page::<Size4KiB>::from_start_address(VirtAddr::new(page_addr))
1034                        .map_err(|_| "Map shared: invalid 4K page address")?;
1035                    let frame = X86PhysFrame::<Size4KiB>::containing_address(handle.base);
1036                    unsafe {
1037                        mapper
1038                            .map_to(page, frame, page_flags, &mut frame_allocator)
1039                            .map(|flush| flush.flush())
1040                            .is_ok()
1041                    }
1042                }
1043                VmaPageSize::Huge => {
1044                    let page = Page::<Size2MiB>::from_start_address(VirtAddr::new(page_addr))
1045                        .map_err(|_| "Map shared: invalid 2M page address")?;
1046                    let frame = X86PhysFrame::<Size2MiB>::containing_address(handle.base);
1047                    unsafe {
1048                        mapper
1049                            .map_to(page, frame, page_flags, &mut frame_allocator)
1050                            .map(|flush| flush.flush())
1051                            .is_ok()
1052                    }
1053                }
1054            };
1055
1056            if !map_ok {
1057                for rollback in (0..mapped_pages).rev() {
1058                    let rb_addr = start + (rollback as u64) * page_bytes;
1059                    match page_size {
1060                        VmaPageSize::Small => {
1061                            if let Ok(rb_page) =
1062                                Page::<Size4KiB>::from_start_address(VirtAddr::new(rb_addr))
1063                            {
1064                                if let Ok((_, rb_flush)) = mapper.unmap(rb_page) {
1065                                    rb_flush.flush();
1066                                    let _ = self.unregister_effective_mapping(rb_addr);
1067                                }
1068                            }
1069                        }
1070                        VmaPageSize::Huge => {
1071                            if let Ok(rb_page) =
1072                                Page::<Size2MiB>::from_start_address(VirtAddr::new(rb_addr))
1073                            {
1074                                if let Ok((_, rb_flush)) = mapper.unmap(rb_page) {
1075                                    rb_flush.flush();
1076                                    let _ = self.unregister_effective_mapping(rb_addr);
1077                                }
1078                            }
1079                        }
1080                    }
1081                }
1082                return Err("Failed to map shared page");
1083            }
1084
1085            if self
1086                .register_effective_mapping(EffectiveMapping {
1087                    start: page_addr,
1088                    cap_id: mapping_cap_ids
1089                        .and_then(|cap_ids| cap_ids.get(index).copied())
1090                        .unwrap_or_else(allocate_mapping_cap_id),
1091                    handle,
1092                    flags: page_flags,
1093                    page_size,
1094                })
1095                .is_err()
1096            {
1097                match page_size {
1098                    VmaPageSize::Small => {
1099                        if let Ok(page) =
1100                            Page::<Size4KiB>::from_start_address(VirtAddr::new(page_addr))
1101                        {
1102                            if let Ok((_, flush)) = mapper.unmap(page) {
1103                                flush.flush();
1104                            }
1105                        }
1106                    }
1107                    VmaPageSize::Huge => {
1108                        if let Ok(page) =
1109                            Page::<Size2MiB>::from_start_address(VirtAddr::new(page_addr))
1110                        {
1111                            if let Ok((_, flush)) = mapper.unmap(page) {
1112                                flush.flush();
1113                            }
1114                        }
1115                    }
1116                }
1117                for rollback in (0..mapped_pages).rev() {
1118                    let rb_addr = start + (rollback as u64) * page_bytes;
1119                    match page_size {
1120                        VmaPageSize::Small => {
1121                            if let Ok(rb_page) =
1122                                Page::<Size4KiB>::from_start_address(VirtAddr::new(rb_addr))
1123                            {
1124                                if let Ok((_, rb_flush)) = mapper.unmap(rb_page) {
1125                                    rb_flush.flush();
1126                                    let _ = self.unregister_effective_mapping(rb_addr);
1127                                }
1128                            }
1129                        }
1130                        VmaPageSize::Huge => {
1131                            if let Ok(rb_page) =
1132                                Page::<Size2MiB>::from_start_address(VirtAddr::new(rb_addr))
1133                            {
1134                                if let Ok((_, rb_flush)) = mapper.unmap(rb_page) {
1135                                    rb_flush.flush();
1136                                    let _ = self.unregister_effective_mapping(rb_addr);
1137                                }
1138                            }
1139                        }
1140                    }
1141                }
1142                return Err("Failed to track shared mapping");
1143            }
1144            mapped_pages += 1;
1145        }
1146
1147        self.regions.lock().insert(
1148            start,
1149            VirtualMemoryRegion {
1150                start,
1151                page_count,
1152                flags,
1153                vma_type,
1154                page_size,
1155            },
1156        );
1157        Ok(())
1158    }
1159
1160    /// Maps shared frames with optional stable mapping identities.
1161    pub fn map_shared_frames_with_cap_ids(
1162        &self,
1163        start: u64,
1164        frame_phys_addrs: &[u64],
1165        mapping_cap_ids: Option<&[CapId]>,
1166        flags: VmaFlags,
1167        vma_type: VmaType,
1168    ) -> Result<(), &'static str> {
1169        let handles = frame_phys_addrs
1170            .iter()
1171            .copied()
1172            .map(|phys_addr| resolve_handle(PhysAddr::new(phys_addr)))
1173            .collect::<Vec<_>>();
1174        self.map_shared_handles_with_cap_ids(
1175            start,
1176            &handles,
1177            mapping_cap_ids,
1178            flags,
1179            vma_type,
1180            VmaPageSize::Small,
1181        )
1182    }
1183
1184    /// Unmap a previously mapped region and free the backing frames.
1185    pub fn unmap_region(
1186        &self,
1187        start: u64,
1188        page_count: usize,
1189        page_size: VmaPageSize,
1190    ) -> Result<(), &'static str> {
1191        let page_bytes = page_size.bytes();
1192        // SAFETY: We have logical ownership of this address space.
1193        let mut mapper = unsafe { self.mapper() };
1194
1195        for i in 0..page_count {
1196            let page_addr = start + (i as u64) * page_bytes;
1197
1198            let _frame_addr = match page_size {
1199                VmaPageSize::Small => {
1200                    use x86_64::structures::paging::Size4KiB;
1201                    let page = Page::<Size4KiB>::from_start_address(VirtAddr::new(page_addr))
1202                        .map_err(|_| "Failed to unmap: invalid 4K page address")?;
1203                    let (frame, flush) =
1204                        mapper.unmap(page).map_err(|_| "Failed to unmap 4K page")?;
1205                    flush.flush();
1206                    frame.start_address()
1207                }
1208                VmaPageSize::Huge => {
1209                    use x86_64::structures::paging::Size2MiB;
1210                    let page = Page::<Size2MiB>::from_start_address(VirtAddr::new(page_addr))
1211                        .map_err(|_| "Failed to unmap: invalid 2M page address")?;
1212                    let (frame, flush) =
1213                        mapper.unmap(page).map_err(|_| "Failed to unmap 2M page")?;
1214                    flush.flush();
1215                    frame.start_address()
1216                }
1217            };
1218
1219            // COW-aware refcount decrement: free only when last mapping disappears.
1220            let _ = self.unregister_effective_mapping(page_addr);
1221        }
1222
1223        // Remove from VMA tracking.
1224        self.regions.lock().remove(&start);
1225
1226        log::trace!(
1227            "Unmapped region: {:#x}..{:#x} ({} pages, size={:?})",
1228            start,
1229            start + (page_count as u64) * page_bytes,
1230            page_count,
1231            page_size
1232        );
1233
1234        let end = start + (page_count as u64) * page_bytes;
1235        crate::trace_mem!(
1236            crate::trace::category::MEM_UNMAP,
1237            crate::trace::TraceKind::MemUnmap,
1238            page_size.bytes(),
1239            crate::trace::TraceTaskCtx {
1240                task_id: 0,
1241                pid: 0,
1242                tid: 0,
1243                cr3: self.cr3_phys.as_u64(),
1244            },
1245            0,
1246            start,
1247            end,
1248            page_count as u64
1249        );
1250
1251        let released = (page_count as u64).saturating_mul(page_bytes);
1252        crate::silo::release_current_task_memory(released);
1253
1254        Ok(())
1255    }
1256
1257    /// Find a free virtual address range of `n_pages` pages of `page_size` starting at or after `hint`.
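    ///
    /// Hypothetical usage sketch (the hint value is illustrative only):
    ///
    /// ```ignore
    /// // Look for room for four 4 KiB pages at or above 0x4000_0000, then reserve
    /// // them with a previously constructed `VmaFlags` value.
    /// if let Some(base) = address_space.find_free_vma_range(0x4000_0000, 4, VmaPageSize::Small) {
    ///     address_space.reserve_region(base, 4, flags, VmaType::Anonymous, VmaPageSize::Small)?;
    /// }
    /// ```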
1258    pub fn find_free_vma_range(
1259        &self,
1260        hint: u64,
1261        n_pages: usize,
1262        page_size: VmaPageSize,
1263    ) -> Option<u64> {
1264        if n_pages == 0 {
1265            return None;
1266        }
1267        let page_bytes = page_size.bytes();
1268        let length = (n_pages as u64).checked_mul(page_bytes)?;
1269        let upper_limit: u64 = 0x0000_8000_0000_0000; // USER_SPACE_END
1270
1271        // Round hint up to a page boundary
1272        let mut candidate = (hint.saturating_add(page_bytes - 1)) & !(page_bytes - 1);
1273        if candidate == 0 {
1274            candidate = page_bytes;
1275        }
1276
1277        let regions = self.regions.lock();
1278        for (&vma_start, vma) in regions.iter() {
1279            let vma_end = vma_start + vma.page_count as u64 * vma.page_size.bytes();
1280
            // A gap exists before this VMA: the candidate fits here.
1282            if candidate.saturating_add(length) <= vma_start {
1283                break;
1284            }
1285
1286            // Candidate overlaps this VMA; skip past it.
1287            if vma_end > candidate {
1288                candidate = (vma_end.saturating_add(page_bytes - 1)) & !(page_bytes - 1);
1289            }
1290        }
1291
1292        // Final bounds check.
1293        if candidate.checked_add(length)? <= upper_limit {
1294            Some(candidate)
1295        } else {
1296            None
1297        }
1298    }
1299
1300    /// Return true if any tracked VMA overlaps `[addr, addr + len)`.
1301    pub fn has_mapping_in_range(&self, addr: u64, len: u64) -> bool {
1302        let end = match addr.checked_add(len) {
1303            Some(v) => v,
1304            None => return true,
1305        };
1306        let regions = self.regions.lock();
1307        regions.iter().any(|(&vma_start, vma)| {
1308            let vma_end = vma_start
1309                .saturating_add((vma.page_count as u64).saturating_mul(vma.page_size.bytes()));
1310            vma_start < end && vma_end > addr
1311        })
1312    }
1313
1314    /// Return the tracked VMA that starts exactly at `start`.
1315    pub fn region_by_start(&self, start: u64) -> Option<VirtualMemoryRegion> {
1316        let regions = self.regions.lock();
1317        regions.get(&start).cloned()
1318    }
1319
1320    /// Returns true if any page in `[addr, addr + len)` is currently mapped.
1321    pub fn any_mapped_in_range(
1322        &self,
1323        addr: u64,
1324        len: u64,
1325        page_size: VmaPageSize,
1326    ) -> Result<bool, &'static str> {
1327        if len == 0 {
1328            return Ok(false);
1329        }
1330        let end = addr
1331            .checked_add(len)
1332            .ok_or("any_mapped_in_range: address overflow")?;
1333        let step = page_size.bytes();
1334        let mut cur = addr;
1335        while cur < end {
1336            if self.translate(VirtAddr::new(cur)).is_some() {
1337                return Ok(true);
1338            }
1339            cur = cur
1340                .checked_add(step)
1341                .ok_or("any_mapped_in_range: loop overflow")?;
1342        }
1343        Ok(false)
1344    }
1345
1346    /// Change the protection flags on every mapped page in `[addr, addr + len)` (mprotect).
    ///
    /// Overlapping VMAs are split into leading / updated / trailing fragments so that only
    /// the requested range changes permissions. Partial updates of 2 MiB mappings are
    /// rejected; callers must operate on huge-page boundaries.
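    ///
    /// A hedged sketch of the intended effect, for some user `AddressSpace` handle
    /// `aspace` (names and addresses are illustrative):
    ///
    /// ```ignore
    /// // An 8-page RW anonymous VMA starts at 0x40_0000; make its middle 4 pages read-only.
    /// aspace.protect_range(
    ///     0x40_2000,
    ///     4 * 4096,
    ///     VmaFlags { readable: true, writable: false, executable: false, user_accessible: true },
    /// )?;
    /// // Tracking now holds three VMAs:
    /// //   [0x40_0000, 0x40_2000) RW, [0x40_2000, 0x40_6000) RO, [0x40_6000, 0x40_8000) RW.
    /// ```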
1347    pub fn protect_range(&self, addr: u64, len: u64, flags: VmaFlags) -> Result<(), &'static str> {
1348        if len == 0 {
1349            return Ok(());
1350        }
1351        let end = addr
1352            .checked_add(len)
1353            .ok_or("protect_range: address overflow")?;
1354        let mut cursor = addr;
1355
1356        {
1357            let regions = self.regions.lock();
1358            for (&vma_start, vma) in regions.iter() {
1359                let vma_end = vma_start + vma.page_count as u64 * vma.page_size.bytes();
1360                if vma_start >= end || vma_end <= addr {
1361                    continue;
1362                }
1363                if vma.page_size == VmaPageSize::Huge {
1364                    let range_start = core::cmp::max(vma_start, addr);
1365                    let range_end = core::cmp::min(vma_end, end);
1366                    if range_start % vma.page_size.bytes() != 0
1367                        || range_end % vma.page_size.bytes() != 0
1368                    {
1369                        return Err(
1370                            "protect_range: partial mprotect of 2MiB pages is not supported",
1371                        );
1372                    }
1373                }
1374            }
1375        }
1376
1377        let mut touched = false;
1378        while cursor < end {
1379            let region_info = {
1380                let regions = self.regions.lock();
1381                regions
1382                    .iter()
1383                    .find(|(&vma_start, vma)| {
1384                        let vma_end = vma_start + vma.page_count as u64 * vma.page_size.bytes();
1385                        vma_start < end && vma_end > cursor
1386                    })
1387                    .map(|(&k, v)| (k, v.clone()))
1388            };
1389
1390            let Some((vma_start, vma)) = region_info else {
1391                break;
1392            };
1393            touched = true;
1394
1395            let vma_end = vma_start + vma.page_count as u64 * vma.page_size.bytes();
1396            let range_start = core::cmp::max(vma_start, cursor);
1397            let range_end = core::cmp::min(vma_end, end);
1398            let page_bytes = vma.page_size.bytes();
1399            let new_pt_flags = flags.to_page_flags();
1400
1401            let mut mapper = unsafe { self.mapper() };
1402            let mut page_addr = range_start;
1403            while page_addr < range_end {
1404                if mapper.translate_addr(VirtAddr::new(page_addr)).is_none() {
1405                    page_addr += page_bytes;
1406                    continue;
1407                }
1408                unsafe {
1409                    match vma.page_size {
1410                        VmaPageSize::Small => {
1411                            let page =
1412                                Page::<Size4KiB>::from_start_address(VirtAddr::new(page_addr))
1413                                    .map_err(|_| "protect_range: invalid 4K page address")?;
1414                            mapper
1415                                .update_flags(page, new_pt_flags)
1416                                .map(|f| f.ignore())
1417                                .map_err(|_| "protect_range: update 4K flags failed")?;
1418                            let _ = self.update_effective_mapping_flags(page_addr, new_pt_flags);
1419                        }
1420                        VmaPageSize::Huge => {
1421                            let mut huge_flags = new_pt_flags;
1422                            huge_flags |= PageTableFlags::HUGE_PAGE;
1423                            let page =
1424                                Page::<Size2MiB>::from_start_address(VirtAddr::new(page_addr))
1425                                    .map_err(|_| "protect_range: invalid 2M page address")?;
1426                            mapper
1427                                .update_flags(page, huge_flags)
1428                                .map(|f| f.ignore())
1429                                .map_err(|_| "protect_range: update 2M flags failed")?;
1430                            let _ = self.update_effective_mapping_flags(page_addr, huge_flags);
1431                        }
1432                    }
1433                }
1434                page_addr += page_bytes;
1435            }
1436
1437            {
1438                let mut regions = self.regions.lock();
1439                regions.remove(&vma_start);
1440
1441                if range_start > vma_start {
1442                    let leading_pages = ((range_start - vma_start) / page_bytes) as usize;
1443                    regions.insert(
1444                        vma_start,
1445                        VirtualMemoryRegion {
1446                            start: vma_start,
1447                            page_count: leading_pages,
1448                            flags: vma.flags,
1449                            vma_type: vma.vma_type,
1450                            page_size: vma.page_size,
1451                        },
1452                    );
1453                }
1454
1455                let middle_pages = ((range_end - range_start) / page_bytes) as usize;
1456                if middle_pages > 0 {
1457                    regions.insert(
1458                        range_start,
1459                        VirtualMemoryRegion {
1460                            start: range_start,
1461                            page_count: middle_pages,
1462                            flags,
1463                            vma_type: vma.vma_type,
1464                            page_size: vma.page_size,
1465                        },
1466                    );
1467                }
1468
1469                if range_end < vma_end {
1470                    let trailing_pages = ((vma_end - range_end) / page_bytes) as usize;
1471                    regions.insert(
1472                        range_end,
1473                        VirtualMemoryRegion {
1474                            start: range_end,
1475                            page_count: trailing_pages,
1476                            flags: vma.flags,
1477                            vma_type: vma.vma_type,
1478                            page_size: vma.page_size,
1479                        },
1480                    );
1481                }
1482            }
1483
1484            cursor = range_end;
1485        }
1486
1487        if !touched {
1488            return Err("protect_range: no mapped region in range");
1489        }
1490        Ok(())
1491    }
1492
1493    /// Unmap every mapped page in `[addr, addr + len)` (munmap).
    ///
    /// Overlapping VMAs are trimmed or split into leading/trailing fragments; pages that
    /// were never faulted in are skipped. Partial unmaps of 2 MiB mappings are rejected.
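    ///
    /// A hedged sketch, for some user `AddressSpace` handle `aspace` (names and
    /// addresses are illustrative):
    ///
    /// ```ignore
    /// // Punch a one-page hole in the middle of a 4-page VMA at 0x50_0000.
    /// aspace.unmap_range(0x50_1000, 4096)?;
    /// // Tracking now holds [0x50_0000, 0x50_1000) and [0x50_2000, 0x50_4000).
    /// ```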
1494    pub fn unmap_range(&self, addr: u64, len: u64) -> Result<(), &'static str> {
1495        if len == 0 {
1496            return Ok(());
1497        }
1498        let end = addr
1499            .checked_add(len)
1500            .ok_or("unmap_range: address overflow")?;
1501
1502        // Pre-validate huge-page overlaps: partial unmap of 2MiB mappings is
1503        // not supported yet. Callers must unmap on huge-page boundaries.
1504        {
1505            let regions = self.regions.lock();
1506            for (&vma_start, vma) in regions.iter() {
1507                let vma_end = vma_start + vma.page_count as u64 * vma.page_size.bytes();
1508                if vma_start >= end || vma_end <= addr {
1509                    continue;
1510                }
1511                if vma.page_size == VmaPageSize::Huge {
1512                    let range_start = core::cmp::max(vma_start, addr);
1513                    let range_end = core::cmp::min(vma_end, end);
1514                    if range_start % vma.page_size.bytes() != 0
1515                        || range_end % vma.page_size.bytes() != 0
1516                    {
1517                        return Err("unmap_range: partial unmap of 2MiB pages is not supported");
1518                    }
1519                }
1520            }
1521        }
1522
1523        // Process regions one by one to avoid heap allocation (Vec)
1524        let mut released_bytes = 0u64;
1525        loop {
1526            // Find the first overlapping region
1527            let region_info = {
1528                let regions = self.regions.lock();
1529                regions
1530                    .iter()
1531                    .find(|(&vma_start, vma)| {
1532                        let vma_end = vma_start + vma.page_count as u64 * vma.page_size.bytes();
1533                        vma_start < end && vma_end > addr
1534                    })
1535                    .map(|(&k, v)| (k, v.clone()))
1536            };
1537
1538            let Some((vma_start, vma)) = region_info else {
1539                break; // No more overlapping regions
1540            };
1541
1542            let vma_end = vma_start + vma.page_count as u64 * vma.page_size.bytes();
1543            let range_start = core::cmp::max(vma_start, addr);
1544            let range_end = core::cmp::min(vma_end, end);
1545            released_bytes = released_bytes.saturating_add(range_end.saturating_sub(range_start));
1546
1547            // 1. Hardware unmap
1548            // SAFETY: Logical ownership of address space.
1549            let mut mapper = unsafe { self.mapper() };
1550            let mut page_addr = range_start;
1551            let page_bytes = vma.page_size.bytes();
1552            while page_addr < range_end {
1553                // Lazy VMAs can contain unfaulted pages (no PTE). In that case
1554                // there is nothing to unmap in hardware; just update VMA metadata.
1555                if mapper.translate_addr(VirtAddr::new(page_addr)).is_none() {
1556                    page_addr += page_bytes;
1557                    continue;
1558                }
1559
1560                let _frame_addr = match vma.page_size {
1561                    VmaPageSize::Small => {
1562                        use x86_64::structures::paging::Size4KiB;
1563                        let page = Page::<Size4KiB>::from_start_address(VirtAddr::new(page_addr))
1564                            .map_err(|_| "unmap_range: invalid 4K page address")?;
1565                        let (frame, flush) = mapper
1566                            .unmap(page)
1567                            .map_err(|_| "unmap_range: unmap 4K failed")?;
1568                        flush.flush();
1569                        frame.start_address()
1570                    }
1571                    VmaPageSize::Huge => {
1572                        use x86_64::structures::paging::Size2MiB;
1573                        let page = Page::<Size2MiB>::from_start_address(VirtAddr::new(page_addr))
1574                            .map_err(|_| "unmap_range: invalid 2M page address")?;
1575                        let (frame, flush) = mapper
1576                            .unmap(page)
1577                            .map_err(|_| "unmap_range: unmap 2M failed")?;
1578                        flush.flush();
1579                        frame.start_address()
1580                    }
1581                };
1582
1583                let _ = self.unregister_effective_mapping(page_addr);
1584                page_addr += page_bytes;
1585            }
1586
1587            // 2. Update tracking: remove and re-insert fragments
1588            {
1589                let mut regions = self.regions.lock();
1590                regions.remove(&vma_start);
1591
1592                if range_start > vma_start {
1593                    let leading_pages =
1594                        ((range_start - vma_start) / vma.page_size.bytes()) as usize;
1595                    regions.insert(
1596                        vma_start,
1597                        VirtualMemoryRegion {
1598                            start: vma_start,
1599                            page_count: leading_pages,
1600                            flags: vma.flags,
1601                            vma_type: vma.vma_type,
1602                            page_size: vma.page_size,
1603                        },
1604                    );
1605                }
1606
1607                if range_end < vma_end {
1608                    let trailing_pages = ((vma_end - range_end) / vma.page_size.bytes()) as usize;
1609                    regions.insert(
1610                        range_end,
1611                        VirtualMemoryRegion {
1612                            start: range_end,
1613                            page_count: trailing_pages,
1614                            flags: vma.flags,
1615                            vma_type: vma.vma_type,
1616                            page_size: vma.page_size,
1617                        },
1618                    );
1619                }
1620            }
1621        }
1622
1623        crate::silo::release_current_task_memory(released_bytes);
1624        Ok(())
1625    }
1626
1627    /// Translate a virtual address to its mapped physical address.
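    ///
    /// A hedged usage sketch (`aspace` is an illustrative `AddressSpace` handle):
    ///
    /// ```ignore
    /// use x86_64::VirtAddr;
    ///
    /// match aspace.translate(VirtAddr::new(0x40_0000)) {
    ///     Some(pa) => log::trace!("0x40_0000 -> {:#x}", pa.as_u64()),
    ///     None => log::trace!("0x40_0000 is not mapped (or not yet faulted in)"),
    /// }
    /// ```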
1628    pub fn translate(&self, vaddr: VirtAddr) -> Option<PhysAddr> {
1629        // SAFETY: Read-only access to the page tables.
1630        let mapper = unsafe { self.mapper() };
1631        mapper.translate_addr(vaddr)
1632    }
1633
1634    /// Translate a virtual address to the current block handle and page-table flags.
1635    pub fn translate_to_handle(&self, vaddr: VirtAddr) -> Option<(BlockHandle, PageTableFlags)> {
1636        // SAFETY: Read-only access to the page tables.
1637        let mapper = unsafe { self.mapper() };
1638        let translated = mapper.translate(vaddr);
1639        match translated {
1640            TranslateResult::Mapped { frame, flags, .. } => {
1641                Some((resolve_handle(frame.start_address()), flags))
1642            }
1643            TranslateResult::NotMapped | TranslateResult::InvalidFrameAddress(_) => None,
1644        }
1645    }
1646
1647    /// Get the physical address of this address space's PML4 table.
1648    pub fn cr3(&self) -> PhysAddr {
1649        self.cr3_phys
1650    }
1651
1652    /// Switch the CPU to this address space by writing CR3.
1653    ///
1654    /// Skips the write if CR3 already points to this address space (avoids
1655    /// unnecessary TLB flush).
1656    ///
1657    /// # Safety
1658    /// The caller must ensure this address space's page tables are valid and
1659    /// that the kernel half is correctly mapped.
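    ///
    /// A hedged usage sketch from a context-switch path (`next_aspace` is illustrative):
    ///
    /// ```ignore
    /// // SAFETY: `next_aspace` was created by `AddressSpace::new_user()` and still owns
    /// // a valid PML4 with the kernel half cloned in.
    /// unsafe { next_aspace.switch_to() };
    /// ```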
1660    pub unsafe fn switch_to(&self) {
1661        let (current_frame, _) = Cr3::read();
1662        if current_frame.start_address() == self.cr3_phys {
1663            return; // Already active: skip the write to avoid an unnecessary TLB flush.
1664        }
1665
1666        // SAFETY: cr3_phys points to a valid, 4KiB-aligned PML4 table with
1667        // the kernel half correctly populated.
1668        unsafe {
1669            let frame =
1670                X86PhysFrame::from_start_address(self.cr3_phys).expect("CR3 address not aligned");
1671            crate::e9_println!("C");
1672            Cr3::write(frame, Cr3Flags::empty());
1673            crate::e9_println!("c");
1674        }
1675    }
1676
1677    /// Whether this is the kernel address space.
1678    pub fn is_kernel(&self) -> bool {
1679        self.is_kernel
1680    }
1681
1682    /// Check if this address space has any user-space memory mappings.
1683    pub fn has_user_mappings(&self) -> bool {
1684        if self.is_kernel {
1685            return false;
1686        }
1687        let regions = self.regions.lock();
1688        // Check for any non-kernel mappings.
1689        regions.values().any(|vma| vma.vma_type != VmaType::Kernel)
1690    }
1691
1692    fn teardown_effective_mapping_for_drop(&self, mapping: EffectiveMapping) {
1693        // SAFETY: the address space is being torn down; any remaining user mapping
1694        // must be detached from ownership tracking before page-table reclamation.
1695        unsafe {
1696            let mut mapper = self.mapper();
1697            if mapper
1698                .translate_addr(VirtAddr::new(mapping.start))
1699                .is_some()
1700            {
1701                match mapping.page_size {
1702                    VmaPageSize::Small => {
1703                        let page =
1704                            Page::<Size4KiB>::from_start_address(VirtAddr::new(mapping.start))
1705                                .unwrap();
1706                        if let Err(error) = mapper.unmap(page) {
1707                            log::warn!(
1708                                "memory: drop cleanup failed to unmap 4K page at {:#x}: {:?}",
1709                                mapping.start,
1710                                error
1711                            );
1712                        }
1713                    }
1714                    VmaPageSize::Huge => {
1715                        let page =
1716                            Page::<Size2MiB>::from_start_address(VirtAddr::new(mapping.start))
1717                                .unwrap();
1718                        if let Err(error) = mapper.unmap(page) {
1719                            log::warn!(
1720                                "memory: drop cleanup failed to unmap 2M page at {:#x}: {:?}",
1721                                mapping.start,
1722                                error
1723                            );
1724                        }
1725                    }
1726                }
1727            }
1728        }
1729
1730        let _ = self.unregister_effective_mapping(mapping.start);
1731    }
1732
1733    fn teardown_region_for_drop(&self, start: u64, region: &VirtualMemoryRegion) {
1734        let len = (region.page_count as u64).saturating_mul(region.page_size.bytes());
1735        let mut page_addr = start;
1736        let page_bytes = region.page_size.bytes();
1737
1738        while page_addr < start.saturating_add(len) {
1739            if let Some(mapping) = self.effective_mapping_by_start(page_addr) {
1740                self.teardown_effective_mapping_for_drop(mapping);
1741            }
1742            page_addr += page_bytes;
1743        }
1744
1745        let _ = self.regions.lock().remove(&start);
1746        crate::silo::release_current_task_memory(len);
1747    }
1748
1749    /// Unmap all tracked user regions (best-effort).
1750    ///
1751    /// This frees user frames and clears the VMA list. Kernel mappings are untouched.
1752    /// Does not allocate memory.
1753    pub fn unmap_all_user_regions(&self) {
1754        if self.is_kernel {
1755            return;
1756        }
1757
1758        loop {
1759            let first = {
1760                let guard = self.regions.lock();
1761                guard
1762                    .iter()
1763                    .next()
1764                    .map(|(&start, region)| (start, region.clone()))
1765            };
1766
1767            let Some((start, region)) = first else {
1768                break;
1769            };
1770
1771            let len = (region.page_count as u64).saturating_mul(region.page_size.bytes());
1772            if self.unmap_range(region.start, len).is_err() {
1773                log::warn!(
1774                    "memory: unmap_all_user_regions fallback cleanup for {:#x}..{:#x}",
1775                    start,
1776                    start.saturating_add(len)
1777                );
1778                self.teardown_region_for_drop(start, &region);
1779            }
1780        }
1781
1782        let residual_mappings: Vec<EffectiveMapping> = {
1783            let guard = self.effective_mappings.lock();
1784            guard.values().copied().collect()
1785        };
1786        for mapping in residual_mappings {
1787            log::warn!(
1788                "memory: drop cleanup removing orphan effective mapping at {:#x} cap={}",
1789                mapping.start,
1790                mapping.cap_id.as_u64()
1791            );
1792            self.teardown_effective_mapping_for_drop(mapping);
1793            crate::silo::release_current_task_memory(mapping.page_size.bytes());
1794        }
1795    }
1796
1797    /// Fork this user address space with copy-on-write (COW) semantics.
    ///
    /// Writable parent mappings are downgraded to read-only and tagged with a software
    /// COW bit (`BIT_9`); the child maps the same physical frames, so a later write fault
    /// can copy the page on demand. On failure, parent flags are rolled back.
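    ///
    /// A sketch of the per-page flag transformation applied to the parent below:
    ///
    /// ```ignore
    /// use x86_64::structures::paging::PageTableFlags;
    ///
    /// const COW_BIT: PageTableFlags = PageTableFlags::BIT_9;
    /// let mut new_flags =
    ///     PageTableFlags::PRESENT | PageTableFlags::WRITABLE | PageTableFlags::USER_ACCESSIBLE;
    /// new_flags.remove(PageTableFlags::WRITABLE); // writes now fault ...
    /// new_flags.insert(COW_BIT);                  // ... and the fault handler sees the COW tag
    /// assert!(!new_flags.contains(PageTableFlags::WRITABLE));
    /// assert!(new_flags.contains(COW_BIT));
    /// ```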
1798    pub fn clone_cow(&self) -> Result<Arc<AddressSpace>, &'static str> {
1799        if self.is_kernel {
1800            return Err("Cannot fork kernel address space");
1801        }
1802
1803        let child = Arc::new(AddressSpace::new_user()?);
1804
1805        let regions: Vec<VirtualMemoryRegion> = {
1806            let guard = self.regions.lock();
1807            guard.values().cloned().collect()
1808        };
1809        let effective_mappings: Vec<EffectiveMapping> = {
1810            let guard = self.effective_mappings.lock();
1811            guard.values().copied().collect()
1812        };
1813
1814        let mut tlb_flush_needed = false;
1815        let mut processed_pages = Vec::new();
1816
1817        let res: Result<(), &'static str> = (|| {
1818            let mut parent_mapper = unsafe { self.mapper() };
1819            let mut child_mapper = unsafe { child.mapper() };
1820            let mut frame_allocator = BuddyFrameAllocator;
1821
1822            for region in regions.iter() {
1823                // Register VMA in child.
1824                {
1825                    let mut child_regions = child.regions.lock();
1826                    child_regions.insert(region.start, region.clone());
1827                }
1828            }
1829
1830            for mapping in effective_mappings.iter().copied() {
1831                let vaddr = VirtAddr::new(mapping.start);
1832                let phys_frame_addr = mapping.handle.base;
1833                let mut new_flags = mapping.flags;
1834                let is_writable = mapping.flags.contains(PageTableFlags::WRITABLE);
1835                const COW_BIT: PageTableFlags = PageTableFlags::BIT_9;
1836
1837                if is_writable {
1838                    new_flags.remove(PageTableFlags::WRITABLE);
1839                    new_flags.insert(COW_BIT);
1840
1841                    unsafe {
1842                        let res: Result<(), &'static str> = match mapping.page_size {
1843                            VmaPageSize::Small => parent_mapper
1844                                .update_flags(
1845                                    Page::<Size4KiB>::from_start_address(vaddr).unwrap(),
1846                                    new_flags,
1847                                )
1848                                .map(|f| f.ignore())
1849                                .map_err(|_| "Failed to update parent 4K flags"),
1850                            VmaPageSize::Huge => parent_mapper
1851                                .update_flags(
1852                                    Page::<Size2MiB>::from_start_address(vaddr).unwrap(),
1853                                    new_flags,
1854                                )
1855                                .map(|f| f.ignore())
1856                                .map_err(|_| "Failed to update parent 2M flags"),
1857                        };
1858                        if let Err(e) = res {
1859                            return Err(e);
1860                        }
1861                    }
1862                    let _ = self.update_effective_mapping_flags(vaddr.as_u64(), new_flags);
1863                    tlb_flush_needed = true;
1864                    processed_pages.push((vaddr.as_u64(), mapping.flags, mapping.page_size));
1865                }
1866
1867                let handle = mapping.handle;
1868                crate::memory::cow::handle_inc_ref(handle).map_err(|error| {
1869                    log::warn!(
1870                        "clone_cow: failed to pin source handle {:#x}/{} for vaddr={:#x}: {:?}",
1871                        handle.base.as_u64(),
1872                        handle.order,
1873                        vaddr.as_u64(),
1874                        error
1875                    );
1876                    "Failed to pin source COW frame"
1877                })?;
1878
1879                // Map in child. We map it as WRITABLE first to ensure intermediate
1880                // page tables (PDPT, PD) are created with WRITABLE bit set.
1881                // If we mapped directly as COW (Read-only), some Mapper implementations
1882                // might create Read-Only intermediate tables, blocking future COW resolution.
1883                let map_flags = new_flags | PageTableFlags::WRITABLE;
1884
1885                unsafe {
1886                    let map_res: Result<(), &'static str> = match mapping.page_size {
1887                        VmaPageSize::Small => {
1888                            let page = Page::<Size4KiB>::from_start_address(vaddr).unwrap();
1889                            let frame = x86_64::structures::paging::PhysFrame::<Size4KiB>::containing_address(phys_frame_addr);
1890                            child_mapper
1891                                .map_to(page, frame, map_flags, &mut frame_allocator)
1892                                .map(|f| f.ignore())
1893                                .map_err(|_| "Failed to map 4K in child")
1894                        }
1895                        VmaPageSize::Huge => {
1896                            let page = Page::<Size2MiB>::from_start_address(vaddr).unwrap();
1897                            let frame = x86_64::structures::paging::PhysFrame::<Size2MiB>::containing_address(phys_frame_addr);
1898                            child_mapper
1899                                .map_to(page, frame, map_flags, &mut frame_allocator)
1900                                .map(|f| f.ignore())
1901                                .map_err(|_| "Failed to map 2M in child")
1902                        }
1903                    };
1904
1905                    if let Err(e) = map_res {
1906                        crate::memory::cow::handle_dec_ref(handle);
1907                        return Err(e);
1908                    }
1909
1910                    // Now downgrade to the actual COW flags (which may be Read-Only).
1911                    if !new_flags.contains(PageTableFlags::WRITABLE) {
1912                        let downgrade_res: Result<(), &'static str> = match mapping.page_size {
1913                            VmaPageSize::Small => {
1914                                let page = Page::<Size4KiB>::from_start_address(vaddr).unwrap();
1915                                child_mapper
1916                                    .update_flags(page, new_flags)
1917                                    .map(|f| f.ignore())
1918                                    .map_err(|_| "Failed to update child 4K flags")
1919                            }
1920                            VmaPageSize::Huge => {
1921                                let page = Page::<Size2MiB>::from_start_address(vaddr).unwrap();
1922                                child_mapper
1923                                    .update_flags(page, new_flags)
1924                                    .map(|f| f.ignore())
1925                                    .map_err(|_| "Failed to update child 2M flags")
1926                            }
1927                        };
1928                        if let Err(e) = downgrade_res {
1929                            let unmapped = match mapping.page_size {
1930                                VmaPageSize::Small => {
1931                                    let page = Page::<Size4KiB>::from_start_address(vaddr).unwrap();
1932                                    child_mapper.unmap(page).map(|(_, f)| f.ignore()).is_ok()
1933                                }
1934                                VmaPageSize::Huge => {
1935                                    let page = Page::<Size2MiB>::from_start_address(vaddr).unwrap();
1936                                    child_mapper.unmap(page).map(|(_, f)| f.ignore()).is_ok()
1937                                }
1938                            };
1939                            if unmapped {
1940                                crate::memory::cow::handle_dec_ref(handle);
1941                            }
1942                            return Err(e);
1943                        }
1944                    }
1945                }
1946
1947                if child
1948                    .register_effective_mapping(EffectiveMapping {
1949                        start: vaddr.as_u64(),
1950                        cap_id: allocate_mapping_cap_id(),
1951                        handle,
1952                        flags: new_flags,
1953                        page_size: mapping.page_size,
1954                    })
1955                    .is_err()
1956                {
1957                    match mapping.page_size {
1958                        VmaPageSize::Small => {
1959                            let page = Page::<Size4KiB>::from_start_address(vaddr).unwrap();
1960                            if let Ok((_, flush)) = child_mapper.unmap(page) {
1961                                flush.ignore();
1962                            }
1963                        }
1964                        VmaPageSize::Huge => {
1965                            let page = Page::<Size2MiB>::from_start_address(vaddr).unwrap();
1966                            if let Ok((_, flush)) = child_mapper.unmap(page) {
1967                                flush.ignore();
1968                            }
1969                        }
1970                    }
1971                    crate::memory::cow::handle_dec_ref(handle);
1972                    return Err("Failed to track child COW mapping");
1973                }
1974
1975                crate::memory::cow::handle_dec_ref(handle);
1976            }
1977            Ok(())
1978        })();
1979
1980        let tlb_flush_range = if tlb_flush_needed && !processed_pages.is_empty() {
1981            let mut range_start = u64::MAX;
1982            let mut range_end = 0u64;
1983            for (vaddr, _, page_size) in &processed_pages {
1984                range_start = range_start.min(*vaddr);
1985                range_end = range_end.max(vaddr.saturating_add(page_size.bytes()));
1986            }
1987            if range_start < range_end {
1988                Some((range_start, range_end))
1989            } else {
1990                None
1991            }
1992        } else {
1993            None
1994        };
1995
1996        if let Err(e) = res {
1997            log::error!("clone_cow error: {}. Rolling back...", e);
1998            let mut parent_mapper = unsafe { self.mapper() };
1999            for &(vaddr, original_flags, page_size) in processed_pages.iter().rev() {
2000                if original_flags.contains(PageTableFlags::WRITABLE) {
2001                    unsafe {
2002                        match page_size {
2003                            VmaPageSize::Small => {
2004                                let _ = parent_mapper.update_flags(
2005                                    Page::<Size4KiB>::from_start_address(VirtAddr::new(vaddr))
2006                                        .unwrap(),
2007                                    original_flags,
2008                                );
2009                            }
2010                            VmaPageSize::Huge => {
2011                                let _ = parent_mapper.update_flags(
2012                                    Page::<Size2MiB>::from_start_address(VirtAddr::new(vaddr))
2013                                        .unwrap(),
2014                                    original_flags,
2015                                );
2016                            }
2017                        };
2018                    }
2019                    let _ = self.update_effective_mapping_flags(vaddr, original_flags);
2020                }
2021            }
2022            if let Some((range_start, range_end)) = tlb_flush_range {
2023                crate::arch::x86_64::tlb::shootdown_range(
2024                    VirtAddr::new(range_start),
2025                    VirtAddr::new(range_end),
2026                );
2027            }
2028            return Err(e);
2029        }
2030
2031        if let Some((range_start, range_end)) = tlb_flush_range {
2032            crate::arch::x86_64::tlb::shootdown_range(
2033                VirtAddr::new(range_start),
2034                VirtAddr::new(range_end),
2035            );
2036        }
2037        Ok(child)
2038    }
2039
2040    /// Free the intermediate page tables (L3/L2/L1) backing the user half of the PML4
    /// (entries 0..256).
2041    fn free_user_page_tables(&self) {
2042        if self.is_kernel {
2043            return;
2044        }
2045
2046        // SAFETY: We have logical ownership of this address space during drop.
2047        let l4 = unsafe { &mut *self.l4_table_virt.as_mut_ptr::<PageTable>() };
2048
2049        for i in 0..256 {
2050            if !l4[i].flags().contains(PageTableFlags::PRESENT) {
2051                continue;
2052            }
2053            let l3_frame = match l4[i].frame() {
2054                Ok(f) => f,
2055                Err(_) => {
2056                    l4[i].set_unused();
2057                    continue;
2058                }
2059            };
2060
2061            free_l3_table(l3_frame);
2062            l4[i].set_unused();
2063        }
2064    }
2065}
2066
2067impl Drop for AddressSpace {
2068    /// Tear down a user address space: unmap remaining user regions, reclaim user
    /// page tables, and free the PML4 frame. The kernel address space is never freed.
2069    fn drop(&mut self) {
2070        if self.is_kernel {
2071            return; // Never free the kernel address space.
2072        }
2073
2074        log::trace!("AddressSpace::drop begin CR3={:#x}", self.cr3_phys.as_u64());
2075
2076        // Best-effort cleanup of user mappings.
2077        self.unmap_all_user_regions();
2078        #[cfg(not(feature = "selftest"))]
2079        self.free_user_page_tables();
2080        #[cfg(feature = "selftest")]
2081        {
2082            // Runtime selftests create/destroy many temporary address spaces and
2083            // currently expose instability in recursive page-table teardown.
2084            // Keep tests deterministic by skipping deep PT reclaim in this mode.
2085            log::trace!(
2086                "AddressSpace::drop selftest mode: skipping deep page-table free for CR3={:#x}",
2087                self.cr3_phys.as_u64()
2088            );
2089        }
2090
2091        // Free the PML4 frame itself.
2092        // NOTE: intermediate page tables (L3/L2/L1) belonging exclusively to the user
2093        // half were reclaimed by `free_user_page_tables` above, except under the
        // `selftest` feature, where deep page-table reclaim is skipped.
2094        let phys_frame = crate::memory::PhysFrame {
2095            start_address: self.cr3_phys,
2096        };
2097        crate::sync::with_irqs_disabled(|token| {
2098            crate::memory::free_frame(token, phys_frame);
2099        });
2100
2101        log::trace!("AddressSpace::drop end CR3={:#x}", self.cr3_phys.as_u64());
2102        log::debug!(
2103            "User address space dropped: CR3={:#x}",
2104            self.cr3_phys.as_u64()
2105        );
2106    }
2107}
2108
2109// ---------------------------------------------------------------------------
2110// Page table cleanup helpers (user half only)
2111// ---------------------------------------------------------------------------
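//
// Teardown order (driven by `free_user_page_tables`): each present entry in
// PML4[0..256] is walked as L4 -> `free_l3_table` -> `free_l2_table` ->
// `free_l1_table`, and every table frame is returned to the allocator via
// `free_frame`. Leaf frames are not freed here; they were already released by
// `unmap_all_user_regions`.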
2112
2113/// Return a physical frame to the frame allocator (interrupts disabled for the call).
2114fn free_frame(phys: PhysAddr) {
2115    let phys_frame = crate::memory::PhysFrame {
2116        start_address: phys,
2117    };
2118    crate::sync::with_irqs_disabled(|token| {
2119        crate::memory::free_frame(token, phys_frame);
2120    });
2121}
2122
2123/// Clear every entry of an L1 page table and free the table frame itself.
/// Mapped leaf frames were already released via `unmap_all_user_regions`.
2124fn free_l1_table(frame: X86PhysFrame<Size4KiB>) {
2125    let l1_virt = VirtAddr::new(crate::memory::phys_to_virt(frame.start_address().as_u64()));
2126    // SAFETY: l1_virt points to a valid page table frame in HHDM.
2127    let l1 = unsafe { &mut *l1_virt.as_mut_ptr::<PageTable>() };
2128    for entry in l1.iter_mut() {
2129        if entry.flags().contains(PageTableFlags::PRESENT) {
2130            // Mapped frames are already freed via unmap_all_user_regions.
2131            entry.set_unused();
2132        }
2133    }
2134    free_frame(frame.start_address());
2135}
2136
2137/// Walk an L2 page table, freeing each referenced L1 table, then free the L2 table frame.
2138fn free_l2_table(frame: X86PhysFrame<Size4KiB>) {
2139    let l2_virt = VirtAddr::new(crate::memory::phys_to_virt(frame.start_address().as_u64()));
2140    let l2 = unsafe { &mut *l2_virt.as_mut_ptr::<PageTable>() };
2141    for entry in l2.iter_mut() {
2142        if !entry.flags().contains(PageTableFlags::PRESENT) {
2143            continue;
2144        }
2145        if entry.flags().contains(PageTableFlags::HUGE_PAGE) {
2146            // 2 MiB leaf mapping: no L1 table beneath it to recurse into; just clear the entry.
2147            entry.set_unused();
2148            continue;
2149        }
2150        if let Ok(l1_frame) = entry.frame() {
2151            free_l1_table(l1_frame);
2152        }
2153        entry.set_unused();
2154    }
2155    free_frame(frame.start_address());
2156}
2157
2158/// Walk an L3 page table, freeing each referenced L2 table, then free the L3 table frame.
2159fn free_l3_table(frame: X86PhysFrame<Size4KiB>) {
2160    let l3_virt = VirtAddr::new(crate::memory::phys_to_virt(frame.start_address().as_u64()));
2161    let l3 = unsafe { &mut *l3_virt.as_mut_ptr::<PageTable>() };
2162    for entry in l3.iter_mut() {
2163        if !entry.flags().contains(PageTableFlags::PRESENT) {
2164            continue;
2165        }
2166        if entry.flags().contains(PageTableFlags::HUGE_PAGE) {
2167            // 1 GiB pages are not expected in user space today.
2168            entry.set_unused();
2169            continue;
2170        }
2171        if let Ok(l2_frame) = entry.frame() {
2172            free_l2_table(l2_frame);
2173        }
2174        entry.set_unused();
2175    }
2176    free_frame(frame.start_address());
2177}
2178
2179// ---------------------------------------------------------------------------
2180// Kernel address space singleton
2181// ---------------------------------------------------------------------------
2182
2183static KERNEL_ADDRESS_SPACE: Once<Arc<AddressSpace>> = Once::new();
2184
2185/// Initialize the kernel address space singleton.
2186///
2187/// Must be called once during boot, after paging is initialized, before the
2188/// scheduler creates any tasks.
2189///
2190/// # Safety
2191/// Must be called in single-threaded init context.
2192pub unsafe fn init_kernel_address_space() {
2193    KERNEL_ADDRESS_SPACE.call_once(|| {
2194        // SAFETY: Called once, single-threaded, paging initialized.
2195        Arc::new(unsafe { AddressSpace::new_kernel() })
2196    });
2197}
2198
2199/// Get a reference to the kernel address space.
2200///
2201/// Panics if called before `init_kernel_address_space()`.
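///
/// A hedged usage sketch from early boot:
///
/// ```ignore
/// // SAFETY: single-threaded init context, paging already initialized.
/// unsafe { init_kernel_address_space() };
/// let kas = kernel_address_space();
/// assert!(kas.is_kernel());
/// ```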
2202pub fn kernel_address_space() -> &'static Arc<AddressSpace> {
2203    KERNEL_ADDRESS_SPACE
2204        .get()
2205        .expect("Kernel address space not initialized")
2206}