strat9_kernel/memory/address_space.rs

1//! Per-process address spaces for Strat9-OS.
2//!
3//! Each task owns an `AddressSpace` backed by a PML4 page table.
4//! Kernel tasks share a single kernel address space. User tasks get a fresh
5//! PML4 with the kernel half (entries 256..512) cloned from the kernel's table.
6//!
7//! x86_64 virtual address space layout:
8//! - PML4[0..256]   → User space (per-process, zeroed for new AS)
9//! - PML4[256..512] → Kernel space (shared, cloned from kernel L4)
10
11use alloc::{collections::BTreeMap, sync::Arc, vec::Vec};
12
13use spin::Once;
14use x86_64::{
15    registers::control::{Cr3, Cr3Flags},
16    structures::paging::{
17        mapper::TranslateResult, Mapper, OffsetPageTable, Page, PageTable, PageTableFlags,
18        PhysFrame as X86PhysFrame, Size2MiB, Size4KiB, Translate,
19    },
20    PhysAddr, VirtAddr,
21};
22
23use crate::{memory::paging::BuddyFrameAllocator, sync::SpinLock};
24
25/// Flags describing permissions for a virtual memory region.
26#[derive(Debug, Clone, Copy, PartialEq, Eq)]
27pub struct VmaFlags {
28    pub readable: bool,
29    pub writable: bool,
30    pub executable: bool,
31    pub user_accessible: bool,
32}
33
34impl VmaFlags {
35    /// Convert to x86_64 page table flags.
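    ///
    /// # Example
    ///
    /// A minimal sketch of the mapping (illustrative, not compiled as a doctest):
    ///
    /// ```ignore
    /// let flags = VmaFlags {
    ///     readable: true,
    ///     writable: true,
    ///     executable: false,
    ///     user_accessible: true,
    /// };
    /// let pt = flags.to_page_flags();
    /// // PRESENT | WRITABLE | NO_EXECUTE | USER_ACCESSIBLE
    /// assert!(pt.contains(PageTableFlags::WRITABLE));
    /// assert!(pt.contains(PageTableFlags::NO_EXECUTE));
    /// ```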
36    pub fn to_page_flags(self) -> PageTableFlags {
37        let mut flags = PageTableFlags::PRESENT;
38        if self.writable {
39            flags |= PageTableFlags::WRITABLE;
40        }
41        if !self.executable {
42            flags |= PageTableFlags::NO_EXECUTE;
43        }
44        if self.user_accessible {
45            flags |= PageTableFlags::USER_ACCESSIBLE;
46        }
47        flags
48    }
49}
50
51/// Type/purpose of a virtual memory region.
52#[derive(Debug, Clone, Copy, PartialEq, Eq)]
53pub enum VmaType {
54    /// Zero-filled anonymous memory (heap, mmap).
55    Anonymous,
56    /// Stack region (grows downward).
57    Stack,
58    /// Code/text segment (typically RX).
59    Code,
60    /// Kernel-internal mapping.
61    Kernel,
62}
63
64/// Supported page sizes for VMAs.
65#[derive(Debug, Clone, Copy, PartialEq, Eq)]
66pub enum VmaPageSize {
67    /// Standard 4 KiB page.
68    Small,
69    /// Huge 2 MiB page.
70    Huge,
71}
72
73impl VmaPageSize {
    /// Size of one page of this size, in bytes.
75    pub fn bytes(self) -> u64 {
76        match self {
77            VmaPageSize::Small => 4096,
78            VmaPageSize::Huge => 2 * 1024 * 1024,
79        }
80    }
81}
82
83/// A tracked virtual memory region within an address space.
84#[derive(Debug, Clone)]
85pub struct VirtualMemoryRegion {
86    /// Start virtual address (page-aligned).
87    pub start: u64,
88    /// Number of pages in this region (size depends on `page_size`).
89    pub page_count: usize,
90    /// Access permissions.
91    pub flags: VmaFlags,
92    /// Purpose of this region.
93    pub vma_type: VmaType,
94    /// Size of each page in this region.
95    pub page_size: VmaPageSize,
96}
97
98/// A per-process address space backed by a PML4 page table.
99///
100/// Kernel tasks share a single `AddressSpace` (the kernel AS).
101/// User tasks each get their own, with kernel entries (PML4[256..512]) cloned
102/// so that the kernel is always mapped regardless of which AS is active.
103pub struct AddressSpace {
104    /// Physical address of the PML4 table (loaded into CR3).
105    cr3_phys: PhysAddr,
106    /// Virtual address of the PML4 table (via HHDM, for reading/modifying).
107    l4_table_virt: VirtAddr,
108    /// Whether this is the kernel address space (never freed).
109    is_kernel: bool,
110    /// Tracked virtual memory regions (key = start address).
111    regions: SpinLock<BTreeMap<u64, VirtualMemoryRegion>>,
112}
113
114// SAFETY: AddressSpace is protected by the scheduler lock and per-task ownership.
115// The PML4 table is accessed through HHDM virtual addresses which are valid on all CPUs.
116unsafe impl Send for AddressSpace {}
117unsafe impl Sync for AddressSpace {}
118
119impl AddressSpace {
120    /// Create the kernel address space by wrapping the current (boot) CR3.
121    ///
122    /// # Safety
123    /// Must be called exactly once, during single-threaded init, after paging is initialized.
124    pub unsafe fn new_kernel() -> Self {
125        let (level_4_frame, _flags) = Cr3::read();
126        let cr3_phys = level_4_frame.start_address();
127        let l4_table_virt = VirtAddr::new(crate::memory::phys_to_virt(cr3_phys.as_u64()));
128
129        log::info!(
130            "Kernel address space initialized: CR3={:#x}",
131            cr3_phys.as_u64()
132        );
133
134        AddressSpace {
135            cr3_phys,
136            l4_table_virt,
137            is_kernel: true,
138            regions: SpinLock::new(BTreeMap::new()),
139        }
140    }
141
142    /// Create a new user address space with the kernel half cloned.
143    ///
144    /// Allocates a fresh PML4 frame, zeroes it, then copies entries 256..512
145    /// from the kernel PML4. This shares the kernel's L3/L2/L1 subtrees so
146    /// kernel mapping changes propagate automatically.
147    pub fn new_user() -> Result<Self, &'static str> {
148        // Allocate a frame for the new PML4 table.
149        let new_l4_phys = crate::sync::with_irqs_disabled(|token| {
150            crate::memory::allocate_frame(token)
151        })
152        .map_err(|_| "Failed to allocate PML4 frame")?
153        .start_address;
154
155        let new_l4_virt = VirtAddr::new(crate::memory::phys_to_virt(new_l4_phys.as_u64()));
156
157        // Zero the entire table first (clears user-half entries 0..256).
158        // SAFETY: new_l4_virt points to a freshly allocated, HHDM-mapped frame.
159        unsafe {
160            core::ptr::write_bytes(new_l4_virt.as_mut_ptr::<u8>(), 0, 4096);
161        }
162
163        // Clone kernel entries (PML4[256..512]) from the kernel's L4 table.
164        let kernel_l4_phys = crate::memory::paging::kernel_l4_phys();
165        let kernel_l4_virt = VirtAddr::new(crate::memory::phys_to_virt(kernel_l4_phys.as_u64()));
166
167        // SAFETY: Both pointers are valid HHDM-mapped page tables. We only read
168        // from the kernel table and write to the freshly allocated table.
169        unsafe {
170            let kernel_l4 = &*(kernel_l4_virt.as_ptr::<PageTable>());
171            let new_l4 = &mut *(new_l4_virt.as_mut_ptr::<PageTable>());
172            for i in 256..512 {
173                new_l4[i] = kernel_l4[i].clone();
174            }
175        }
176
177        // ---------- LAPIC low-half mapping (HHDM=0 workaround) ----------
178        //
179        // When Limine provides a non-zero HHDM offset the LAPIC is mapped in
180        // PML4[256..512] (kernel half) and is already shared above.
181        //
182        // When HHDM=0 the LAPIC is identity-mapped at its physical address
183        // (0xFEE00000) in the low half (PML4[0]).  Every Ring-0 interrupt
184        // handler calls apic::eoi() which writes to this address.  If the
185        // handler fires while a user CR3 is active the write faults because
186        // PML4[0] is absent in the user page tables.
187        //
188        // Fix: map just the LAPIC 4KiB MMIO page into every new user AS using
189        // a fresh private L3/L2/L1 hierarchy (no sharing with the kernel's
190        // page table subtrees at the LAPIC virtual address).
191        {
192            let lapic_phys = crate::arch::x86_64::apic::lapic_phys();
193            if lapic_phys != 0 {
194                let lapic_virt = crate::memory::phys_to_virt(lapic_phys);
195                // Only needed when LAPIC is in the low half.
196                if lapic_virt < 0xFFFF_8000_0000_0000 {
197                    let phys_offset = VirtAddr::new(crate::memory::hhdm_offset());
198                    // SAFETY: new_l4_virt is the freshly allocated user PML4.
199                    let l4 = unsafe { &mut *new_l4_virt.as_mut_ptr::<PageTable>() };
200                    let mut mapper = unsafe { OffsetPageTable::new(l4, phys_offset) };
201                    let mut buddy = crate::memory::paging::BuddyFrameAllocator;
202                    let mmio_flags = PageTableFlags::PRESENT
203                        | PageTableFlags::WRITABLE
204                        | PageTableFlags::NO_CACHE;
205                    let lapic_page =
206                        Page::<Size4KiB>::containing_address(VirtAddr::new(lapic_virt));
207                    let lapic_frame =
208                        X86PhysFrame::<Size4KiB>::containing_address(PhysAddr::new(lapic_phys));
                    // Use map_to_with_table_flags so intermediate tables are
                    // created without USER_ACCESSIBLE and user code cannot
                    // reach LAPIC MMIO.
                    match unsafe {
                        mapper.map_to_with_table_flags(
                            lapic_page,
                            lapic_frame,
                            mmio_flags,
                            PageTableFlags::PRESENT | PageTableFlags::WRITABLE,
                            &mut buddy,
                        )
                    } {
213                        Ok(flush) => flush.flush(),
214                        Err(e) => {
215                            crate::serial_println!(
216                                "[as] WARN: failed to map LAPIC ({:#x}) in user AS: {:?}",
217                                lapic_phys,
218                                e
219                            );
220                        }
221                    }
222                }
223            }
224        }
225
226        log::debug!(
227            "User address space created: CR3={:#x} (kernel entries cloned from {:#x})",
228            new_l4_phys.as_u64(),
229            kernel_l4_phys.as_u64()
230        );
231
232        Ok(AddressSpace {
233            cr3_phys: new_l4_phys,
234            l4_table_virt: new_l4_virt,
235            is_kernel: false,
236            regions: SpinLock::new(BTreeMap::new()),
237        })
238    }
239
240    /// Construct a temporary `OffsetPageTable` mapper for this address space.
241    ///
242    /// # Safety
243    /// The caller must ensure exclusive access to the page tables (e.g. via
244    /// the scheduler lock or single-threaded context).
245    pub(crate) unsafe fn mapper(&self) -> OffsetPageTable<'_> {
246        let phys_offset = VirtAddr::new(crate::memory::hhdm_offset());
247        // SAFETY: l4_table_virt is the HHDM-mapped address of our PML4.
248        // The caller guarantees exclusive access.
249        unsafe {
250            OffsetPageTable::new(
251                &mut *self.l4_table_virt.as_mut_ptr::<PageTable>(),
252                phys_offset,
253            )
254        }
255    }
256
257    /// Reserve a contiguous region of virtual pages without allocating physical frames.
258    ///
    /// The pages are mapped lazily by the page-fault handler on first access
    /// (demand paging); no physical frames are allocated here.
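    ///
    /// # Example
    ///
    /// A usage sketch with illustrative addresses (not a doctest):
    ///
    /// ```ignore
    /// // Reserve 16 lazily-backed 4 KiB pages for a user heap.
    /// aspace.reserve_region(
    ///     0x4000_0000,
    ///     16,
    ///     VmaFlags { readable: true, writable: true, executable: false, user_accessible: true },
    ///     VmaType::Anonymous,
    ///     VmaPageSize::Small,
    /// )?;
    /// // The first touch of each page triggers handle_fault(), which allocates
    /// // and maps a frame.
    /// ```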
260    pub fn reserve_region(
261        &self,
262        start: u64,
263        page_count: usize,
264        flags: VmaFlags,
265        vma_type: VmaType,
266        page_size: VmaPageSize,
267    ) -> Result<(), &'static str> {
268        let page_bytes = page_size.bytes();
269        if page_count == 0 || start % page_bytes != 0 {
270            return Err("Invalid region arguments");
271        }
272        let len = (page_count as u64)
273            .checked_mul(page_bytes)
274            .ok_or("Region length overflow")?;
275        let end = start.checked_add(len).ok_or("Region end overflow")?;
276        const USER_SPACE_END: u64 = 0x0000_8000_0000_0000;
277        if end > USER_SPACE_END {
278            return Err("Region out of user-space range");
279        }
280
281        // Reject overlapping VMAs
282        {
283            let regions = self.regions.lock();
284            if regions.iter().any(|(&vma_start, vma)| {
285                let vma_end = vma_start
286                    .saturating_add((vma.page_count as u64).saturating_mul(vma.page_size.bytes()));
287                vma_start < end && vma_end > start
288            }) {
289                return Err("Region overlaps existing mapping");
290            }
291        }
292
293        // Enforce per-silo memory quota (best effort; non-silo tasks are ignored).
294        crate::silo::charge_current_task_memory(len).map_err(|_| "Silo memory quota exceeded")?;
295
296        // Track the region, attempting to merge with previous.
297        let mut regions = self.regions.lock();
298        let mut merged = false;
299
300        if let Some((&prev_start, prev_vma)) = regions.range(..start).next_back() {
301            let prev_end = prev_start + (prev_vma.page_count as u64) * prev_vma.page_size.bytes();
302            if prev_end == start
303                && prev_vma.flags == flags
304                && prev_vma.vma_type == vma_type
305                && prev_vma.page_size == page_size
306            {
307                let new_count = prev_vma
308                    .page_count
309                    .checked_add(page_count)
310                    .ok_or("Region page_count overflow")?;
311                let updated_vma = VirtualMemoryRegion {
312                    start: prev_start,
313                    page_count: new_count,
314                    flags,
315                    vma_type,
316                    page_size,
317                };
318                regions.insert(prev_start, updated_vma);
319                merged = true;
320            }
321        }
322
323        if !merged {
324            let region = VirtualMemoryRegion {
325                start,
326                page_count,
327                flags,
328                vma_type,
329                page_size,
330            };
331            regions.insert(start, region);
332        }
333
334        log::trace!(
335            "Reserved lazy region: {:#x} ({} pages, size={:?})",
336            start,
337            page_count,
338            page_size
339        );
340        Ok(())
341    }
342
343    /// Handle a page fault by checking if the address falls within a reserved VMA.
344    ///
345    /// If it does, allocates a physical frame and maps it.
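    ///
    /// # Example
    ///
    /// A sketch of the intended call site; the surrounding #PF handler code is
    /// an assumption, not part of this module:
    ///
    /// ```ignore
    /// // In the page-fault handler, with `fault_addr` taken from CR2:
    /// match aspace.handle_fault(fault_addr) {
    ///     Ok(()) => { /* demand-paged: retry the faulting instruction */ }
    ///     Err(_) => { /* genuine fault: signal or terminate the task */ }
    /// }
    /// ```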
346    pub fn handle_fault(&self, fault_addr: u64) -> Result<(), &'static str> {
347        use x86_64::structures::paging::mapper::MapToError;
348
349        // 1. Find the VMA covering this address
350        let vma = {
351            let regions = self.regions.lock();
352            let mut iter = regions.range(..=fault_addr);
353            let (&start, vma) = iter.next_back().ok_or("No VMA found for address")?;
354            let end = start + (vma.page_count as u64) * vma.page_size.bytes();
355            if fault_addr >= end {
356                return Err("Address outside VMA bounds");
357            }
358            vma.clone()
359        };
360
361        // Align fault address to the page size used by this VMA.
362        let page_bytes = vma.page_size.bytes();
363        let page_addr = fault_addr & !(page_bytes - 1);
364
        // 2. Only Anonymous/Stack/Code regions support demand paging for now
366        match vma.vma_type {
367            VmaType::Anonymous | VmaType::Stack | VmaType::Code => {}
368            _ => return Err("VMA type does not support demand paging"),
369        }
370
371        // 3. If already mapped (race/re-fault), treat as handled.
372        if self.translate(VirtAddr::new(page_addr)).is_some() {
373            return Ok(());
374        }
375
376        // 4. Allocate and map a single page of the required size
377        let mut frame_allocator = crate::memory::paging::BuddyFrameAllocator;
378        let order = match vma.page_size {
379            VmaPageSize::Small => 0,
380            VmaPageSize::Huge => 9,
381        };
382
383        let frame = crate::sync::with_irqs_disabled(|token| {
384            crate::memory::allocate_frames(token, order)
385        })
386        .map_err(|_| "OOM during demand paging")?;
387
388        let mut page_flags = vma.flags.to_page_flags();
389
390        // SAFETY: We own the address space.
391        unsafe {
392            let mut mapper = self.mapper();
393            match vma.page_size {
394                VmaPageSize::Small => {
395                    let page =
396                        Page::<Size4KiB>::from_start_address(VirtAddr::new(page_addr)).unwrap();
397                    let phys_frame =
398                        x86_64::structures::paging::PhysFrame::<Size4KiB>::containing_address(
399                            frame.start_address,
400                        );
401                    match mapper.map_to(page, phys_frame, page_flags, &mut frame_allocator) {
402                        Ok(flush) => {
403                            flush.flush();
404                            core::ptr::write_bytes(page_addr as *mut u8, 0, page_bytes as usize);
405                        }
406                        Err(MapToError::PageAlreadyMapped(_)) => {
407                            crate::sync::with_irqs_disabled(|token| {
408                                crate::memory::free_frames(token, frame, order);
409                            });
410                            return Ok(());
411                        }
412                        Err(_) => {
413                            crate::sync::with_irqs_disabled(|token| {
414                                crate::memory::free_frames(token, frame, order);
415                            });
416                            return Err("Failed to map demand page (4K)");
417                        }
418                    }
419                }
420                VmaPageSize::Huge => {
421                    let page =
422                        Page::<Size2MiB>::from_start_address(VirtAddr::new(page_addr)).unwrap();
423                    let phys_frame =
424                        x86_64::structures::paging::PhysFrame::<Size2MiB>::containing_address(
425                            frame.start_address,
426                        );
427                    page_flags |= PageTableFlags::HUGE_PAGE;
428                    match mapper.map_to(page, phys_frame, page_flags, &mut frame_allocator) {
429                        Ok(flush) => {
430                            flush.flush();
431                            core::ptr::write_bytes(page_addr as *mut u8, 0, page_bytes as usize);
432                        }
433                        Err(MapToError::PageAlreadyMapped(_)) => {
434                            crate::sync::with_irqs_disabled(|token| {
435                                crate::memory::free_frames(token, frame, order);
436                            });
437                            return Ok(());
438                        }
439                        Err(_) => {
440                            crate::sync::with_irqs_disabled(|token| {
441                                crate::memory::free_frames(token, frame, order);
442                            });
443                            return Err("Failed to map demand page (2M)");
444                        }
445                    }
446                }
447            }
448        }
449
450        // Track refcount for COW
451        crate::memory::cow::frame_inc_ref(crate::memory::PhysFrame {
452            start_address: frame.start_address,
453        });
454
455        Ok(())
456    }
457
458    /// Map a contiguous region of pages backed by newly allocated physical frames.
459    ///
460    /// Frames are allocated from the buddy allocator and zero-filled.
461    /// The region is tracked in the VMA list.
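    ///
    /// # Example
    ///
    /// An eager-mapping sketch with illustrative values (not a doctest):
    ///
    /// ```ignore
    /// // Map one 2 MiB page, immediately backed by a zeroed frame.
    /// aspace.map_region(
    ///     0x5000_0000,
    ///     1,
    ///     VmaFlags { readable: true, writable: true, executable: false, user_accessible: true },
    ///     VmaType::Anonymous,
    ///     VmaPageSize::Huge,
    /// )?;
    /// ```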
462    pub fn map_region(
463        &self,
464        start: u64,
465        page_count: usize,
466        flags: VmaFlags,
467        vma_type: VmaType,
468        page_size: VmaPageSize,
469    ) -> Result<(), &'static str> {
470        let page_bytes = page_size.bytes();
471        if page_count == 0 || start % page_bytes != 0 {
472            return Err("Invalid region arguments");
473        }
474        let len = (page_count as u64)
475            .checked_mul(page_bytes)
476            .ok_or("Region length overflow")?;
477        let end = start.checked_add(len).ok_or("Region end overflow")?;
478        const USER_SPACE_END: u64 = 0x0000_8000_0000_0000;
479        if end > USER_SPACE_END {
480            return Err("Region out of user-space range");
481        }
482
483        // Reject overlapping VMAs early
484        {
485            let regions = self.regions.lock();
486            if regions.iter().any(|(&vma_start, vma)| {
487                let vma_end = vma_start
488                    .saturating_add((vma.page_count as u64).saturating_mul(vma.page_size.bytes()));
489                vma_start < end && vma_end > start
490            }) {
491                return Err("Region overlaps existing mapping");
492            }
493        }
494
495        // Enforce per-silo memory quota for eagerly mapped regions.
496        crate::silo::charge_current_task_memory(len).map_err(|_| "Silo memory quota exceeded")?;
497
498        let page_flags = flags.to_page_flags();
499        let mut frame_allocator = BuddyFrameAllocator;
500
501        // SAFETY: we have logical ownership of this address space.
502        let mut mapper = unsafe { self.mapper() };
503        let mut mapped_pages = 0usize;
504
505        for i in 0..page_count {
506            let page_addr = start
507                .checked_add((i as u64).saturating_mul(page_bytes))
508                .ok_or("Page address overflow")?;
509
510            // Allocate a physical frame of appropriate size.
511            let order = match page_size {
512                VmaPageSize::Small => 0,
513                VmaPageSize::Huge => 9,
514            };
515
516            let frame = crate::sync::with_irqs_disabled(|token| {
517                crate::memory::allocate_frames(token, order)
518            })
519            .map_err(|_| "Failed to allocate frame")?;
520
521            // Zero the frame
522            unsafe {
523                let frame_virt = crate::memory::phys_to_virt(frame.start_address.as_u64());
524                core::ptr::write_bytes(frame_virt as *mut u8, 0, page_bytes as usize);
525            }
526
527            // Map the page.
528            let map_ok = match page_size {
529                VmaPageSize::Small => {
530                    use x86_64::structures::paging::Size4KiB;
531                    let page = Page::<Size4KiB>::from_start_address(VirtAddr::new(page_addr))
532                        .map_err(|_| "Map 4K: invalid page address")?;
533                    let phys_frame =
534                        x86_64::structures::paging::PhysFrame::<Size4KiB>::containing_address(
535                            frame.start_address,
536                        );
537                    unsafe {
538                        mapper
539                            .map_to(page, phys_frame, page_flags, &mut frame_allocator)
540                            .map(|flush| flush.flush())
541                            .is_ok()
542                    }
543                }
544                VmaPageSize::Huge => {
545                    use x86_64::structures::paging::Size2MiB;
546                    let page = Page::<Size2MiB>::from_start_address(VirtAddr::new(page_addr))
547                        .map_err(|_| "Map 2M: invalid page address")?;
548                    let phys_frame =
549                        x86_64::structures::paging::PhysFrame::<Size2MiB>::containing_address(
550                            frame.start_address,
551                        );
552                    let mut huge_flags = page_flags;
553                    huge_flags |= PageTableFlags::HUGE_PAGE;
554                    unsafe {
555                        mapper
556                            .map_to(page, phys_frame, huge_flags, &mut frame_allocator)
557                            .map(|flush| flush.flush())
558                            .is_ok()
559                    }
560                }
561            };
562
563            if !map_ok {
564                log::error!(
565                    "map_region: map_to failed at page {} vaddr={:#x} size={:?}",
566                    i,
567                    page_addr,
568                    page_size
569                );
570                // Free frame for this page that failed to map.
571                crate::sync::with_irqs_disabled(|token| {
572                    crate::memory::free_frames(token, frame, order);
573                });
574
575                // Roll back already mapped pages to keep state consistent.
576                for j in (0..mapped_pages).rev() {
577                    let rb_addr = start + (j as u64) * page_bytes;
578                    match page_size {
579                        VmaPageSize::Small => {
580                            use x86_64::structures::paging::Size4KiB;
581                            let rb_page =
582                                Page::<Size4KiB>::from_start_address(VirtAddr::new(rb_addr))
583                                    .map_err(|_| "Rollback: invalid 4K page address")?;
584                            if let Ok((rb_frame, rb_flush)) = mapper.unmap(rb_page) {
585                                rb_flush.flush();
586                                crate::memory::cow::frame_dec_ref(crate::memory::PhysFrame {
587                                    start_address: rb_frame.start_address(),
588                                });
589                            }
590                        }
591                        VmaPageSize::Huge => {
592                            use x86_64::structures::paging::Size2MiB;
593                            let rb_page =
594                                Page::<Size2MiB>::from_start_address(VirtAddr::new(rb_addr))
595                                    .map_err(|_| "Rollback: invalid 2M page address")?;
596                            if let Ok((rb_frame, rb_flush)) = mapper.unmap(rb_page) {
597                                rb_flush.flush();
598                                crate::memory::cow::frame_dec_ref(crate::memory::PhysFrame {
599                                    start_address: rb_frame.start_address(),
600                                });
601                            }
602                        }
603                    }
604                }
605
606                crate::silo::release_current_task_memory(len);
607                return Err("Failed to map page");
608            }
609
610            // Track refcount for COW
611            crate::memory::cow::frame_inc_ref(crate::memory::PhysFrame {
612                start_address: frame.start_address,
613            });
614
615            mapped_pages += 1;
616        }
617
618        // Track the region
619        let mut regions = self.regions.lock();
620        let region = VirtualMemoryRegion {
621            start,
622            page_count,
623            flags,
624            vma_type,
625            page_size,
626        };
627        regions.insert(start, region);
628
629        let end = start + (page_count as u64) * page_bytes;
630        crate::trace_mem!(
631            crate::trace::category::MEM_MAP,
632            crate::trace::TraceKind::MemMap,
633            page_size.bytes(),
634            crate::trace::TraceTaskCtx {
635                task_id: 0,
636                pid: 0,
637                tid: 0,
638                cr3: self.cr3_phys.as_u64(),
639            },
640            0,
641            start,
642            end,
643            page_count as u64
644        );
645
646        Ok(())
647    }
648
    /// Map a set of existing physical frames into this address space at `start`.
    ///
    /// Each entry of `frame_phys_addrs` is mapped as one 4 KiB page. The frames
    /// are not allocated here, only reference-counted, so they can be shared
    /// with other address spaces (e.g. shared memory).
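    ///
    /// # Example
    ///
    /// A sketch of sharing two existing frames (addresses are illustrative):
    ///
    /// ```ignore
    /// let frames = [0x1_0000_0000u64, 0x1_0000_1000];
    /// aspace.map_shared_frames(
    ///     0x6000_0000,
    ///     &frames,
    ///     VmaFlags { readable: true, writable: true, executable: false, user_accessible: true },
    ///     VmaType::Anonymous,
    /// )?;
    /// ```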
650    pub fn map_shared_frames(
651        &self,
652        start: u64,
653        frame_phys_addrs: &[u64],
654        flags: VmaFlags,
655        vma_type: VmaType,
656    ) -> Result<(), &'static str> {
657        let page_count = frame_phys_addrs.len();
658        if page_count == 0 || start % 4096 != 0 {
659            return Err("Invalid shared region arguments");
660        }
661        let len = (page_count as u64)
662            .checked_mul(4096)
663            .ok_or("Shared region length overflow")?;
664        let end = start.checked_add(len).ok_or("Shared region end overflow")?;
665        const USER_SPACE_END: u64 = 0x0000_8000_0000_0000;
666        if end > USER_SPACE_END {
667            return Err("Shared region out of user-space range");
668        }
669
670        {
671            let regions = self.regions.lock();
672            if regions.iter().any(|(&vma_start, vma)| {
673                let vma_end = vma_start
674                    .saturating_add((vma.page_count as u64).saturating_mul(vma.page_size.bytes()));
675                vma_start < end && vma_end > start
676            }) {
677                return Err("Shared region overlaps existing mapping");
678            }
679        }
680
681        let page_flags = flags.to_page_flags();
682        let mut frame_allocator = BuddyFrameAllocator;
683        let mut mapper = unsafe { self.mapper() };
684        let mut mapped_pages = 0usize;
685
686        for (i, phys_addr) in frame_phys_addrs.iter().copied().enumerate() {
687            let page_addr = start
688                .checked_add((i as u64) * 4096)
689                .ok_or("Shared page address overflow")?;
690            let page = Page::<Size4KiB>::from_start_address(VirtAddr::new(page_addr))
691                .map_err(|_| "Map shared: invalid page address")?;
692            let frame = X86PhysFrame::<Size4KiB>::containing_address(PhysAddr::new(phys_addr));
693
694            let map_ok = unsafe {
695                mapper
696                    .map_to(page, frame, page_flags, &mut frame_allocator)
697                    .map(|flush| flush.flush())
698                    .is_ok()
699            };
700
701            if !map_ok {
702                for j in (0..mapped_pages).rev() {
703                    let rb_addr = start + (j as u64) * 4096;
704                    if let Ok(rb_page) =
705                        Page::<Size4KiB>::from_start_address(VirtAddr::new(rb_addr))
706                    {
707                        if let Ok((rb_frame, rb_flush)) = mapper.unmap(rb_page) {
708                            rb_flush.flush();
709                            crate::memory::cow::frame_dec_ref(crate::memory::PhysFrame {
710                                start_address: rb_frame.start_address(),
711                            });
712                        }
713                    }
714                }
715                return Err("Failed to map shared page");
716            }
717
718            crate::memory::cow::frame_inc_ref(crate::memory::PhysFrame {
719                start_address: PhysAddr::new(phys_addr),
720            });
721            mapped_pages += 1;
722        }
723
724        let mut regions = self.regions.lock();
725        regions.insert(
726            start,
727            VirtualMemoryRegion {
728                start,
729                page_count,
730                flags,
731                vma_type,
732                page_size: VmaPageSize::Small,
733            },
734        );
735        Ok(())
736    }
737
738    /// Unmap a previously mapped region and free the backing frames.
739    pub fn unmap_region(
740        &self,
741        start: u64,
742        page_count: usize,
743        page_size: VmaPageSize,
744    ) -> Result<(), &'static str> {
745        let page_bytes = page_size.bytes();
746        // SAFETY: We have logical ownership of this address space.
747        let mut mapper = unsafe { self.mapper() };
748
749        for i in 0..page_count {
750            let page_addr = start + (i as u64) * page_bytes;
751
752            let frame_addr = match page_size {
753                VmaPageSize::Small => {
754                    use x86_64::structures::paging::Size4KiB;
755                    let page = Page::<Size4KiB>::from_start_address(VirtAddr::new(page_addr))
756                        .map_err(|_| "Failed to unmap: invalid 4K page address")?;
757                    let (frame, flush) =
758                        mapper.unmap(page).map_err(|_| "Failed to unmap 4K page")?;
759                    flush.flush();
760                    frame.start_address()
761                }
762                VmaPageSize::Huge => {
763                    use x86_64::structures::paging::Size2MiB;
764                    let page = Page::<Size2MiB>::from_start_address(VirtAddr::new(page_addr))
765                        .map_err(|_| "Failed to unmap: invalid 2M page address")?;
766                    let (frame, flush) =
767                        mapper.unmap(page).map_err(|_| "Failed to unmap 2M page")?;
768                    flush.flush();
769                    frame.start_address()
770                }
771            };
772
773            // COW-aware refcount decrement: free only when last mapping disappears.
774            let phys_frame = crate::memory::PhysFrame {
775                start_address: frame_addr,
776            };
777            crate::memory::cow::frame_dec_ref(phys_frame);
778        }
779
780        // Remove from VMA tracking.
781        self.regions.lock().remove(&start);
782
783        log::trace!(
784            "Unmapped region: {:#x}..{:#x} ({} pages, size={:?})",
785            start,
786            start + (page_count as u64) * page_bytes,
787            page_count,
788            page_size
789        );
790
791        let end = start + (page_count as u64) * page_bytes;
792        crate::trace_mem!(
793            crate::trace::category::MEM_UNMAP,
794            crate::trace::TraceKind::MemUnmap,
795            page_size.bytes(),
796            crate::trace::TraceTaskCtx {
797                task_id: 0,
798                pid: 0,
799                tid: 0,
800                cr3: self.cr3_phys.as_u64(),
801            },
802            0,
803            start,
804            end,
805            page_count as u64
806        );
807
808        let released = (page_count as u64).saturating_mul(page_bytes);
809        crate::silo::release_current_task_memory(released);
810
811        Ok(())
812    }
813
814    /// Find a free virtual address range of `n_pages` pages of `page_size` starting at or after `hint`.
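    ///
    /// # Example
    ///
    /// A sketch of pairing this with `reserve_region` (the hint and `flags` are
    /// illustrative):
    ///
    /// ```ignore
    /// let base = aspace
    ///     .find_free_vma_range(0x4000_0000, 8, VmaPageSize::Small)
    ///     .ok_or("out of user address space")?;
    /// aspace.reserve_region(base, 8, flags, VmaType::Anonymous, VmaPageSize::Small)?;
    /// ```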
815    pub fn find_free_vma_range(
816        &self,
817        hint: u64,
818        n_pages: usize,
819        page_size: VmaPageSize,
820    ) -> Option<u64> {
821        if n_pages == 0 {
822            return None;
823        }
824        let page_bytes = page_size.bytes();
825        let length = (n_pages as u64).checked_mul(page_bytes)?;
826        let upper_limit: u64 = 0x0000_8000_0000_0000; // USER_SPACE_END
827
828        // Round hint up to a page boundary
829        let mut candidate = (hint.saturating_add(page_bytes - 1)) & !(page_bytes - 1);
830        if candidate == 0 {
831            candidate = page_bytes;
832        }
833
834        let regions = self.regions.lock();
835        for (&vma_start, vma) in regions.iter() {
836            let vma_end = vma_start + vma.page_count as u64 * vma.page_size.bytes();
837
838            // A gap exists before this VMA — candidate fits.
839            if candidate.saturating_add(length) <= vma_start {
840                break;
841            }
842
843            // Candidate overlaps this VMA; skip past it.
844            if vma_end > candidate {
845                candidate = (vma_end.saturating_add(page_bytes - 1)) & !(page_bytes - 1);
846            }
847        }
848
849        // Final bounds check.
850        if candidate.checked_add(length)? <= upper_limit {
851            Some(candidate)
852        } else {
853            None
854        }
855    }
856
857    /// Return true if any tracked VMA overlaps `[addr, addr + len)`.
858    pub fn has_mapping_in_range(&self, addr: u64, len: u64) -> bool {
859        let end = match addr.checked_add(len) {
860            Some(v) => v,
861            None => return true,
862        };
863        let regions = self.regions.lock();
864        regions.iter().any(|(&vma_start, vma)| {
865            let vma_end = vma_start
866                .saturating_add((vma.page_count as u64).saturating_mul(vma.page_size.bytes()));
867            vma_start < end && vma_end > addr
868        })
869    }
870
871    /// Return the tracked VMA that starts exactly at `start`.
872    pub fn region_by_start(&self, start: u64) -> Option<VirtualMemoryRegion> {
873        let regions = self.regions.lock();
874        regions.get(&start).cloned()
875    }
876
877    /// Returns true if any page in `[addr, addr + len)` is currently mapped.
878    pub fn any_mapped_in_range(
879        &self,
880        addr: u64,
881        len: u64,
882        page_size: VmaPageSize,
883    ) -> Result<bool, &'static str> {
884        if len == 0 {
885            return Ok(false);
886        }
887        let end = addr
888            .checked_add(len)
889            .ok_or("any_mapped_in_range: address overflow")?;
890        let step = page_size.bytes();
891        let mut cur = addr;
892        while cur < end {
893            if self.translate(VirtAddr::new(cur)).is_some() {
894                return Ok(true);
895            }
896            cur = cur
897                .checked_add(step)
898                .ok_or("any_mapped_in_range: loop overflow")?;
899        }
900        Ok(false)
901    }
902
    /// Change the access flags of every mapped page in `[addr, addr + len)`.
    ///
    /// Overlapping VMAs are split so that only the protected sub-range receives
    /// the new flags. Partial protection of 2 MiB pages is rejected.
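    ///
    /// # Example
    ///
    /// A sketch of making a mapped code region read-only and executable
    /// (illustrative addresses):
    ///
    /// ```ignore
    /// aspace.protect_range(
    ///     0x40_0000,
    ///     0x4000,
    ///     VmaFlags { readable: true, writable: false, executable: true, user_accessible: true },
    /// )?;
    /// ```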
904    pub fn protect_range(&self, addr: u64, len: u64, flags: VmaFlags) -> Result<(), &'static str> {
905        if len == 0 {
906            return Ok(());
907        }
908        let end = addr
909            .checked_add(len)
910            .ok_or("protect_range: address overflow")?;
911
912        {
913            let regions = self.regions.lock();
914            for (&vma_start, vma) in regions.iter() {
915                let vma_end = vma_start + vma.page_count as u64 * vma.page_size.bytes();
916                if vma_start >= end || vma_end <= addr {
917                    continue;
918                }
919                if vma.page_size == VmaPageSize::Huge {
920                    let range_start = core::cmp::max(vma_start, addr);
921                    let range_end = core::cmp::min(vma_end, end);
922                    if range_start % vma.page_size.bytes() != 0
923                        || range_end % vma.page_size.bytes() != 0
924                    {
925                        return Err(
926                            "protect_range: partial mprotect of 2MiB pages is not supported",
927                        );
928                    }
929                }
930            }
931        }
932
933        let mut touched = false;
934        loop {
935            let region_info = {
936                let regions = self.regions.lock();
937                regions
938                    .iter()
939                    .find(|(&vma_start, vma)| {
940                        let vma_end = vma_start + vma.page_count as u64 * vma.page_size.bytes();
941                        vma_start < end && vma_end > addr
942                    })
943                    .map(|(&k, v)| (k, v.clone()))
944            };
945
946            let Some((vma_start, vma)) = region_info else {
947                break;
948            };
949            touched = true;
950
951            let vma_end = vma_start + vma.page_count as u64 * vma.page_size.bytes();
952            let range_start = core::cmp::max(vma_start, addr);
953            let range_end = core::cmp::min(vma_end, end);
954            let page_bytes = vma.page_size.bytes();
955            let new_pt_flags = flags.to_page_flags();
956
957            let mut mapper = unsafe { self.mapper() };
958            let mut page_addr = range_start;
959            while page_addr < range_end {
960                if mapper.translate_addr(VirtAddr::new(page_addr)).is_none() {
961                    page_addr += page_bytes;
962                    continue;
963                }
964                unsafe {
965                    match vma.page_size {
966                        VmaPageSize::Small => {
967                            let page =
968                                Page::<Size4KiB>::from_start_address(VirtAddr::new(page_addr))
969                                    .map_err(|_| "protect_range: invalid 4K page address")?;
970                            mapper
971                                .update_flags(page, new_pt_flags)
972                                .map(|f| f.ignore())
973                                .map_err(|_| "protect_range: update 4K flags failed")?;
974                        }
975                        VmaPageSize::Huge => {
976                            let mut huge_flags = new_pt_flags;
977                            huge_flags |= PageTableFlags::HUGE_PAGE;
978                            let page =
979                                Page::<Size2MiB>::from_start_address(VirtAddr::new(page_addr))
980                                    .map_err(|_| "protect_range: invalid 2M page address")?;
981                            mapper
982                                .update_flags(page, huge_flags)
983                                .map(|f| f.ignore())
984                                .map_err(|_| "protect_range: update 2M flags failed")?;
985                        }
986                    }
987                }
988                page_addr += page_bytes;
989            }
990
991            {
992                let mut regions = self.regions.lock();
993                regions.remove(&vma_start);
994
995                if range_start > vma_start {
996                    let leading_pages = ((range_start - vma_start) / page_bytes) as usize;
997                    regions.insert(
998                        vma_start,
999                        VirtualMemoryRegion {
1000                            start: vma_start,
1001                            page_count: leading_pages,
1002                            flags: vma.flags,
1003                            vma_type: vma.vma_type,
1004                            page_size: vma.page_size,
1005                        },
1006                    );
1007                }
1008
1009                let middle_pages = ((range_end - range_start) / page_bytes) as usize;
1010                if middle_pages > 0 {
1011                    regions.insert(
1012                        range_start,
1013                        VirtualMemoryRegion {
1014                            start: range_start,
1015                            page_count: middle_pages,
1016                            flags,
1017                            vma_type: vma.vma_type,
1018                            page_size: vma.page_size,
1019                        },
1020                    );
1021                }
1022
1023                if range_end < vma_end {
1024                    let trailing_pages = ((vma_end - range_end) / page_bytes) as usize;
1025                    regions.insert(
1026                        range_end,
1027                        VirtualMemoryRegion {
1028                            start: range_end,
1029                            page_count: trailing_pages,
1030                            flags: vma.flags,
1031                            vma_type: vma.vma_type,
1032                            page_size: vma.page_size,
1033                        },
1034                    );
1035                }
1036            }
1037        }
1038
1039        if !touched {
1040            return Err("protect_range: no mapped region in range");
1041        }
1042        Ok(())
1043    }
1044
    /// Unmap every mapped page in `[addr, addr + len)` and release the frames.
    ///
    /// Partially covered VMAs are split; lazily reserved pages that were never
    /// faulted in are simply dropped from VMA tracking. Partial unmap of 2 MiB
    /// pages is rejected.
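    ///
    /// # Example
    ///
    /// A munmap-style sketch (illustrative addresses):
    ///
    /// ```ignore
    /// // Drop the middle 8 KiB of a previously reserved region.
    /// aspace.unmap_range(0x4000_2000, 0x2000)?;
    /// ```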
1046    pub fn unmap_range(&self, addr: u64, len: u64) -> Result<(), &'static str> {
1047        if len == 0 {
1048            return Ok(());
1049        }
1050        let end = addr
1051            .checked_add(len)
1052            .ok_or("unmap_range: address overflow")?;
1053
1054        // Pre-validate huge-page overlaps: partial unmap of 2MiB mappings is
1055        // not supported yet. Callers must unmap on huge-page boundaries.
1056        {
1057            let regions = self.regions.lock();
1058            for (&vma_start, vma) in regions.iter() {
1059                let vma_end = vma_start + vma.page_count as u64 * vma.page_size.bytes();
1060                if vma_start >= end || vma_end <= addr {
1061                    continue;
1062                }
1063                if vma.page_size == VmaPageSize::Huge {
1064                    let range_start = core::cmp::max(vma_start, addr);
1065                    let range_end = core::cmp::min(vma_end, end);
1066                    if range_start % vma.page_size.bytes() != 0
1067                        || range_end % vma.page_size.bytes() != 0
1068                    {
1069                        return Err("unmap_range: partial unmap of 2MiB pages is not supported");
1070                    }
1071                }
1072            }
1073        }
1074
1075        // Process regions one by one to avoid heap allocation (Vec)
1076        let mut released_bytes = 0u64;
1077        loop {
1078            // Find the first overlapping region
1079            let region_info = {
1080                let regions = self.regions.lock();
1081                regions
1082                    .iter()
1083                    .find(|(&vma_start, vma)| {
1084                        let vma_end = vma_start + vma.page_count as u64 * vma.page_size.bytes();
1085                        vma_start < end && vma_end > addr
1086                    })
1087                    .map(|(&k, v)| (k, v.clone()))
1088            };
1089
1090            let Some((vma_start, vma)) = region_info else {
1091                break; // No more overlapping regions
1092            };
1093
1094            let vma_end = vma_start + vma.page_count as u64 * vma.page_size.bytes();
1095            let range_start = core::cmp::max(vma_start, addr);
1096            let range_end = core::cmp::min(vma_end, end);
1097            released_bytes = released_bytes.saturating_add(range_end.saturating_sub(range_start));
1098
1099            // 1. Hardware unmap
1100            // SAFETY: Logical ownership of address space.
1101            let mut mapper = unsafe { self.mapper() };
1102            let mut page_addr = range_start;
1103            let page_bytes = vma.page_size.bytes();
1104            while page_addr < range_end {
1105                // Lazy VMAs can contain unfaulted pages (no PTE). In that case
1106                // there is nothing to unmap in hardware; just update VMA metadata.
1107                if mapper.translate_addr(VirtAddr::new(page_addr)).is_none() {
1108                    page_addr += page_bytes;
1109                    continue;
1110                }
1111
1112                let frame_addr = match vma.page_size {
1113                    VmaPageSize::Small => {
1114                        use x86_64::structures::paging::Size4KiB;
1115                        let page = Page::<Size4KiB>::from_start_address(VirtAddr::new(page_addr))
1116                            .map_err(|_| "unmap_range: invalid 4K page address")?;
1117                        let (frame, flush) = mapper
1118                            .unmap(page)
1119                            .map_err(|_| "unmap_range: unmap 4K failed")?;
1120                        flush.flush();
1121                        frame.start_address()
1122                    }
1123                    VmaPageSize::Huge => {
1124                        use x86_64::structures::paging::Size2MiB;
1125                        let page = Page::<Size2MiB>::from_start_address(VirtAddr::new(page_addr))
1126                            .map_err(|_| "unmap_range: invalid 2M page address")?;
1127                        let (frame, flush) = mapper
1128                            .unmap(page)
1129                            .map_err(|_| "unmap_range: unmap 2M failed")?;
1130                        flush.flush();
1131                        frame.start_address()
1132                    }
1133                };
1134
1135                let phys = crate::memory::PhysFrame {
1136                    start_address: frame_addr,
1137                };
1138                crate::memory::cow::frame_dec_ref(phys);
1139                page_addr += page_bytes;
1140            }
1141
1142            // 2. Update tracking: remove and re-insert fragments
1143            {
1144                let mut regions = self.regions.lock();
1145                regions.remove(&vma_start);
1146
1147                if range_start > vma_start {
1148                    let leading_pages =
1149                        ((range_start - vma_start) / vma.page_size.bytes()) as usize;
1150                    regions.insert(
1151                        vma_start,
1152                        VirtualMemoryRegion {
1153                            start: vma_start,
1154                            page_count: leading_pages,
1155                            flags: vma.flags,
1156                            vma_type: vma.vma_type,
1157                            page_size: vma.page_size,
1158                        },
1159                    );
1160                }
1161
1162                if range_end < vma_end {
1163                    let trailing_pages = ((vma_end - range_end) / vma.page_size.bytes()) as usize;
1164                    regions.insert(
1165                        range_end,
1166                        VirtualMemoryRegion {
1167                            start: range_end,
1168                            page_count: trailing_pages,
1169                            flags: vma.flags,
1170                            vma_type: vma.vma_type,
1171                            page_size: vma.page_size,
1172                        },
1173                    );
1174                }
1175            }
1176        }
1177
1178        crate::silo::release_current_task_memory(released_bytes);
1179        Ok(())
1180    }
1181
1182    /// Translate a virtual address to its mapped physical address.
1183    pub fn translate(&self, vaddr: VirtAddr) -> Option<PhysAddr> {
1184        // SAFETY: Read-only access to the page tables.
1185        let mapper = unsafe { self.mapper() };
1186        mapper.translate_addr(vaddr)
1187    }
1188
1189    /// Get the physical address of this address space's PML4 table.
1190    pub fn cr3(&self) -> PhysAddr {
1191        self.cr3_phys
1192    }
1193
1194    /// Switch the CPU to this address space by writing CR3.
1195    ///
1196    /// Skips the write if CR3 already points to this address space (avoids
1197    /// unnecessary TLB flush).
1198    ///
1199    /// # Safety
1200    /// The caller must ensure this address space's page tables are valid and
1201    /// that the kernel half is correctly mapped.
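    ///
    /// # Example
    ///
    /// A context-switch sketch (the task structure is an assumption):
    ///
    /// ```ignore
    /// // SAFETY: the page tables were built by new_user()/new_kernel().
    /// unsafe { next_task.address_space.switch_to(); }
    /// ```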
1202    pub unsafe fn switch_to(&self) {
1203        let (current_frame, _) = Cr3::read();
1204        if current_frame.start_address() == self.cr3_phys {
1205            return; // Already active — skip to avoid TLB flush.
1206        }
1207
1208        // SAFETY: cr3_phys points to a valid, 4KiB-aligned PML4 table with
1209        // the kernel half correctly populated.
1210        unsafe {
1211            let frame =
1212                X86PhysFrame::from_start_address(self.cr3_phys).expect("CR3 address not aligned");
1213            Cr3::write(frame, Cr3Flags::empty());
1214        }
1215    }
1216
1217    /// Whether this is the kernel address space.
1218    pub fn is_kernel(&self) -> bool {
1219        self.is_kernel
1220    }
1221
1222    /// Check if this address space has any user-space memory mappings.
1223    pub fn has_user_mappings(&self) -> bool {
1224        if self.is_kernel {
1225            return false;
1226        }
1227        let regions = self.regions.lock();
1228        // Check for any non-kernel mappings.
1229        regions.values().any(|vma| vma.vma_type != VmaType::Kernel)
1230    }
1231
1232    /// Unmap all tracked user regions (best-effort).
1233    ///
1234    /// This frees user frames and clears the VMA list. Kernel mappings are untouched.
1235    /// Does not allocate memory.
1236    pub fn unmap_all_user_regions(&self) {
1237        if self.is_kernel {
1238            return;
1239        }
1240
1241        loop {
1242            // Pop the first region from the map to avoid allocation
1243            let first = {
1244                let mut guard = self.regions.lock();
1245                if let Some(&start) = guard.keys().next() {
1246                    guard.remove(&start)
1247                } else {
1248                    None
1249                }
1250            };
1251
1252            if let Some(region) = first {
1253                let _ = self.unmap_region(region.start, region.page_count, region.page_size);
1254            } else {
1255                break;
1256            }
1257        }
1258    }
1259
    /// Clone this address space for `fork()` using copy-on-write.
    ///
    /// All VMAs are duplicated in the child. Writable pages are downgraded to
    /// read-only and tagged with the COW bit in both parent and child, and the
    /// frame refcounts are bumped so a frame is freed only when its last
    /// mapping disappears.
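    ///
    /// # Example
    ///
    /// A fork-path sketch (task plumbing is an assumption):
    ///
    /// ```ignore
    /// let child_as = parent_as.clone_cow()?;
    /// // Parent and child now share every frame read-only; the first write on
    /// // either side takes a COW fault that copies just that page.
    /// ```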
1261    pub fn clone_cow(&self) -> Result<Arc<AddressSpace>, &'static str> {
1262        if self.is_kernel {
1263            return Err("Cannot fork kernel address space");
1264        }
1265
1266        let child = Arc::new(AddressSpace::new_user()?);
1267
1268        let regions: Vec<VirtualMemoryRegion> = {
1269            let guard = self.regions.lock();
1270            guard.values().cloned().collect()
1271        };
1272
1273        let mut tlb_flush_needed = false;
1274        let mut processed_pages = Vec::new();
1275
1276        let res: Result<(), &'static str> = (|| {
1277            let mut parent_mapper = unsafe { self.mapper() };
1278            let mut child_mapper = unsafe { child.mapper() };
1279            let mut frame_allocator = BuddyFrameAllocator;
1280
1281            for region in regions.iter() {
1282                // Register VMA in child
1283                {
1284                    let mut child_regions = child.regions.lock();
1285                    child_regions.insert(region.start, region.clone());
1286                }
1287
1288                let page_bytes = region.page_size.bytes();
1289
1290                for i in 0..region.page_count {
1291                    let vaddr = VirtAddr::new(region.start + (i as u64) * page_bytes);
1292
1293                    // Translate parent page to frame
1294                    let (phys_frame_addr, flags): (PhysAddr, PageTableFlags) =
1295                        match parent_mapper.translate(vaddr) {
1296                            TranslateResult::Mapped {
1297                                frame,
1298                                offset: _,
1299                                flags,
1300                            } => (frame.start_address(), flags),
1301                            _ => continue,
1302                        };
1303
1304                    let mut new_flags = flags;
1305                    let is_writable = flags.contains(PageTableFlags::WRITABLE);
1306                    const COW_BIT: PageTableFlags = PageTableFlags::BIT_9;
1307
1308                    if is_writable {
1309                        new_flags.remove(PageTableFlags::WRITABLE);
1310                        new_flags.insert(COW_BIT);
1311
1312                        unsafe {
1313                            let res: Result<(), &'static str> = match region.page_size {
1314                                VmaPageSize::Small => parent_mapper
1315                                    .update_flags(
1316                                        Page::<Size4KiB>::from_start_address(vaddr).unwrap(),
1317                                        new_flags,
1318                                    )
1319                                    .map(|f| f.ignore())
1320                                    .map_err(|_| "Failed to update parent 4K flags"),
1321                                VmaPageSize::Huge => parent_mapper
1322                                    .update_flags(
1323                                        Page::<Size2MiB>::from_start_address(vaddr).unwrap(),
1324                                        new_flags,
1325                                    )
1326                                    .map(|f| f.ignore())
1327                                    .map_err(|_| "Failed to update parent 2M flags"),
1328                            };
1329                            if let Err(e) = res {
1330                                return Err(e);
1331                            }
1332                        }
1333                        tlb_flush_needed = true;
1334                    }
1335
1336                    let phys = crate::memory::PhysFrame {
1337                        start_address: phys_frame_addr,
1338                    };
1339                    crate::memory::cow::frame_inc_ref(phys);
1340
1341                    // Map in child. We map it as WRITABLE first to ensure intermediate
1342                    // page tables (PDPT, PD) are created with WRITABLE bit set.
1343                    // If we mapped directly as COW (Read-only), some Mapper implementations
1344                    // might create Read-Only intermediate tables, blocking future COW resolution.
1345                    let map_flags = new_flags | PageTableFlags::WRITABLE;
1346
1347                    unsafe {
1348                        let map_res: Result<(), &'static str> = match region.page_size {
1349                            VmaPageSize::Small => {
1350                                let page = Page::<Size4KiB>::from_start_address(vaddr).unwrap();
1351                                let frame = x86_64::structures::paging::PhysFrame::<Size4KiB>::containing_address(phys_frame_addr);
1352                                child_mapper
1353                                    .map_to(page, frame, map_flags, &mut frame_allocator)
1354                                    .map(|f| f.ignore())
1355                                    .map_err(|_| "Failed to map 4K in child")
1356                            }
1357                            VmaPageSize::Huge => {
1358                                let page = Page::<Size2MiB>::from_start_address(vaddr).unwrap();
1359                                let frame = x86_64::structures::paging::PhysFrame::<Size2MiB>::containing_address(phys_frame_addr);
1360                                child_mapper
1361                                    .map_to(page, frame, map_flags, &mut frame_allocator)
1362                                    .map(|f| f.ignore())
1363                                    .map_err(|_| "Failed to map 2M in child")
1364                            }
1365                        };
1366
1367                        if let Err(e) = map_res {
1368                            crate::memory::cow::frame_dec_ref(phys);
1369                            return Err(e);
1370                        }
1371
1372                        // Now downgrade to the actual COW flags (which may be Read-Only).
1373                        if !new_flags.contains(PageTableFlags::WRITABLE) {
1374                            let downgrade_res: Result<(), &'static str> = match region.page_size {
1375                                VmaPageSize::Small => {
1376                                    let page = Page::<Size4KiB>::from_start_address(vaddr).unwrap();
1377                                    child_mapper
1378                                        .update_flags(page, new_flags)
1379                                        .map(|f| f.ignore())
1380                                        .map_err(|_| "Failed to update child 4K flags")
1381                                }
1382                                VmaPageSize::Huge => {
1383                                    let page = Page::<Size2MiB>::from_start_address(vaddr).unwrap();
1384                                    child_mapper
1385                                        .update_flags(page, new_flags)
1386                                        .map(|f| f.ignore())
1387                                        .map_err(|_| "Failed to update child 2M flags")
1388                                }
1389                            };
1390                            if let Err(e) = downgrade_res {
1391                                let unmapped = match region.page_size {
1392                                    VmaPageSize::Small => {
1393                                        let page =
1394                                            Page::<Size4KiB>::from_start_address(vaddr).unwrap();
1395                                        child_mapper.unmap(page).map(|(_, f)| f.ignore()).is_ok()
1396                                    }
1397                                    VmaPageSize::Huge => {
1398                                        let page =
1399                                            Page::<Size2MiB>::from_start_address(vaddr).unwrap();
1400                                        child_mapper.unmap(page).map(|(_, f)| f.ignore()).is_ok()
1401                                    }
1402                                };
1403                                if unmapped {
1404                                    crate::memory::cow::frame_dec_ref(phys);
1405                                }
1406                                return Err(e);
1407                            }
1408                        }
1409                    }
1410
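                    // Journal this page so that a failure later in the loop can restore
                    // the parent's original flags and drop the extra COW reference.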
1411                    processed_pages.push((vaddr.as_u64(), flags, phys, region.page_size));
1412                }
1413            }
1414            Ok(())
1415        })();
1416
1417        if let Err(e) = res {
1418            log::error!("clone_cow error: {}. Rolling back...", e);
1419            let mut parent_mapper = unsafe { self.mapper() };
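            // Walk the journal in reverse: restore WRITABLE on pages we downgraded in
            // the parent and release the COW reference taken on behalf of the child.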
1420            for (vaddr, original_flags, phys, page_size) in processed_pages.into_iter().rev() {
1421                if original_flags.contains(PageTableFlags::WRITABLE) {
1422                    unsafe {
1423                        match page_size {
1424                            VmaPageSize::Small => {
1425                                let _ = parent_mapper.update_flags(
1426                                    Page::<Size4KiB>::from_start_address(VirtAddr::new(vaddr))
1427                                        .unwrap(),
1428                                    original_flags,
1429                                );
1430                            }
1431                            VmaPageSize::Huge => {
1432                                let _ = parent_mapper.update_flags(
1433                                    Page::<Size2MiB>::from_start_address(VirtAddr::new(vaddr))
1434                                        .unwrap(),
1435                                    original_flags,
1436                                );
1437                            }
1438                        };
1439                    }
1440                }
1441                crate::memory::cow::frame_dec_ref(phys);
1442            }
1443            if tlb_flush_needed {
1444                crate::arch::x86_64::tlb::shootdown_all();
1445            }
1446            return Err(e);
1447        }
1448
1449        if tlb_flush_needed {
1450            crate::arch::x86_64::tlb::shootdown_all();
1451        }
1452        Ok(child)
1453    }
1454
1455    /// Frees the user-half page-table hierarchy (PML4 entries 0..256), leaving the shared kernel half intact.
1456    fn free_user_page_tables(&self) {
1457        if self.is_kernel {
1458            return;
1459        }
1460
1461        // SAFETY: We have logical ownership of this address space during drop.
1462        let l4 = unsafe { &mut *self.l4_table_virt.as_mut_ptr::<PageTable>() };
1463
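        // Only the user half (PML4[0..256]) belongs to this address space; entries
        // 256..512 point at the shared kernel tables and must never be freed here.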
1464        for i in 0..256 {
1465            if !l4[i].flags().contains(PageTableFlags::PRESENT) {
1466                continue;
1467            }
1468            let l3_frame = match l4[i].frame() {
1469                Ok(f) => f,
1470                Err(_) => {
1471                    l4[i].set_unused();
1472                    continue;
1473                }
1474            };
1475
1476            free_l3_table(l3_frame);
1477            l4[i].set_unused();
1478        }
1479    }
1480}
1481
1482impl Drop for AddressSpace {
1483    /// Tears down user mappings and frees the PML4 frame; the kernel address space is never freed.
1484    fn drop(&mut self) {
1485        if self.is_kernel {
1486            return; // Never free the kernel address space.
1487        }
1488
1489        log::trace!("AddressSpace::drop begin CR3={:#x}", self.cr3_phys.as_u64());
1490
1491        // Best-effort cleanup of user mappings.
1492        self.unmap_all_user_regions();
1493        #[cfg(not(feature = "selftest"))]
1494        self.free_user_page_tables();
1495        #[cfg(feature = "selftest")]
1496        {
1497            // Runtime selftests create/destroy many temporary address spaces and
1498            // currently expose instability in recursive page-table teardown.
1499            // Keep tests deterministic by skipping deep PT reclaim in this mode.
1500            log::trace!(
1501                "AddressSpace::drop selftest mode: skipping deep page-table free for CR3={:#x}",
1502                self.cr3_phys.as_u64()
1503            );
1504        }
1505
1506        // Free the PML4 frame itself. Intermediate user page tables (L3/L2/L1)
1507        // were already reclaimed by `free_user_page_tables` above, except in
1508        // selftest mode, where deep page-table reclaim is deliberately skipped.
1509        let phys_frame = crate::memory::PhysFrame {
1510            start_address: self.cr3_phys,
1511        };
1512        crate::sync::with_irqs_disabled(|token| {
1513            crate::memory::free_frame(token, phys_frame);
1514        });
1515
1516        log::trace!("AddressSpace::drop end CR3={:#x}", self.cr3_phys.as_u64());
1517        log::debug!(
1518            "User address space dropped: CR3={:#x}",
1519            self.cr3_phys.as_u64()
1520        );
1521    }
1522}
1523
1524// ---------------------------------------------------------------------------
1525// Page table cleanup helpers (user half only)
1526// ---------------------------------------------------------------------------
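//
// Teardown runs top-down: `free_l3_table` visits each present PDPT entry and
// calls `free_l2_table`, which in turn calls `free_l1_table`; each helper then
// returns its own table frame via `free_frame`. The data frames mapped by leaf
// entries are not freed here; they are reclaimed by `unmap_all_user_regions`
// before these helpers run.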
1527
1528/// Returns a single physical frame to the frame allocator (with interrupts disabled).
1529fn free_frame(phys: PhysAddr) {
1530    let phys_frame = crate::memory::PhysFrame {
1531        start_address: phys,
1532    };
1533    crate::sync::with_irqs_disabled(|token| {
1534        crate::memory::free_frame(token, phys_frame);
1535    });
1536}
1537
1538/// Clears all entries of an L1 (page table) and frees the table frame itself.
1539fn free_l1_table(frame: X86PhysFrame<Size4KiB>) {
1540    let l1_virt = VirtAddr::new(crate::memory::phys_to_virt(frame.start_address().as_u64()));
1541    // SAFETY: l1_virt points to a valid page table frame in HHDM.
1542    let l1 = unsafe { &mut *l1_virt.as_mut_ptr::<PageTable>() };
1543    for entry in l1.iter_mut() {
1544        if entry.flags().contains(PageTableFlags::PRESENT) {
1545            // Mapped frames are already freed via unmap_all_user_regions.
1546            entry.set_unused();
1547        }
1548    }
1549    free_frame(frame.start_address());
1550}
1551
1552/// Frees every L1 table referenced by an L2 (page directory), then frees the L2 frame.
1553fn free_l2_table(frame: X86PhysFrame<Size4KiB>) {
1554    let l2_virt = VirtAddr::new(crate::memory::phys_to_virt(frame.start_address().as_u64()));
1555    let l2 = unsafe { &mut *l2_virt.as_mut_ptr::<PageTable>() };
1556    for entry in l2.iter_mut() {
1557        if !entry.flags().contains(PageTableFlags::PRESENT) {
1558            continue;
1559        }
1560        if entry.flags().contains(PageTableFlags::HUGE_PAGE) {
1561            // Huge (2 MiB) leaf mapping: there is no L1 table below it to free; just clear the entry.
1562            entry.set_unused();
1563            continue;
1564        }
1565        if let Ok(l1_frame) = entry.frame() {
1566            free_l1_table(l1_frame);
1567        }
1568        entry.set_unused();
1569    }
1570    free_frame(frame.start_address());
1571}
1572
1573/// Frees every L2 table referenced by an L3 (PDPT) entry, then frees the L3 frame.
1574fn free_l3_table(frame: X86PhysFrame<Size4KiB>) {
1575    let l3_virt = VirtAddr::new(crate::memory::phys_to_virt(frame.start_address().as_u64()));
1576    let l3 = unsafe { &mut *l3_virt.as_mut_ptr::<PageTable>() };
1577    for entry in l3.iter_mut() {
1578        if !entry.flags().contains(PageTableFlags::PRESENT) {
1579            continue;
1580        }
1581        if entry.flags().contains(PageTableFlags::HUGE_PAGE) {
1582            // 1 GiB pages are not expected in user space today.
1583            entry.set_unused();
1584            continue;
1585        }
1586        if let Ok(l2_frame) = entry.frame() {
1587            free_l2_table(l2_frame);
1588        }
1589        entry.set_unused();
1590    }
1591    free_frame(frame.start_address());
1592}
1593
1594// ---------------------------------------------------------------------------
1595// Kernel address space singleton
1596// ---------------------------------------------------------------------------
1597
1598static KERNEL_ADDRESS_SPACE: Once<Arc<AddressSpace>> = Once::new();
1599
1600/// Initialize the kernel address space singleton.
1601///
1602/// Must be called once during boot, after paging is initialized, before the
1603/// scheduler creates any tasks.
1604///
1605/// # Safety
1606/// Must be called in single-threaded init context.
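///
/// A minimal sketch of the intended boot-time ordering (the comments stand in
/// for surrounding init code and are assumptions; only the ordering is what
/// this function requires):
///
/// ```ignore
/// // paging and the HHDM are already set up, still single-threaded:
/// unsafe { init_kernel_address_space() };
/// // from here on the scheduler may create tasks and kernel_address_space() is usable
/// ```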
1607pub unsafe fn init_kernel_address_space() {
1608    KERNEL_ADDRESS_SPACE.call_once(|| {
1609        // SAFETY: Called once, single-threaded, paging initialized.
1610        Arc::new(unsafe { AddressSpace::new_kernel() })
1611    });
1612}
1613
1614/// Get a reference to the kernel address space.
1615///
1616/// Panics if called before `init_kernel_address_space()`.
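///
/// A sketch of typical use when a kernel task needs an address space; the
/// surrounding task-creation code is hypothetical, the point is that kernel
/// tasks share this singleton rather than building their own:
///
/// ```ignore
/// let address_space = Arc::clone(kernel_address_space());
/// // hand `address_space` to the new kernel task
/// ```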
1617pub fn kernel_address_space() -> &'static Arc<AddressSpace> {
1618    KERNEL_ADDRESS_SPACE
1619        .get()
1620        .expect("Kernel address space not initialized")
1621}