// strat9_kernel/memory/frame.rs
//! Physical frame allocator abstraction.
//!
//! ## MetaSlot (per-frame metadata, issue #38)
//!
//! Each 4 KiB physical frame has a **dedicated 64-byte [`MetaSlot`]**
//! in a separate contiguous array (initialized by [`init_metadata_array`]). Buddy
//! free-list [`FreeListLink`] nodes, reference counts, purpose flags,
//! [`meta_guard`] bits, a per-allocation **generation** counter, and an optional
//! [`FrameMetaVtable`] live here — **not** in the mapped page bytes, so mappings
//! see a pristine payload.
//!
//! ## Review / invariants (issue #38)
//!
//! - **No metadata in the payload**: buddy links live in [`FreeListLink`] and are
//!   never written as "fake pointers" into the 4 KiB of mapped frame bytes.
//! - **`generation`**: incremented only by [`MetaSlot::note_new_allocation_epoch`]
//!   after a successful `CAS` in [`FrameAllocOptions::allocate`]. Do not use
//!   [`MetaSlot::set_generation`] outside bootstrap/tests: arbitrary values make
//!   the "generational" use-after-free schemes inconsistent.
//! - **`meta_guard::POISONED` vs `frame_flags::POISONED`**: two distinct bit spaces
//!   (dedicated `guard` bits vs logical flags). To mark a corrupted frame, prefer
//!   [`MetaSlot::mark_poisoned`], which sets both.
//! - **`vtable_ref` / `try_vtable_ref`**: `0` bits → default; misaligned or invalid
//!   bits → default (no UB). Aligned pointers must reference a valid `'static`
//!   [`FrameMetaVtable`] when registered by the kernel.
//! - **Order-0 cache**: `buddy::alloc(0)` may serve from the local cache; the
//!   [`FrameAllocOptions::allocate`] path still applies the CAS + epoch on the same frame.
use core::{
    mem::{self, offset_of},
    ptr,
    sync::atomic::{AtomicU32, AtomicU64, AtomicU8, Ordering},
};

use x86_64::PhysAddr;

use crate::{memory::boot_alloc::BootAllocator, sync::IrqDisabledToken};
36
// ==============================================================================
// FrameAllocOptions  (Asterinas OSTD pattern)
// ==============================================================================
//
// DESIGN NOTES — why this wrapper exists:
//
//  * In Asterinas OSTD, `FrameAllocOptions::new()` defaults to `zeroed: true`.
//    This means callers can never accidentally hand out a frame that still holds
//    data from a previous lifetime.  The only way to skip zeroing is an
//    explicit `.zeroed(false)` call at the site that *knows* it is safe to do
//    so (e.g. a frame that will be fully overwritten before any read).
//
//  * The critical failure mode we are fixing:
//    `BuddyFrameAllocator::allocate_frame` (used by `OffsetPageTable` when it
//    needs a new intermediate page-table node) was returning raw, unzeroed
//    frames.  A freshly-split buddy block can contain bytes left behind by the
//    slab allocator (POISON_BYTE = 0xDE) or by whatever previously lived in
//    that memory.  The CPU page-table walker reads all 512 entries of every
//    intermediate node it traverses.  A random non-zero entry is decoded as a
//    valid PTE pointing to an arbitrary physical address — which explains why
//    RIP (the first fetch address the CPU tries after entering Ring 3) changes
//    on every boot.
//
//  * The `flags` field mirrors OSTD's per-frame metadata: we stamp the purpose
//    (kernel / user / page-table) into `FrameMeta::flags` atomically using
//    `Ordering::Release` so that any CPU that later reads the frame through
//    `get_meta` observes the correct flags.
//
//  * Refcount state machine (OSTD-style, fully enforced):
//
//    `buddy.rs` maintains the invariant: free-list frame ⟹ refcount == REFCOUNT_UNUSED.
//    `mark_block_free()` stamps REFCOUNT_UNUSED; `mark_block_allocated()` leaves it
//    untouched.  `FrameAllocOptions::allocate()` performs CAS(REFCOUNT_UNUSED -> 1)
//    as a fail-fast corruption check before publishing the frame as live:
//
//       buddy alloc ▶ optional zero ▶ set flags ▶ CAS(UNUSED -> 1) ▶ live
/// Sentinel refcount for a frame that is in the buddy free list.
///
/// Mirrors `REF_COUNT_UNUSED` in Asterinas OSTD `meta.rs`.
///
/// `buddy.rs` stamps this value in `mark_block_free()` and leaves it intact in
/// `mark_block_allocated()`.  `FrameAllocOptions::allocate()` performs
/// `CAS(REFCOUNT_UNUSED -> 1)` to atomically claim the frame and detect any
/// double-free / free-list corruption.
///
/// `u32::MAX` is unreachable as a live refcount in practice, which is what
/// makes it usable as an out-of-band sentinel.
pub const REFCOUNT_UNUSED: u32 = u32::MAX;
83
/// Options controlling how a physical frame is allocated.
///
/// The default configuration (`FrameAllocOptions::new()`) produces a
/// **zeroed** frame.  Callers that need a non-zeroed frame (e.g. DMA buffers
/// that are immediately filled by hardware, or frames that will be fully
/// overwritten before any read) must explicitly call `.zeroed(false)`.
///
/// # Example
///
/// ```ignore
/// // Allocate a zeroed page-table frame (the safe default).
/// let frame = FrameAllocOptions::new()
///     .purpose(FramePurpose::PageTable)
///     .allocate(token)?;
///
/// // Allocate a user-data frame without zeroing (caller guarantees it will
/// // be fully overwritten, e.g. by an ELF segment load).
/// let frame = FrameAllocOptions::new()
///     .zeroed(false)
///     .purpose(FramePurpose::UserData)
///     .allocate(token)?;
/// ```
pub struct FrameAllocOptions {
    /// Whether the frame content should be zeroed before being returned.
    ///
    /// Defaults to `true`.  Setting this to `false` is only safe when the
    /// caller guarantees the frame will be fully written before any read.
    zeroed: bool,
    /// The logical purpose of the frame, encoded as `frame_flags` bits.
    ///
    /// Note: only the flag bits are retained here, not the `FramePurpose`
    /// variant itself; `allocate()` therefore cannot recover the original
    /// variant (see the ordering note on `zeroed()`).
    purpose_flags: u32,
}
115
/// Describes the intended purpose of an allocated frame.
///
/// Purpose is written into `FrameMeta::flags` with `Ordering::Release` so
/// that any concurrent reader of the metadata (e.g. a TLB-shootdown handler
/// deciding whether a frame holds a page-table node) sees a consistent view.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FramePurpose {
    /// Frame will hold a kernel page-table node (PML4/PDPT/PD/PT).
    ///
    /// These frames MUST be zeroed — unzeroed page-table nodes are the primary
    /// source of non-deterministic RIP at Ring 3 transition.
    PageTable,
    /// Frame belongs to kernel address-space (e.g. heap, stack, metadata).
    KernelData,
    /// Frame belongs to a user-space address-space (anonymous or file-backed).
    UserData,
    /// Caller-managed; raw flags are passed through unchanged
    /// (`ALLOCATED` is still OR'd in by `to_flags`).
    Custom(u32),
}
135
136impl FramePurpose {
137    fn to_flags(self) -> u32 {
138        match self {
139            // Page-table frames are always kernel-owned.
140            Self::PageTable => frame_flags::KERNEL | frame_flags::ALLOCATED,
141            Self::KernelData => frame_flags::KERNEL | frame_flags::ALLOCATED,
142            Self::UserData => frame_flags::USER | frame_flags::ALLOCATED | frame_flags::MOVABLE,
143            Self::Custom(f) => f | frame_flags::ALLOCATED,
144        }
145    }
146
147    /// Returns `true` if this purpose requires zeroing regardless of the
148    /// `zeroed` option.  Page-table nodes must always be zeroed.
149    pub fn requires_zero(self) -> bool {
150        matches!(self, Self::PageTable)
151    }
152}
153
impl Default for FrameAllocOptions {
    /// Same safe defaults as [`FrameAllocOptions::new`] (zeroed, `KernelData`).
    fn default() -> Self {
        Self::new()
    }
}
159
impl FrameAllocOptions {
    /// Creates allocation options with safe defaults:
    ///  - `zeroed = true`
    ///  - purpose = `KernelData`
    pub fn new() -> Self {
        Self {
            zeroed: true,
            purpose_flags: FramePurpose::KernelData.to_flags(),
        }
    }

    /// Override the zero-initialisation policy.
    ///
    /// # Safety contract (enforced by convention, not the type system)
    ///
    /// If `zeroed` is set to `false`, the caller MUST fully overwrite every
    /// byte of the frame before allowing any other CPU or subsystem to read it.
    /// Violating this rule is a memory-safety hazard: stale bytes in an
    /// intermediate page-table node cause the CPU to follow arbitrary PTEs.
    ///
    /// NOTE(review): builder call order matters.  `.purpose(PageTable)` forces
    /// `zeroed = true` at call time, but a *subsequent* `.zeroed(false)` call
    /// silently re-disables it, and `allocate()` cannot re-check the purpose
    /// because `PageTable` and `KernelData` map to identical `purpose_flags`
    /// bits.  Confirm no caller uses the `.purpose(..).zeroed(false)` order,
    /// or retain the purpose variant in the struct to enforce it.
    pub fn zeroed(mut self, zeroed: bool) -> Self {
        self.zeroed = zeroed;
        self
    }

    /// Set the intended purpose of the frame.
    ///
    /// `PageTable` purpose forces zeroing even if `.zeroed(false)` was called
    /// *before* this call (see the ordering note on [`Self::zeroed`]).
    pub fn purpose(mut self, p: FramePurpose) -> Self {
        self.purpose_flags = p.to_flags();
        // Page-table nodes must always be zeroed — override any caller setting.
        if p.requires_zero() {
            self.zeroed = true;
        }
        self
    }

    /// Allocate a single 4 KiB frame according to the configured options.
    ///
    /// The allocation path is:
    ///
    /// 1. Ask the buddy allocator for an order-0 frame (exclusive ownership is
    ///    guaranteed by the buddy's own bitmap + free-list discipline).
    /// 2. Optionally zero the 4 KiB frame contents via the HHDM.
    /// 3. Stamp `FrameMeta::flags` with the purpose flags using `Release`
    ///    ordering.
    /// 4. Store `refcount = 1` with `Release` ordering so any later `Acquire`
    ///    load of the refcount observes the fully-initialised metadata and
    ///    (if zeroed) zeroed content.
    ///
    /// # Sentinel handoff: `CAS(REFCOUNT_UNUSED -> 1)`
    ///
    /// `buddy.rs` maintains the invariant that every frame on the free list has
    /// `refcount == REFCOUNT_UNUSED`.  `mark_block_allocated()` leaves this
    /// sentinel intact, so the frame arriving here still carries `REFCOUNT_UNUSED`.
    ///
    /// The CAS atomically claims the frame and acts as a fail-fast corruption
    /// check: if the same frame appears twice in the buddy free list (double-free
    /// or metadata corruption), the second allocation attempt will observe a
    /// refcount of `1` (set by the first allocation) and panic immediately rather
    /// than silently aliasing memory.
    ///
    /// # Errors
    ///
    /// Propagates [`AllocError`] from the buddy allocator when no order-0 block
    /// of the requested migratetype is available.
    ///
    /// # Panics
    ///
    /// Panics on refcount-sentinel mismatch (buddy free-list corruption) and,
    /// transitively, if `get_meta` detects an out-of-bounds physical address.
    pub fn allocate(self, token: &IrqDisabledToken) -> Result<PhysFrame, AllocError> {
        // MOVABLE purpose bits select the movable migratetype so user pages
        // stay compactable; everything else is unmovable.
        let migratetype = if self.purpose_flags & frame_flags::MOVABLE != 0 {
            crate::memory::zone::Migratetype::Movable
        } else {
            crate::memory::zone::Migratetype::Unmovable
        };

        // Step 1 — exclusive frame from the buddy allocator.
        let frame = crate::memory::buddy::alloc_migratetype(token, 0, migratetype)?;
        let phys = frame.start_address.as_u64();

        // SAFETY: `get_meta` panics only if `phys` is out-of-bounds, which
        // would be a buddy-level invariant violation (it returned an address
        // beyond the metadata array).  That is a kernel bug, not UB here.
        let meta = get_meta(frame.start_address);

        // Step 2 — zero the frame content if required.
        //
        // The zeroing MUST happen before the `Release` store of `refcount = 1`
        // (step 4) so that any thread performing an `Acquire` load of the
        // refcount and then reading frame bytes observes zeros.
        //
        // For `FramePurpose::PageTable` this is unconditional: the CPU's
        // page-table walker reads all 512 entries of every intermediate node it
        // visits.  Stale non-zero bytes would be decoded as valid PTEs pointing
        // to arbitrary physical addresses, producing a non-deterministic RIP on
        // Ring 3 entry (the root cause of the original bug).
        //
        // SAFETY: `phys_to_virt(phys)` is a valid HHDM address covering exactly
        // `PAGE_SIZE` bytes.  The buddy allocator guarantees we have exclusive
        // ownership of these bytes for the duration of this function.
        if self.zeroed {
            unsafe {
                ptr::write_bytes(
                    crate::memory::phys_to_virt(phys) as *mut u8,
                    0,
                    PAGE_SIZE as usize,
                );
            }
        }

        // Step 3 — stamp purpose flags with `Release` ordering.
        //
        // Any reader that subsequently loads `refcount` with `Acquire` (step 4)
        // is guaranteed to observe these flags as well.
        meta.flags.store(self.purpose_flags, Ordering::Release);
        meta.set_order(0);

        // Step 4 — claim the frame and publish it as live.
        //
        // CAS(REFCOUNT_UNUSED -> 1): atomically transitions the frame from the
        // buddy free-list sentinel to a live, exclusively-owned frame.  The
        // `AcqRel` success ordering ensures steps 2 and 3 happen-before any
        // `Acquire` load of this refcount by another CPU, and also observes
        // the buddy's `Release` store of REFCOUNT_UNUSED.
        //
        // Failure means the frame's refcount was not REFCOUNT_UNUSED — either
        // the frame is still live (double-alloc) or the buddy free list is
        // corrupt (double-free).  Both are kernel bugs; panic immediately.
        meta.cas_refcount(REFCOUNT_UNUSED, 1)
            .unwrap_or_else(|actual| {
                panic!(
                    "buddy corruption: frame {:#x} refcount is {:#x} (expected REFCOUNT_UNUSED); \
                 double-free or free-list corruption",
                    phys, actual,
                )
            });

        // New live epoch: default vtable, clear guard bits, bump generation (issue #38).
        meta.note_new_allocation_epoch();

        Ok(frame)
    }
}
294
/// Size of one physical frame in bytes (this module tracks 4 KiB frames only).
pub const PAGE_SIZE: u64 = 4096;
/// Alignment of the metadata array base so each slot starts cache-line aligned.
pub const FRAME_META_ALIGN: usize = 64;
/// Stride (and exact `size_of`) of one [`MetaSlot`] entry in the metadata array.
pub const FRAME_META_SIZE: usize = 64;
/// Sentinel "no link" value for [`FreeListLink`] `next`/`prev` fields.
pub const FRAME_META_LINK_NONE: u64 = u64::MAX;
299
/// Guard bits stored in [`MetaSlot::guard`] (issue #38 — extensible without touching page bytes).
///
/// Distinct from [`frame_flags::POISONED`] (logical frame state in `flags`);
/// [`MetaSlot::mark_poisoned`] sets both.
pub mod meta_guard {
    /// No guard condition asserted.
    pub const NONE: u32 = 0;
    /// Frame must not be exposed as a userspace mapping (kernel / debug).
    pub const KERNEL_ONLY: u32 = 1 << 0;
    /// Slot marked poisoned after detected corruption (never recycle blindly).
    pub const POISONED: u32 = 1 << 31;
}
311
/// Persistent flags stored in [`MetaSlot`] / [`FrameMeta`].
pub mod frame_flags {
    /// The frame is allocated.
    pub const ALLOCATED: u32 = 1 << 8;
    /// The frame is free.
    pub const FREE: u32 = 1 << 9;
    /// The frame is reserved for the kernel.
    pub const KERNEL: u32 = 1 << 10;
    /// The frame belongs to user space.
    pub const USER: u32 = 1 << 11;
    /// The frame is poisoned and must not be recycled as-is.
    pub const POISONED: u32 = 1 << 12;
    /// The frame belongs to a movable page class.
    pub const MOVABLE: u32 = 1 << 13;
    /// Frame eligible for copy-on-write.
    pub const COW: u32 = 1 << 0;
    /// Shared DLL-type frame, never COW.
    pub const DLL: u32 = 1 << 1;
    /// Anonymous frame.
    pub const ANONYMOUS: u32 = 1 << 2;
}
333
/// Buddy free-list link storage (intrusive list nodes live in [`MetaSlot`], not in frame bytes).
///
/// `AtomicU64` matches the rest of the metadata slot's atomic story and keeps the public
/// [`MetaSlot`] API safe if list helpers are ever used without the buddy spinlock. Today
/// `buddy.rs` mutates these fields only while holding the global buddy lock, so plain
/// `Cell<u64>` would suffice for ordering; that would be a micro-optimization if profiling shows
/// hot contention here.
#[repr(C)]
pub struct FreeListLink {
    // Physical frame number / index links; FRAME_META_LINK_NONE means "no neighbor".
    pub(crate) next: AtomicU64,
    pub(crate) prev: AtomicU64,
}
346
347impl FreeListLink {
348    pub const fn new() -> Self {
349        Self {
350            next: AtomicU64::new(FRAME_META_LINK_NONE),
351            prev: AtomicU64::new(FRAME_META_LINK_NONE),
352        }
353    }
354}
355
/// Custom vtable for frame-type-specific behavior (DMA teardown, device hooks, …).
///
/// Store a pointer as `u64` in [`MetaSlot::vtable`]; `0` selects [`DEFAULT_FRAME_META_VTABLE`].
#[repr(C)]
pub struct FrameMetaVtable {
    /// Called when the last shared reference to the frame is dropped (`refcount` → 0 path).
    ///
    /// # When it runs
    /// Invoked by [`release_owned_block`] **once** for the head frame of a block,
    /// immediately after the last ownership reference is dropped and before any
    /// per-page `on_unmap` hooks.  It does **not** run for individual page unmappings
    /// that leave the block pinned (e.g. one task unmapping while another still holds a pin).
    ///
    /// # Constraints
    /// Invoked with IRQs **disabled** and without the buddy zone lock held.
    /// MUST be: allocation-free, lock-free, and infallible.
    pub on_last_ref: Option<fn(PhysAddr)>,
    /// Called once per 4 KiB page when a mapping block is released to the allocator (unmap path).
    ///
    /// # When it runs
    /// Invoked by [`release_owned_block`] **before** the buddy allocator decides whether
    /// to recycle or quarantine the block.  It therefore runs even for poisoned frames
    /// that will be quarantined and never reused.
    ///
    /// # Constraints
    /// Invoked with IRQs **disabled** and the buddy zone lock held on the caller's CPU.
    /// MUST be:
    ///   - allocation-free (no heap, no buddy);
    ///   - lock-free (no spinlocks that might be held by the interrupted CPU);
    ///   - infallible (no panic, no unwrap).
    pub on_unmap: Option<fn(PhysAddr)>,
    /// Reserved for future hooks (keeps struct at 64 bytes for [`FRAME_META_SIZE`]).
    pub reserved: [u64; 6],
}
390
/// Default vtable used when [`MetaSlot::vtable`] is `0`: no hooks installed.
pub static DEFAULT_FRAME_META_VTABLE: FrameMetaVtable = FrameMetaVtable {
    on_last_ref: None,
    on_unmap: None,
    reserved: [0; 6],
};

// Compile-time layout guard: the vtable must stay exactly one slot (64 bytes) wide.
const _: () = assert!(mem::size_of::<FrameMetaVtable>() == FRAME_META_SIZE);
399
/// 64-byte cache-line metadata for one physical frame (issue #38).
///
/// Layout: free-list links + flags + refcount + optional vtable + generation + reserved tail
/// for future guard bits / generational references without touching the page payload.
///
/// Use plain `#[repr(C)]` (not `align(64)` on the struct): `align(64)` would pad the **type
/// size** to a multiple of 64 and can inflate `size_of` to 128. The metadata **array** is
/// still allocated with [`FRAME_META_ALIGN`] so each slot stays cache-line aligned.
///
/// Field order matters: `vtable` immediately follows `free_link` so `AtomicU64` stays
/// 8-byte aligned without hidden padding after `refcount` (which would inflate the struct
/// to 72 bytes).
#[repr(C)]
pub struct MetaSlot {
    /// Intrusive buddy free-list node (bytes 0..16).
    pub free_link: FreeListLink,
    /// `*const FrameMetaVtable` as bits; `0` means [`DEFAULT_FRAME_META_VTABLE`].
    pub vtable: AtomicU64,
    /// Logical frame state ([`frame_flags`] bits).
    pub flags: AtomicU32,
    /// Buddy order of the block this frame heads (0 = single 4 KiB frame).
    pub order: AtomicU8,
    /// Padding so `refcount` stays 4-byte aligned; if `order` widens or new fields are added,
    /// re-check [`META_SLOT_REFCOUNT_BYTE_OFFSET`] / [`MetaSlot::REFCOUNT_BYTE_OFFSET`].
    _reserved0: [u8; 3],
    /// Live reference count; [`REFCOUNT_UNUSED`] while the frame sits on the free list.
    pub refcount: AtomicU32,
    /// Bumps each time the frame is successfully claimed from the buddy free list
    /// (see [`MetaSlot::note_new_allocation_epoch`]).
    pub generation: AtomicU32,
    /// Kernel-owned guard bits ([`meta_guard`]); independent of `frame_flags`.
    pub guard: AtomicU32,
    /// Low 16 bits: owner CPU id hint (issue #38); upper bits reserved / NUMA placeholder.
    pub meta_aux: AtomicU32,
    /// Reserved tail keeping the slot at exactly 64 bytes.
    pub _reserved_tail: [u8; 16],
}
432
/// Byte offset of [`MetaSlot::refcount`] from the start of each metadata slot (layout contract).
///
/// Re-exported as [`crate::memory::META_SLOT_REFCOUNT_BYTE_OFFSET`]. Equals [`MetaSlot::REFCOUNT_BYTE_OFFSET`].
pub const META_SLOT_REFCOUNT_BYTE_OFFSET: usize = offset_of!(MetaSlot, refcount);

/// Backwards-compatible name for [`MetaSlot`].
pub type FrameMeta = MetaSlot;
440
441impl MetaSlot {
442    /// Empty metadata for boot-time array initialization.
443    pub const fn new() -> Self {
444        Self {
445            free_link: FreeListLink::new(),
446            vtable: AtomicU64::new(0),
447            flags: AtomicU32::new(0),
448            order: AtomicU8::new(0),
449            _reserved0: [0; 3],
450            refcount: AtomicU32::new(0),
451            generation: AtomicU32::new(0),
452            guard: AtomicU32::new(0),
453            meta_aux: AtomicU32::new(0),
454            _reserved_tail: [0; 16],
455        }
456    }
457
458    /// Reset vtable/guard when returning a frame to the buddy free list (`buddy::set_block_meta`).
459    ///
460    /// Preserves [`meta_guard::POISONED`] so poisoned frames are not silently « healed » on free.
461    #[inline]
462    pub fn reset_with_free_list_meta(&self) {
463        self.set_vtable_bits(0);
464        let poison = self.get_guard() & meta_guard::POISONED;
465        self.guard.store(poison, Ordering::Release);
466    }
467
468    #[inline]
469    pub fn meta_aux_load(&self) -> u32 {
470        self.meta_aux.load(Ordering::Relaxed)
471    }
472
473    #[inline]
474    pub fn meta_aux_store(&self, v: u32) {
475        // CPU-id hint : no happens-before relationship needed; Relaxed is sufficient.
476        self.meta_aux.store(v, Ordering::Relaxed);
477    }
478
479    /// Byte offset of `refcount` from the start of [`MetaSlot`] (same as [`META_SLOT_REFCOUNT_BYTE_OFFSET`]).
480    pub const REFCOUNT_BYTE_OFFSET: usize = META_SLOT_REFCOUNT_BYTE_OFFSET;
481
482    /// After a successful `CAS(REFCOUNT_UNUSED → 1)` in [`FrameAllocOptions::allocate`],
483    /// start a new metadata epoch: default vtable, clear guards, bump generation.
484    ///
485    /// The generation bump uses [`Ordering::Release`] so another CPU that later
486    /// [`Acquire`]-loads [`Self::generation`] or pairs with the refcount hand-off sees this
487    /// epoch for genealogical use-after-free checks. [`Ordering::Relaxed`] would be enough only
488    /// if all such checks ran on the allocating CPU with no cross-CPU visibility requirement.
489    #[inline]
490    pub fn note_new_allocation_epoch(&self) {
491        self.set_vtable_bits(0);
492        self.guard.store(meta_guard::NONE, Ordering::Release);
493        self.generation.fetch_add(1, Ordering::Release);
494    }
495
496    #[inline]
497    pub fn get_guard(&self) -> u32 {
498        self.guard.load(Ordering::Acquire)
499    }
500
501    #[inline]
502    pub fn set_guard(&self, bits: u32) {
503        self.guard.store(bits, Ordering::Release);
504    }
505
506    #[inline]
507    pub fn fetch_or_guard(&self, bits: u32) -> u32 {
508        self.guard.fetch_or(bits, Ordering::AcqRel)
509    }
510
511    /// Returns `true` if [`meta_guard::POISONED`] is set.
512    #[inline]
513    pub fn is_guard_poisoned(&self) -> bool {
514        self.get_guard() & meta_guard::POISONED != 0
515    }
516
517    /// Marks both [`meta_guard::POISONED`] and [`frame_flags::POISONED`] (corruption / audit path).
518    #[inline]
519    pub fn mark_poisoned(&self) {
520        self.fetch_or_guard(meta_guard::POISONED);
521        self.set_flags(self.get_flags() | frame_flags::POISONED);
522    }
523
524    /// `(generation, guard_bits, vtable_bits)` for serial / shell diagnostics.
525    #[inline]
526    pub fn debug_snapshot(&self) -> (u32, u32, u64) {
527        (self.generation(), self.get_guard(), self.vtable_bits())
528    }
529
530    /// Raw vtable pointer bits (`0` = default).
531    #[inline]
532    pub fn vtable_bits(&self) -> u64 {
533        self.vtable.load(Ordering::Acquire)
534    }
535
536    /// Install a custom vtable pointer (must point to a `'static` [`FrameMetaVtable`]).
537    #[inline]
538    pub fn set_vtable_bits(&self, bits: u64) {
539        self.vtable.store(bits, Ordering::Release);
540    }
541
542    /// Resolved vtable reference (`0` bits map to [`DEFAULT_FRAME_META_VTABLE`]).
543    ///
544    /// Misaligned or otherwise invalid non-zero pointer bits fall back to the default vtable
545    /// (same as [`Self::try_vtable_ref`] returning `None`).
546    pub fn vtable_ref(&self) -> &'static FrameMetaVtable {
547        self.try_vtable_ref().unwrap_or(&DEFAULT_FRAME_META_VTABLE)
548    }
549
550    /// Like [`Self::vtable_ref`], but returns `None` if non-zero vtable bits are not aligned
551    /// to a [`FrameMetaVtable`] pointer (8-byte aligned).
552    pub fn try_vtable_ref(&self) -> Option<&'static FrameMetaVtable> {
553        let bits = self.vtable_bits();
554        if bits == 0 {
555            return Some(&DEFAULT_FRAME_META_VTABLE);
556        }
557        #[cfg(debug_assertions)]
558        debug_assert_eq!(
559            bits & 7,
560            0,
561            "MetaSlot::vtable_bits must be 8-byte aligned (got {bits:#x})"
562        );
563        if bits & 7 != 0 {
564            return None;
565        }
566        // `bits` is non-zero (checked above) and 8-byte aligned, so the pointer is non-null.
567        let ptr = bits as *const FrameMetaVtable;
568        // SAFETY: aligned, non-null; must point to a `'static` vtable when registered by the kernel.
569        unsafe { Some(&*ptr) }
570    }
571
572    /// Loads the allocation generation with [`Ordering::Acquire`], pairing with the
573    /// [`Ordering::Release`] bump in [`Self::note_new_allocation_epoch`] for cross-CPU checks.
574    #[inline]
575    pub fn generation(&self) -> u32 {
576        self.generation.load(Ordering::Acquire)
577    }
578
579    /// Overwrites the generation counter : **only** for boot-time init or tests.
580    ///
581    /// Normal allocations bump generation via [`MetaSlot::note_new_allocation_epoch`].
582    /// Arbitrary values break « generational » use-after-free checks.
583    #[inline]
584    pub fn set_generation(&self, g: u32) {
585        self.generation.store(g, Ordering::Release);
586    }
587
588    #[inline]
589    pub fn next(&self) -> u64 {
590        self.free_link.next.load(Ordering::Acquire)
591    }
592
593    #[inline]
594    pub fn set_next(&self, next: u64) {
595        self.free_link.next.store(next, Ordering::Release);
596    }
597
598    #[inline]
599    pub fn prev(&self) -> u64 {
600        self.free_link.prev.load(Ordering::Acquire)
601    }
602
603    #[inline]
604    pub fn set_prev(&self, prev: u64) {
605        self.free_link.prev.store(prev, Ordering::Release);
606    }
607
608    #[inline]
609    pub fn inc_ref(&self) {
610        self.refcount.fetch_add(1, Ordering::Relaxed);
611    }
612
613    #[inline]
614    pub fn dec_ref(&self) -> u32 {
615        self.refcount.fetch_sub(1, Ordering::Release)
616    }
617
618    #[inline]
619    pub fn get_refcount(&self) -> u32 {
620        self.refcount.load(Ordering::Acquire)
621    }
622
623    #[inline]
624    pub fn set_flags(&self, flags: u32) {
625        self.flags.store(flags, Ordering::Release);
626    }
627
628    #[inline]
629    pub fn get_flags(&self) -> u32 {
630        self.flags.load(Ordering::Acquire)
631    }
632
633    #[inline]
634    pub fn get_order(&self) -> u8 {
635        self.order.load(Ordering::Acquire)
636    }
637
638    #[inline]
639    pub fn set_order(&self, order: u8) {
640        self.order.store(order, Ordering::Release);
641    }
642
643    #[inline]
644    pub fn set_refcount(&self, count: u32) {
645        self.refcount.store(count, Ordering::Release);
646    }
647
648    #[inline]
649    pub fn cas_refcount(&self, expect: u32, new: u32) -> Result<u32, u32> {
650        self.refcount
651            .compare_exchange(expect, new, Ordering::AcqRel, Ordering::Acquire)
652    }
653
654    #[inline]
655    pub fn reset_refcount(&self) {
656        self.set_refcount(0);
657    }
658
659    #[inline]
660    pub fn is_cow(&self) -> bool {
661        self.get_flags() & frame_flags::COW != 0
662    }
663
664    #[inline]
665    pub fn is_dll(&self) -> bool {
666        self.get_flags() & frame_flags::DLL != 0
667    }
668}
669
// Compile-time layout contract for the metadata array.
const _: () = {
    assert!(mem::size_of::<MetaSlot>() == FRAME_META_SIZE);
    // Stride is `FRAME_META_SIZE`; the backing array is allocated with `FRAME_META_ALIGN`
    // so each index maps to a cache-line-aligned slot even if `align_of::<MetaSlot>()` is 8.
    assert!(mem::align_of::<MetaSlot>() <= FRAME_META_SIZE);
    // `_reserved0` exists only to pad `order`+tail to 4 bytes before `refcount`; changing field
    // sizes or order requires updating `META_SLOT_REFCOUNT_BYTE_OFFSET` and this assert.
    assert!(META_SLOT_REFCOUNT_BYTE_OFFSET == 32);
};
679
680/// The metadata array size for `ram_size` bytes, rounded up to the nearest page since each frame
681/// has a dedicated metadata entry.
682
683/// @param ram_size Total RAM size to be covered by the metadata (in bytes).
684///
685pub const fn metadata_size_for(ram_size: u64) -> u64 {
686    let frames = (ram_size / PAGE_SIZE) + if ram_size % PAGE_SIZE == 0 { 0 } else { 1 };
687    frames * FRAME_META_SIZE as u64
688}
689
/// HHDM virtual address of the metadata array base; `0` means "not yet initialized".
static METADATA_BASE_VIRT: AtomicU64 = AtomicU64::new(0);
/// Number of [`MetaSlot`] entries in the array (one per physical frame).
static METADATA_FRAME_COUNT: AtomicU64 = AtomicU64::new(0);
692
/// Initialize the global metadata array for all physical frames.
///
/// Reserves one [`MetaSlot`] per frame from the boot allocator, default-initializes
/// every slot, then publishes the count and base (base last, with `Release`, so any
/// reader that `Acquire`-loads a non-zero base also observes the count).
///
/// # Arguments
/// * `total_ram`  - total physical RAM to cover, in bytes (frame count rounds up).
/// * `boot_alloc` - early boot allocator the backing array is reserved from.
///
/// # Panics
/// Panics if the boot allocator cannot reserve the array (out of early boot memory).
pub fn init_metadata_array(total_ram: u64, boot_alloc: &mut BootAllocator) {
    let frame_count = (total_ram / PAGE_SIZE) + if total_ram % PAGE_SIZE == 0 { 0 } else { 1 };
    if frame_count == 0 {
        // Degenerate case: publish an explicitly-empty array.
        METADATA_BASE_VIRT.store(0, Ordering::Release);
        METADATA_FRAME_COUNT.store(0, Ordering::Release);
        return;
    }

    let bytes = metadata_size_for(total_ram) as usize;
    let phys = boot_alloc
        .try_alloc_accessible(bytes, FRAME_META_ALIGN)
        .unwrap_or_else(|| {
            panic!(
                "frame metadata: boot allocator could not reserve {} bytes (align {}) for {} frames : out of early boot memory",
                bytes, FRAME_META_ALIGN, frame_count
            )
        });
    let virt = crate::memory::phys_to_virt(phys.as_u64()) as *mut MetaSlot;

    for idx in 0..frame_count as usize {
        // SAFETY: the block was reserved by the boot allocator with an alignment
        // compatible with `MetaSlot` and a size sufficient for the whole array.
        unsafe {
            ptr::write(virt.add(idx), MetaSlot::new());
        }
    }

    // Publish count before base: readers gate on `base != 0` with Acquire.
    METADATA_FRAME_COUNT.store(frame_count, Ordering::Release);
    METADATA_BASE_VIRT.store(virt as u64, Ordering::Release);
}
724
/// Get the [`MetaSlot`] for a given physical frame (same as [`get_meta_slot`]).
///
/// Historical alias kept for existing callers; `get_meta_slot` is the
/// preferred name (issue #38).
#[inline]
pub fn get_meta(phys: PhysAddr) -> &'static MetaSlot {
    get_meta_slot(phys)
}
730
/// `(generation, guard_bits, vtable_bits)` for debugging (e.g. `serial_println!`).
#[inline]
pub fn frame_meta_debug_snapshot(phys: PhysAddr) -> (u32, u32, u64) {
    get_meta_slot(phys).debug_snapshot()
}
736
/// Returns `true` if [`MetaSlot::generation`] matches `expected` for `phys` (epoch check for use-after-free guards).
#[inline]
pub fn meta_generation_matches(phys: PhysAddr, expected: u32) -> bool {
    get_meta_slot(phys).generation() == expected
}
742
743/// Returns `true` if any page in `[phys, phys + 2^order * PAGE_SIZE)` has [`MetaSlot::is_guard_poisoned`].
744///
745/// Returns `false` when the metadata array is not yet initialized (early-boot
746/// guard: called from `free_list_push` / `alloc_from_zone` during buddy setup
747/// before `init_metadata_array` has run).
748pub fn block_phys_has_poison_guard(frame_phys: u64, order: u8) -> bool {
749    if METADATA_BASE_VIRT.load(Ordering::Acquire) == 0 {
750        return false; // metadata not yet initialized : no poison possible
751    }
752    let n = 1u64 << order;
753    for i in 0..n {
754        let p = PhysAddr::new(frame_phys + i * PAGE_SIZE);
755        if get_meta_slot(p).is_guard_poisoned() {
756            return true;
757        }
758    }
759    false
760}
761
762/// Invokes `on_unmap` from the frame vtable, if any (issue #38 : unmap / release path).
763pub fn invoke_vtable_on_unmap(phys: PhysAddr) {
764    let m = get_meta_slot(phys);
765    let Some(vt) = m.try_vtable_ref() else {
766        return;
767    };
768    if let Some(f) = vt.on_unmap {
769        f(phys);
770    }
771}
772
773/// Invokes `on_last_ref` from the frame vtable, if any (last refcount drop).
774pub fn invoke_vtable_on_last_ref(phys: PhysAddr) {
775    let m = get_meta_slot(phys);
776    let Some(vt) = m.try_vtable_ref() else {
777        return;
778    };
779    if let Some(f) = vt.on_last_ref {
780        f(phys);
781    }
782}
783
784/// Preferred name matching the frame metadata design (issue #38).
785pub fn get_meta_slot(phys: PhysAddr) -> &'static MetaSlot {
786    let base = METADATA_BASE_VIRT.load(Ordering::Acquire);
787    let frame_count = METADATA_FRAME_COUNT.load(Ordering::Acquire);
788    assert!(base != 0, "frame metadata array is not initialized");
789
790    let pfn = phys.as_u64() / PAGE_SIZE;
791    assert!(pfn < frame_count, "frame metadata access out of bounds");
792
793    let byte_offset = pfn as usize * FRAME_META_SIZE;
794    // SAFETY: le tableau global couvre au moins `frame_count` entrées et reste
795    // vivant pendant toute la durée du noyau.
796    unsafe { &*((base as usize + byte_offset) as *const MetaSlot) }
797}
798
/// Physical frame (4KB aligned physical memory)
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct PhysFrame {
    // Base physical address of the frame. Constructors keep it 4 KiB-aligned
    // (`containing_address` masks, `from_start_address` checks).
    pub start_address: PhysAddr,
}
804
805/// Performs the phys frame containing address operation.
806impl PhysFrame {
807    /// Create a PhysFrame containing the given physical address
808    pub fn containing_address(addr: PhysAddr) -> Self {
809        PhysFrame {
810            start_address: PhysAddr::new(addr.as_u64() & !0xFFF),
811        }
812    }
813
814    /// Create a PhysFrame from a 4KB-aligned address
815    pub fn from_start_address(addr: PhysAddr) -> Result<Self, ()> {
816        if addr.is_aligned(4096u64) {
817            Ok(PhysFrame {
818                start_address: addr,
819            })
820        } else {
821            Err(())
822        }
823    }
824
825    /// Create an inclusive range of frames
826    pub fn range_inclusive(start: PhysFrame, end: PhysFrame) -> FrameRangeInclusive {
827        FrameRangeInclusive { start, end }
828    }
829}
830
/// Iterator over an inclusive range of physical frames
pub struct FrameRangeInclusive {
    // Next frame to yield; advanced by 4 KiB per `next()` call.
    pub start: PhysFrame,
    // Last frame to yield (inclusive upper bound).
    pub end: PhysFrame,
}
836
837/// Performs the iterator operation for FrameRangeInclusive.
838impl Iterator for FrameRangeInclusive {
839    type Item = PhysFrame;
840
841    /// Performs the next operation.
842    fn next(&mut self) -> Option<Self::Item> {
843        if self.start <= self.end {
844            let frame = self.start;
845            self.start.start_address += 4096u64;
846            Some(frame)
847        } else {
848            None
849        }
850    }
851}
852
/// Frame allocation errors
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AllocError {
    /// No memory available
    OutOfMemory,
    /// Invalid order (> MAX_ORDER)
    InvalidOrder,
    /// Invalid address alignment
    InvalidAddress,
}
863
/// Frame allocator trait
pub trait FrameAllocator {
    /// Allocate `2^order` contiguous frames.
    ///
    /// The token forbids calls from a context where the allocator's global
    /// lock could be re-entered by an interrupt.
    fn alloc(&mut self, order: u8, token: &IrqDisabledToken) -> Result<PhysFrame, AllocError>;

    /// Free `2^order` contiguous frames starting at frame.
    fn free(&mut self, frame: PhysFrame, order: u8, token: &IrqDisabledToken);

    /// Allocate a single frame (convenience method)
    // Default implementation: delegates to `alloc` with order 0 (one 4 KiB frame).
    fn alloc_frame(&mut self, token: &IrqDisabledToken) -> Result<PhysFrame, AllocError> {
        self.alloc(0, token)
    }
}