strat9_kernel/memory/buddy.rs

// Buddy allocator implementation
//
// Refcount sentinel invariant (OSTD-style, fully enforced):
//
//   free-list frame => refcount == REFCOUNT_UNUSED  (u32::MAX)
//   live frame      => refcount >= 1
//
// `mark_block_free()` stamps REFCOUNT_UNUSED on every free path.
// `mark_block_allocated()` leaves refcount untouched (still REFCOUNT_UNUSED)
// so that `FrameAllocOptions::allocate()` can perform a fail-fast
// CAS(REFCOUNT_UNUSED → 1) that catches double-free / free-list corruption
// immediately rather than silently aliasing memory.

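// Illustrative sketch of that fail-fast claim (NOT the real
// `FrameAllocOptions::allocate()` body; it assumes the frame metadata exposes
// its refcount as an `AtomicU32` field named `refcount`):
//
//     use core::sync::atomic::Ordering;
//     match meta.refcount.compare_exchange(
//         REFCOUNT_UNUSED, 1, Ordering::Acquire, Ordering::Relaxed,
//     ) {
//         Ok(_) => { /* frame really was free; it is now live with rc == 1 */ }
//         Err(rc) => panic!("free list handed out a live frame (rc = {rc})"),
//     }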
use crate::{
    boot::entry::{MemoryKind, MemoryRegion},
    memory::{
        boot_alloc,
        frame::{
            frame_flags, get_meta, AllocError, FrameAllocator, PhysFrame, FRAME_META_LINK_NONE,
        },
        hhdm_offset, phys_to_virt,
        zone::{
            BuddyBitmap, Migratetype, Zone, ZoneSegment, ZoneType, MAX_ORDER, PAGEBLOCK_ORDER,
            PAGEBLOCK_PAGES,
        },
    },
    serial_println,
    sync::{IrqDisabledToken, SpinLock, SpinLockGuard},
};
use core::{
    mem, ptr,
    sync::atomic::{AtomicUsize, Ordering as AtomicOrdering},
};
use x86_64::PhysAddr;

const PAGE_SIZE: u64 = 4096;
const DMA_MAX: u64 = 16 * 1024 * 1024;
const NORMAL_MAX: u64 = 896 * 1024 * 1024;
const LOCAL_CACHE_CAPACITY: usize = 256;
const LOCAL_CACHE_REFILL_ORDER: u8 = 4;
const LOCAL_CACHE_REFILL_FRAMES: usize = 1 << (LOCAL_CACHE_REFILL_ORDER as usize);
const LOCAL_CACHE_FLUSH_BATCH: usize = 64;
const LOCAL_CACHE_SLOTS: usize = Migratetype::COUNT * crate::arch::x86_64::percpu::MAX_CPUS;
const LOCAL_CACHED_ZONE_MIGRATETYPE_SLOTS: usize = Migratetype::COUNT * ZoneType::COUNT;
const COMPACTION_FRAGMENTATION_THRESHOLD: usize = 35;
const COMPACTION_SNAPSHOT_NONE: usize = usize::MAX;
const UNMOVABLE_ZONE_ORDER: [usize; ZoneType::COUNT] = [
    ZoneType::Normal as usize,
    ZoneType::HighMem as usize,
    ZoneType::DMA as usize,
];
const MOVABLE_ZONE_ORDER: [usize; ZoneType::COUNT] = [
    ZoneType::HighMem as usize,
    ZoneType::Normal as usize,
    ZoneType::DMA as usize,
];

#[cfg(feature = "selftest")]
macro_rules! buddy_dbg {
    ($($arg:tt)*) => {
        serial_println!($($arg)*);
    };
}

#[cfg(not(feature = "selftest"))]
macro_rules! buddy_dbg {
    ($($arg:tt)*) => {};
}

pub struct BuddyAllocator {
    zones: [Zone; ZoneType::COUNT],
    /// Per-zone bitmap pool reserved from free memory: [start, end).
    bitmap_pool: [(u64, u64); ZoneType::COUNT],
}

#[derive(Clone, Copy, Debug)]
struct CompactionCandidate {
    zone_idx: usize,
    zone_type: ZoneType,
    order: u8,
    migratetype: Migratetype,
    pressure: ZonePressure,
    fragmentation_score: usize,
    requested_pages: usize,
    available_pages: usize,
    usable_pages: usize,
    cached_pages: usize,
    pageblock_count: usize,
    matching_pageblocks: usize,
}

impl BuddyAllocator {
    /// Creates an empty allocator with unpopulated zones.
    pub const fn new() -> Self {
        BuddyAllocator {
            zones: [
                Zone::new(ZoneType::DMA),
                Zone::new(ZoneType::Normal),
                Zone::new(ZoneType::HighMem),
            ],
            bitmap_pool: [(0, 0); ZoneType::COUNT],
        }
    }

    /// Initializes all zones from the boot-time memory map, running every
    /// setup pass and sealing the boot allocator afterwards.
    pub fn init(&mut self, memory_regions: &[MemoryRegion]) {
        debug_assert!(
            hhdm_offset() != u64::MAX,
            "HHDM offset must be initialized before the buddy allocator"
        );

        serial_println!(
            "Buddy allocator: initializing with {} memory regions",
            memory_regions.len()
        );

        // Dump memory regions for diagnostics (compare QEMU vs VMware maps).
        for (i, region) in memory_regions.iter().enumerate() {
            let kind_str = match region.kind {
                crate::boot::entry::MemoryKind::Free => "FREE",
                crate::boot::entry::MemoryKind::Reclaim => "RECLAIM",
                crate::boot::entry::MemoryKind::Reserved => "RESERVED",
                crate::boot::entry::MemoryKind::Null => "NULL",
                _ => "UNKNOWN",
            };
            serial_println!(
                "  [buddy] MMAP[{:2}]: phys={:#018x}..{:#018x} size={:#x} ({})",
                i,
                region.base,
                region.base.saturating_add(region.size),
                region.size,
                kind_str
            );
        }

        for (_protected_base, _protected_size) in
            Self::protected_module_ranges().into_iter().flatten()
        {
            buddy_dbg!(
                "  Protected module range: phys=0x{:x}..0x{:x}",
                Self::align_down(_protected_base, PAGE_SIZE),
                Self::align_up(_protected_base.saturating_add(_protected_size), PAGE_SIZE)
            );
        }

        // Pass 1: compute per-zone address span (base + span_pages)
        self.pass_count(memory_regions);

        // Diagnostic: log span info for each zone (helps diagnose VMware memory map issues)
        for zone in &self.zones {
            serial_println!(
                "  [buddy] Zone {:?}: base={:#x} span={} pages ({} MB span)",
                zone.zone_type,
                zone.base.as_u64(),
                zone.span_pages,
                (zone.span_pages * 4096) / (1024 * 1024)
            );
        }

        // Pass 2: reserve per-zone bitmap pools using an upper bound derived
        // from the boot allocator's current free extents.
        let mut candidates = [MemoryRegion {
            base: 0,
            size: 0,
            kind: MemoryKind::Reserved,
        }; boot_alloc::MAX_BOOT_ALLOC_REGIONS];
        let candidate_len = boot_alloc::snapshot_free_regions(&mut candidates);
        self.pass_reserve_bitmap_pools(&candidates[..candidate_len]);

        // Pass 3: reserve exact segment storage from the remaining accessible
        // boot memory and then build the final segmented buddy layout from the boot
        // allocator's remaining free ranges after bitmap reservations.
        let mut remaining = [MemoryRegion {
            base: 0,
            size: 0,
            kind: MemoryKind::Reserved,
        }; boot_alloc::MAX_BOOT_ALLOC_REGIONS];
        let remaining_len = boot_alloc::snapshot_free_regions(&mut remaining);
        self.pass_reserve_segment_storage(&remaining[..remaining_len]);
        self.pass_build_segments(&remaining[..remaining_len]);
        self.pass_finalize_zone_accounting();
        self.pass_setup_segment_bitmaps();
        self.pass_populate();

        // Seal the boot allocator: all of its remaining free regions are now
        // managed by the buddy allocator. Any later boot_alloc::alloc_stack()
        // call would otherwise double-allocate pages that buddy already tracks
        // in its free lists.
        boot_alloc::seal();

        for zone in &self.zones {
            let hole_pages = zone.span_pages.saturating_sub(zone.page_count);
            let efficiency = if zone.span_pages > 0 {
                (zone.page_count * 100) / zone.span_pages
            } else {
                0
            };
            serial_println!(
                "  [buddy] Zone {:?}: segments={}/{} managed={} present={} reserved={} span={} holes={} min/low/high={}/{}/{} reserve={} ({}% utilized, {} MB managed)",
                zone.zone_type,
                zone.segment_count,
                zone.segment_capacity,
                zone.page_count,
                zone.present_pages,
                zone.reserved_pages,
                zone.span_pages,
                hole_pages,
                zone.watermark_min,
                zone.watermark_low,
                zone.watermark_high,
                zone.lowmem_reserve_pages,
                efficiency,
                (zone.page_count * 4096) / (1024 * 1024)
            );
            if zone.span_pages > 0 && efficiency < 70 {
                serial_println!(
                    "  [buddy] WARNING: Zone {:?} has large holes ({}% wasted). This may indicate VMware memory fragmentation.",
                    zone.zone_type,
                    100 - efficiency
                );
            }
        }
    }

    /// Pass 1: compute each zone's base address, span, and present-page count.
    fn pass_count(&mut self, memory_regions: &[MemoryRegion]) {
        let mut min_base = [u64::MAX; ZoneType::COUNT];
        let mut max_end = [0u64; ZoneType::COUNT];
        let mut present_pages = [0usize; ZoneType::COUNT];

        for region in memory_regions {
            for zi in 0..ZoneType::COUNT {
                if let Some((start, end)) = Self::zone_intersection_aligned(region, zi) {
                    present_pages[zi] =
                        present_pages[zi].saturating_add(((end - start) / PAGE_SIZE) as usize);
                    if start < min_base[zi] {
                        min_base[zi] = start;
                    }
                    if end > max_end[zi] {
                        max_end[zi] = end;
                    }
                }
            }
        }

        for zi in 0..ZoneType::COUNT {
            let zone = &mut self.zones[zi];
            zone.base = PhysAddr::new(0);
            zone.page_count = 0;
            zone.present_pages = present_pages[zi];
            zone.span_pages = 0;
            zone.allocated = 0;
            zone.reserved_pages = 0;
            zone.lowmem_reserve_pages = 0;
            zone.watermark_min = 0;
            zone.watermark_low = 0;
            zone.watermark_high = 0;
            zone.clear_segments();

            if min_base[zi] == u64::MAX || max_end[zi] <= min_base[zi] {
                continue;
            }

            zone.base = PhysAddr::new(min_base[zi]);
            zone.span_pages = ((max_end[zi] - min_base[zi]) / PAGE_SIZE) as usize;
        }
    }

    /// Reserve per-zone segment tables sized to the actual fragmented layout.
    fn pass_reserve_segment_storage(&mut self, memory_regions: &[MemoryRegion]) {
        let mut segment_counts = [0usize; ZoneType::COUNT];

        for region in memory_regions {
            for (zi, count) in segment_counts.iter_mut().enumerate() {
                if Self::zone_intersection_aligned(region, zi).is_some() {
                    *count = count.saturating_add(1);
                }
            }
        }

        for (zi, &segment_count) in segment_counts.iter().enumerate() {
            let zone = &mut self.zones[zi];
            zone.clear_segments();

            if segment_count == 0 {
                continue;
            }

            let bytes = segment_count.saturating_mul(mem::size_of::<ZoneSegment>());
            let storage_phys =
                boot_alloc::alloc_bytes_accessible(bytes, mem::align_of::<ZoneSegment>())
                    .unwrap_or_else(|| {
                        panic!(
                            "Buddy allocator: unable to reserve {} bytes for {:?} segment table",
                            bytes, zone.zone_type
                        )
                    })
                    .as_u64();
            unsafe {
                ptr::write_bytes(phys_to_virt(storage_phys) as *mut u8, 0, bytes);
            }

            zone.segment_capacity = segment_count;
            zone.segments = phys_to_virt(storage_phys) as *mut ZoneSegment;
        }
    }

    /// Reserve per-zone bitmap pools using a segmentation-safe upper bound.
    fn pass_reserve_bitmap_pools(&mut self, memory_regions: &[MemoryRegion]) {
        for zi in 0..ZoneType::COUNT {
            let managed_pages = memory_regions
                .iter()
                .filter_map(|region| Self::zone_intersection_aligned(region, zi))
                .map(|(start, end)| ((end - start) / PAGE_SIZE) as usize)
                .sum::<usize>();
            let needed_bytes = Self::bitmap_bytes_upper_bound_for_pages(managed_pages);
            let reserved_bytes = Self::align_up(needed_bytes as u64, PAGE_SIZE);

            if reserved_bytes == 0 {
                self.bitmap_pool[zi] = (0, 0);
                continue;
            }

            let pool_start = boot_alloc::alloc_bytes_accessible(needed_bytes, PAGE_SIZE as usize)
                .unwrap_or_else(|| {
                    panic!(
                        "Buddy allocator: unable to reserve {} bytes for zone {:?} bitmaps",
                        needed_bytes, self.zones[zi].zone_type
                    )
                })
                .as_u64();
            let pool_end = pool_start.saturating_add(reserved_bytes);
            self.bitmap_pool[zi] = (pool_start, pool_end);
            buddy_dbg!(
                "  Zone {:?}: bitmap pool phys=0x{:x}..0x{:x} ({} bytes)",
                self.zones[zi].zone_type,
                pool_start,
                pool_end,
                needed_bytes
            );

            // Zero stolen pages to initialize all bitmaps to 0.
            unsafe {
                core::ptr::write_bytes(
                    phys_to_virt(pool_start) as *mut u8,
                    0,
                    (pool_end - pool_start) as usize,
                );
            }
        }
    }

    /// Finalise zone accounting once the managed segment set is known.
    fn pass_finalize_zone_accounting(&mut self) {
        for zone in &mut self.zones {
            zone.reserved_pages = zone.present_pages.saturating_sub(zone.page_count);
            zone.lowmem_reserve_pages =
                Self::lowmem_reserve_target_pages(zone.zone_type, zone.page_count);
            zone.watermark_min = Self::watermark_target_pages(zone.page_count, 256, 16, 2048);

            let delta = Self::watermark_target_pages(zone.page_count, 512, 16, 2048);
            zone.watermark_low = zone
                .watermark_min
                .saturating_add(delta)
                .min(zone.page_count);
            zone.watermark_high = zone
                .watermark_low
                .saturating_add(delta)
                .min(zone.page_count);
        }
    }

    /// Compute a bounded watermark target for a zone.
    fn watermark_target_pages(
        managed_pages: usize,
        divisor: usize,
        floor: usize,
        cap: usize,
    ) -> usize {
        Self::bounded_zone_target(managed_pages, divisor, floor, cap, 8)
    }

    /// Compute a bounded low-memory reserve target.
    fn lowmem_reserve_target_pages(zone_type: ZoneType, managed_pages: usize) -> usize {
        match zone_type {
            ZoneType::DMA => Self::bounded_zone_target(managed_pages, 8, 16, 512, 4),
            ZoneType::Normal => Self::bounded_zone_target(managed_pages, 64, 64, 2048, 8),
            ZoneType::HighMem => 0,
        }
    }

    /// Bound a policy target to something meaningful for the current zone size.
    fn bounded_zone_target(
        managed_pages: usize,
        divisor: usize,
        floor: usize,
        cap: usize,
        max_fraction_divisor: usize,
    ) -> usize {
        if managed_pages == 0 {
            return 0;
        }

        let scaled = core::cmp::max(managed_pages / divisor, floor);
        let capped = core::cmp::min(scaled, cap);
        let max_for_zone = core::cmp::max(1, managed_pages / max_fraction_divisor);
        core::cmp::min(capped, max_for_zone)
    }
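
    // Worked example (hypothetical zone size, for illustration): a zone
    // managing 131072 pages (512 MiB) gets
    //   watermark_min = bounded_zone_target(131072, 256, 16, 2048, 8)
    //                 = min(min(max(131072 / 256, 16), 2048), 131072 / 8)
    //                 = min(512, 16384) = 512 pages (2 MiB).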

    /// Build the final segmented physical layout from remaining boot allocator ranges.
    fn pass_build_segments(&mut self, memory_regions: &[MemoryRegion]) {
        for region in memory_regions {
            for zi in 0..ZoneType::COUNT {
                let Some((start, end)) = Self::zone_intersection_aligned(region, zi) else {
                    continue;
                };
                let zone = &mut self.zones[zi];
                if zone.segment_count >= zone.segment_capacity {
                    panic!(
                        "Buddy allocator: zone {:?} exceeded reserved segment capacity={} while processing phys=0x{:x}..0x{:x}",
                        zone.zone_type,
                        zone.segment_capacity,
                        start,
                        end,
                    );
                }

                let slot = zone.segment_count;
                zone.segments_mut()[slot] = ZoneSegment {
                    base: PhysAddr::new(start),
                    page_count: ((end - start) / PAGE_SIZE) as usize,
                    free_lists: [[0; MAX_ORDER + 1]; Migratetype::COUNT],
                    buddy_bitmaps: [BuddyBitmap::empty(); MAX_ORDER + 1],
                    pageblock_tags: ptr::null_mut(),
                    pageblock_count: 0,
                    #[cfg(debug_assertions)]
                    alloc_bitmap: BuddyBitmap::empty(),
                };
                zone.segment_count = slot + 1;
                zone.page_count = zone
                    .page_count
                    .saturating_add(((end - start) / PAGE_SIZE) as usize);

                buddy_dbg!(
                    "  Zone {:?}: segment phys=0x{:x}..0x{:x} pages={}",
                    zone.zone_type,
                    start,
                    end,
                    ((end - start) / PAGE_SIZE) as usize,
                );
            }
        }
    }

    /// Assign bitmap slices to each populated segment.
    fn pass_setup_segment_bitmaps(&mut self) {
        for zi in 0..ZoneType::COUNT {
            let (pool_start, pool_end) = self.bitmap_pool[zi];
            if pool_start == 0 || pool_end <= pool_start {
                continue;
            }

            let zone = &mut self.zones[zi];
            let default_pageblock_migratetype = Self::default_pageblock_migratetype(zone.zone_type);
            let mut cursor = pool_start;
            let segment_count = zone.segment_count;
            for segment in zone.segments_mut().iter_mut().take(segment_count) {
                let _exact_bitmap_bytes = Self::bitmap_bytes_for_span(segment.page_count);
                for order in 0..=MAX_ORDER {
                    let num_bits = Self::pairs_for_order(segment.page_count, order as u8);
                    let num_bytes = Self::bits_to_bytes(num_bits) as u64;
                    if num_bits == 0 {
                        segment.buddy_bitmaps[order] = BuddyBitmap::empty();
                        continue;
                    }

                    debug_assert!(cursor + num_bytes <= pool_end);
                    segment.buddy_bitmaps[order] = BuddyBitmap {
                        data: phys_to_virt(cursor) as *mut u8,
                        num_bits,
                    };
                    cursor += num_bytes;
                }

                #[cfg(debug_assertions)]
                {
                    let num_bits = segment.page_count;
                    let num_bytes = Self::bits_to_bytes(num_bits) as u64;
                    if num_bits == 0 {
                        segment.alloc_bitmap = BuddyBitmap::empty();
                    } else {
                        debug_assert!(cursor + num_bytes <= pool_end);
                        segment.alloc_bitmap = BuddyBitmap {
                            data: phys_to_virt(cursor) as *mut u8,
                            num_bits,
                        };
                        cursor += num_bytes;
                    }
                }

                let pageblock_count = segment.page_count.div_ceil(PAGEBLOCK_PAGES);
                segment.pageblock_count = pageblock_count;
                if pageblock_count == 0 {
                    segment.pageblock_tags = ptr::null_mut();
                } else {
                    let num_bytes = pageblock_count as u64;
                    debug_assert!(cursor + num_bytes <= pool_end);
                    segment.pageblock_tags = phys_to_virt(cursor) as *mut u8;
                    unsafe {
                        ptr::write_bytes(
                            segment.pageblock_tags,
                            default_pageblock_migratetype as u8,
                            pageblock_count,
                        );
                    }
                    cursor += num_bytes;
                }
            }

            debug_assert!(cursor <= pool_end);
        }
    }

    /// Seed each contiguous segment with greedy block insertion.
    fn pass_populate(&mut self) {
        for zi in 0..ZoneType::COUNT {
            let zone_type = self.zones[zi].zone_type;
            let segment_count = self.zones[zi].segment_count;
            for si in 0..segment_count {
                let (start, end) = {
                    let segments = self.zones[zi].segments();
                    let segment = &segments[si];
                    (segment.base.as_u64(), segment.end_address())
                };
                let segment = &mut self.zones[zi].segments_mut()[si];
                Self::seed_range_as_free(zone_type, segment, start, end);
            }
        }
    }

    /// Seeds a contiguous physical range `[start, end)` as free using greedy block insertion.
    ///
    /// Unlike the previous min/max span design, `segment` is guaranteed to be a
    /// genuinely contiguous free extent. Greedy seeding therefore improves boot
    /// time without ever making holes visible to the buddy topology.
    fn seed_range_as_free(zone_type: ZoneType, segment: &mut ZoneSegment, start: u64, end: u64) {
        if start >= end {
            return;
        }
        let mut addr = start;

        'seed: while addr < end {
            if !segment.contains_address(PhysAddr::new(addr)) {
                break;
            }

            if let Some(protected_end) = Self::protected_overlap_end(addr, addr + PAGE_SIZE) {
                buddy_dbg!(
                    "  Zone {:?}: skip protected range 0x{:x}..0x{:x}",
                    zone_type,
                    addr,
                    protected_end
                );
                addr = core::cmp::min(protected_end, end);
                continue;
            }

            let remaining_pages = ((end - addr) / PAGE_SIZE) as usize;
            debug_assert!(remaining_pages != 0);
            let mut order = ((remaining_pages.ilog2()) as u8).min(MAX_ORDER as u8);

            while order > 0 {
                let block_size = PAGE_SIZE << order;
                if addr & (block_size - 1) == 0 {
                    break;
                }
                order -= 1;
            }

            loop {
                let block_size = PAGE_SIZE << order;
                let block_end = addr.saturating_add(block_size);
                if block_end > end {
                    debug_assert!(order != 0);
                    order -= 1;
                    continue;
                }

                if Self::protected_overlap_end(addr, block_end).is_some() {
                    if order == 0 {
                        if let Some(skip_to) = Self::protected_overlap_end(addr, block_end) {
                            buddy_dbg!("  Zone {:?}: skip protected page 0x{:x}", zone_type, addr);
                            addr = core::cmp::min(skip_to, end);
                            continue 'seed;
                        }
                    }
                    order -= 1;
                    continue;
                }

                let migratetype = Self::pageblock_migratetype(
                    segment,
                    addr,
                    Self::default_pageblock_migratetype(zone_type),
                );
                Self::insert_free_block(segment, addr, order, migratetype);
                addr = block_end;
                continue 'seed;
            }
        }
    }
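
    // Worked example of the greedy walk above (hypothetical range): seeding
    // 0x5000..0x20000 (27 pages) emits
    //   0x5000 order 0, 0x6000 order 1, 0x8000 order 3, 0x10000 order 4,
    // i.e. at each step the largest block that is both naturally aligned at
    // `addr` and still fits before `end`.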

    /// Attempts an allocation from one zone, scanning its segments in order.
    fn alloc_from_zone(
        zone: &mut Zone,
        zone_idx: usize,
        order: u8,
        migratetype: Migratetype,
        honor_watermarks: bool,
        token: &IrqDisabledToken,
    ) -> Option<PhysFrame> {
        if !Self::zone_allows_allocation(zone, zone_idx, order, honor_watermarks) {
            return None;
        }

        for si in 0..zone.segment_count {
            let frame_phys = {
                let segment = &mut zone.segments_mut()[si];
                Self::alloc_from_segment(segment, order, migratetype, token)
            };
            if let Some(frame_phys) = frame_phys {
                zone.allocated += 1usize << order;
                return PhysFrame::from_start_address(PhysAddr::new(frame_phys)).ok();
            }
        }
        None
    }

    /// Allocate from one contiguous segment.
    fn alloc_from_segment(
        segment: &mut ZoneSegment,
        order: u8,
        requested_migratetype: Migratetype,
        _token: &IrqDisabledToken,
    ) -> Option<u64> {
        for cur_order in order..=MAX_ORDER as u8 {
            for donor_migratetype in requested_migratetype.fallback_order() {
                let Some(frame_phys) = Self::free_list_pop(segment, cur_order, donor_migratetype)
                else {
                    continue;
                };
                debug_assert!(
                    !crate::memory::frame::block_phys_has_poison_guard(frame_phys, cur_order),
                    "buddy: poisoned block on free list (order {})",
                    cur_order
                );
                let block_size = PAGE_SIZE << cur_order;
                let block_end = frame_phys.saturating_add(block_size);
                if Self::protected_overlap_end(frame_phys, block_end).is_some() {
                    panic!(
                        "Buddy allocator inconsistency: free block 0x{:x} order {} overlaps protected memory",
                        frame_phys, cur_order
                    );
                }

                let _ = Self::toggle_pair(segment, frame_phys, cur_order);

                let mut split_order = cur_order;
                while split_order > order {
                    split_order -= 1;
                    Self::retag_pageblock_range(
                        segment,
                        frame_phys,
                        split_order,
                        requested_migratetype,
                    );
                    let buddy_phys = frame_phys + ((1u64 << split_order) * PAGE_SIZE);
                    let buddy_migratetype =
                        Self::pageblock_migratetype(segment, buddy_phys, donor_migratetype);
                    Self::mark_block_free(buddy_phys, split_order, buddy_migratetype);
                    Self::free_list_push(segment, buddy_phys, split_order, buddy_migratetype);
                    let _ = Self::toggle_pair(segment, frame_phys, split_order);
                }
                Self::retag_pageblock_range(segment, frame_phys, order, requested_migratetype);
                Self::mark_block_allocated(frame_phys, order, requested_migratetype);

                #[cfg(debug_assertions)]
                Self::mark_allocated(segment, frame_phys, order, true);

                return Some(frame_phys);
            }
        }
        None
    }


    #[inline]
    fn find_segment_index(zone: &Zone, phys: u64, order: u8) -> Option<usize> {
        zone.segments()
            .iter()
            .take(zone.segment_count)
            .position(|segment| Self::segment_contains_block(segment, phys, order))
    }

    #[inline]
    fn segment_contains_block(segment: &ZoneSegment, phys: u64, order: u8) -> bool {
        if !segment.contains_address(PhysAddr::new(phys)) {
            return false;
        }
        let block_end = phys.saturating_add(PAGE_SIZE << order);
        block_end <= segment.end_address()
    }

    /// Returns a block to its owning segment within the zone, coalescing where possible.
    fn free_to_zone(zone: &mut Zone, frame: PhysFrame, order: u8, _token: &IrqDisabledToken) {
        let frame_phys = frame.start_address.as_u64();
        let block_size = PAGE_SIZE << order;
        let block_end = frame_phys.saturating_add(block_size);
        let migratetype = Self::block_migratetype(frame_phys);
        let Some(segment_idx) = Self::find_segment_index(zone, frame_phys, order) else {
            panic!(
                "buddy free: frame 0x{:x} order {} does not belong to any segment in zone {:?}",
                frame_phys, order, zone.zone_type,
            );
        };

        debug_assert!(order <= MAX_ORDER as u8);
        debug_assert!(frame.start_address.is_aligned(PAGE_SIZE << order));
        debug_assert!(zone.contains_address(frame.start_address));

        if Self::protected_overlap_end(frame_phys, block_end).is_some() {
            buddy_dbg!(
                "  Zone {:?}: drop free overlap-protected 0x{:x}..0x{:x} order={}",
                zone.zone_type,
                frame_phys,
                block_end,
                order
            );
            return;
        }

        #[cfg(debug_assertions)]
        {
            let segment = &mut zone.segments_mut()[segment_idx];
            Self::mark_allocated(segment, frame_phys, order, false);
        }

        {
            let segment = &mut zone.segments_mut()[segment_idx];
            if order as usize >= PAGEBLOCK_ORDER {
                Self::retag_pageblock_range(segment, frame_phys, order, migratetype);
            }
            let free_migratetype = Self::pageblock_migratetype(segment, frame_phys, migratetype);
            Self::mark_block_free(frame_phys, order, free_migratetype);
            Self::insert_free_block(segment, frame_phys, order, free_migratetype);
        }
        zone.allocated = zone.allocated.saturating_sub(1usize << order);
    }

    /// Drops allocator accounting for a poisoned block without returning it to the free list.
    ///
    /// The block is **not** placed on any free list and its debug-bitmap entries
    /// remain marked as "allocated", because they genuinely are: the pages are
    /// quarantined and inaccessible. Clearing them would defeat the double-free
    /// detector for any later attempt to free the same block.
    fn quarantine_poisoned_block_in_zone(
        zone: &mut Zone,
        frame: PhysFrame,
        order: u8,
        _token: &IrqDisabledToken,
    ) {
        let frame_phys = frame.start_address.as_u64();
        let block_size = PAGE_SIZE << order;
        let block_end = frame_phys.saturating_add(block_size);
        let Some(segment_idx) = Self::find_segment_index(zone, frame_phys, order) else {
            panic!(
                "buddy quarantine: frame 0x{:x} order {} does not belong to any segment in zone {:?}",
                frame_phys,
                order,
                zone.zone_type,
            );
        };

        debug_assert!(order <= MAX_ORDER as u8);
        debug_assert!(frame.start_address.is_aligned(PAGE_SIZE << order));
        debug_assert!(zone.contains_address(frame.start_address));
        debug_assert!(Self::segment_contains_block(
            &zone.segments()[segment_idx],
            frame_phys,
            order
        ));

        if Self::protected_overlap_end(frame_phys, block_end).is_some() {
            return;
        }

        // Intentionally NO mark_allocated(false) here: pages stay "allocated"
        // in the debug bitmap because they are quarantined, not freed.

        zone.allocated = zone.allocated.saturating_sub(1usize << order);
        POISON_QUARANTINE_PAGES.fetch_add(1usize << order, AtomicOrdering::Relaxed);
    }

    /// Linux-style parity-map coalescing insertion.
    ///
    /// Walks upward through the orders: if the buddy's parity bit is already
    /// set, or MAX_ORDER is reached, the block is inserted as-is; otherwise
    /// the buddy is unlinked from its free list, merged with the current
    /// block, and the walk repeats at the next order. The final (potentially
    /// coalesced) block is pushed onto the appropriate free list without
    /// recursion.
    fn insert_free_block(
        segment: &mut ZoneSegment,
        frame_phys: u64,
        initial_order: u8,
        migratetype: Migratetype,
    ) {
        let mut current = frame_phys;
        let mut order = initial_order;

        loop {
            let bit_is_set = Self::toggle_pair(segment, current, order);
            if bit_is_set || order == MAX_ORDER as u8 {
                Self::mark_block_free(current, order, migratetype);
                Self::free_list_push(segment, current, order, migratetype);
                break;
            }

            let Some(buddy) = Self::buddy_phys(segment, current, order) else {
                Self::mark_block_free(current, order, migratetype);
                Self::free_list_push(segment, current, order, migratetype);
                break;
            };

            if !Self::can_merge_with_buddy(buddy, order, migratetype) {
                Self::mark_block_free(current, order, migratetype);
                Self::free_list_push(segment, current, order, migratetype);
                break;
            }

            let removed = Self::free_list_remove(segment, buddy, order, migratetype);
            if !removed {
                debug_assert!(false, "buddy bitmap/list inconsistency while freeing");
                Self::mark_block_free(current, order, migratetype);
                Self::free_list_push(segment, current, order, migratetype);
                break;
            }

            current = core::cmp::min(current, buddy);
            order += 1;
        }
    }
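
    // Illustrative walk (hypothetical segment state, assuming `toggle` returns
    // the bit's new value): freeing page 5 while page 4, its order-0 buddy, is
    // already free:
    //   toggle pair {4,5} at order 0 -> parity clears, buddy 4 is free and
    //   compatible, so 4 is unlinked and merged into block 4 of order 1;
    //   toggle pair {4..7} at order 1 -> parity sets (pages 6..7 still
    //   allocated), so block 4 goes onto the order-1 free list.
    // One parity bit per pair stands in for scanning the buddy's metadata.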

    /// Returns the page index of `phys` relative to the segment base.
    #[inline]
    fn page_index(segment: &ZoneSegment, phys: u64) -> usize {
        debug_assert!(segment.page_count > 0);
        let base = segment.base.as_u64();
        debug_assert!(phys >= base);
        debug_assert!((phys - base).is_multiple_of(PAGE_SIZE));
        ((phys - base) / PAGE_SIZE) as usize
    }

    /// Returns the parity-bitmap index of the buddy pair containing `phys` at `order`.
    #[inline]
    fn pair_index(segment: &ZoneSegment, phys: u64, order: u8) -> usize {
        Self::page_index(segment, phys) >> (order as usize + 1)
    }

    /// Flips the parity bit for the buddy pair of `phys` at `order` and
    /// returns its new value; reports `true` when no bitmap backs this order,
    /// which suppresses coalescing.
    #[inline]
    fn toggle_pair(segment: &mut ZoneSegment, phys: u64, order: u8) -> bool {
        let bitmap = segment.buddy_bitmaps[order as usize];
        if bitmap.is_empty() {
            return true;
        }
        let idx = Self::pair_index(segment, phys, order);
        debug_assert!(idx < bitmap.num_bits);
        bitmap.toggle(idx)
    }

    /// Computes the buddy block's physical address, or `None` if it falls outside the segment.
    #[inline]
    fn buddy_phys(segment: &ZoneSegment, phys: u64, order: u8) -> Option<u64> {
        let base = segment.base.as_u64();
        if phys < base {
            return None;
        }
        let offset = phys - base;
        let block_size = PAGE_SIZE << order;
        let buddy_offset = offset ^ block_size;
        let buddy_page = (buddy_offset / PAGE_SIZE) as usize;
        if buddy_page >= segment.page_count {
            return None;
        }
        Some(base + buddy_offset)
    }
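
    // Example of the XOR trick (hypothetical addresses, segment base 0x0):
    // at order 0 the buddy of 0x5000 is 0x5000 ^ 0x1000 = 0x4000; at order 1
    // the buddy of 0x4000 is 0x4000 ^ 0x2000 = 0x6000. The offsets are taken
    // relative to the segment base, so segments need not be globally aligned.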

    /// Debug-only page-granular ledger: asserts against double alloc/free and
    /// records the new state in the segment's alloc bitmap.
    #[cfg(debug_assertions)]
    fn mark_allocated(segment: &mut ZoneSegment, frame_phys: u64, order: u8, allocated: bool) {
        if segment.alloc_bitmap.is_empty() {
            return;
        }
        let start = Self::page_index(segment, frame_phys);
        let count = 1usize << order;
        for i in 0..count {
            let bit = start + i;
            debug_assert!(bit < segment.alloc_bitmap.num_bits);
            if allocated {
                debug_assert!(
                    !segment.alloc_bitmap.test(bit),
                    "double allocation detected"
                );
                segment.alloc_bitmap.set(bit);
            } else {
                debug_assert!(segment.alloc_bitmap.test(bit), "double free detected");
                segment.alloc_bitmap.clear(bit);
            }
        }
    }

    /// Pushes a free block onto the segment's free list for `migratetype` at `order`.
    fn free_list_push(segment: &mut ZoneSegment, phys: u64, order: u8, migratetype: Migratetype) {
        debug_assert!(
            !crate::memory::frame::block_phys_has_poison_guard(phys, order),
            "buddy: refusing to push poisoned block to free list"
        );
        let head = segment.free_lists[migratetype.index()][order as usize];
        Self::write_free_prev(phys, 0);
        Self::write_free_next(phys, head);
        if head != 0 {
            Self::write_free_prev(head, phys);
        }
        segment.free_lists[migratetype.index()][order as usize] = phys;
    }

    /// Pops the head block from a free list, or returns `None` if the list is empty.
    fn free_list_pop(
        segment: &mut ZoneSegment,
        order: u8,
        migratetype: Migratetype,
    ) -> Option<u64> {
        let head = segment.free_lists[migratetype.index()][order as usize];
        if head == 0 {
            return None;
        }
        let next = Self::read_free_next(head);
        segment.free_lists[migratetype.index()][order as usize] = next;
        if next != 0 {
            Self::write_free_prev(next, 0);
        }
        Self::write_free_next(head, 0);
        Self::write_free_prev(head, 0);
        Some(head)
    }

    /// Unlinks a specific block from its free list; returns `false` if the
    /// block was not actually linked there.
    fn free_list_remove(
        segment: &mut ZoneSegment,
        phys: u64,
        order: u8,
        migratetype: Migratetype,
    ) -> bool {
        let prev = Self::read_free_prev(phys);
        let next = Self::read_free_next(phys);

        if prev == 0 {
            if segment.free_lists[migratetype.index()][order as usize] != phys {
                return false;
            }
            segment.free_lists[migratetype.index()][order as usize] = next;
        } else {
            Self::write_free_next(prev, next);
        }

        if next != 0 {
            Self::write_free_prev(next, prev);
        }

        Self::write_free_next(phys, 0);
        Self::write_free_prev(phys, 0);
        true
    }

    /// Reads the next-link of a free block from its frame metadata.
    #[inline]
    fn read_free_next(phys: u64) -> u64 {
        let next = get_meta(PhysAddr::new(phys)).next();
        if next == FRAME_META_LINK_NONE {
            0
        } else {
            next
        }
    }

    /// Writes the next-link of a free block into its frame metadata.
    #[inline]
    fn write_free_next(phys: u64, next: u64) {
        get_meta(PhysAddr::new(phys)).set_next(if next == 0 {
            FRAME_META_LINK_NONE
        } else {
            next
        });
    }

    /// Reads the prev-link of a free block from its frame metadata.
    #[inline]
    fn read_free_prev(phys: u64) -> u64 {
        let prev = get_meta(PhysAddr::new(phys)).prev();
        if prev == FRAME_META_LINK_NONE {
            0
        } else {
            prev
        }
    }

    /// Writes the prev-link of a free block into its frame metadata.
    #[inline]
    fn write_free_prev(phys: u64, prev: u64) {
        get_meta(PhysAddr::new(phys)).set_prev(if prev == 0 {
            FRAME_META_LINK_NONE
        } else {
            prev
        });
    }

    /// Maps a physical address to its zone index: DMA below 16 MiB, Normal
    /// below 896 MiB, HighMem above.
    fn zone_index_for_addr(addr: u64) -> usize {
        if addr < DMA_MAX {
            ZoneType::DMA as usize
        } else if addr < NORMAL_MAX {
            ZoneType::Normal as usize
        } else {
            ZoneType::HighMem as usize
        }
    }

    /// Returns the `[start, end)` physical address bounds of a zone.
    fn zone_bounds(zone_idx: usize) -> (u64, u64) {
        match zone_idx {
            x if x == ZoneType::DMA as usize => (0, DMA_MAX),
            x if x == ZoneType::Normal as usize => (DMA_MAX, NORMAL_MAX),
            _ => (NORMAL_MAX, u64::MAX),
        }
    }
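
    // Example: 0x0050_0000 (5 MiB) maps to DMA, 0x0200_0000 (32 MiB) to
    // Normal, and 0x4000_0000 (1 GiB) to HighMem, per DMA_MAX / NORMAL_MAX.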

    /// Intersects a usable (Free/Reclaim) region with a zone's bounds and
    /// page-aligns the result, reserving physical page 0 as a sentinel.
    fn zone_intersection_aligned(region: &MemoryRegion, zone_idx: usize) -> Option<(u64, u64)> {
        if !matches!(region.kind, MemoryKind::Free | MemoryKind::Reclaim) {
            return None;
        }

        let region_start = region.base;
        let region_end = region.base.saturating_add(region.size);
        let (zone_start, zone_end) = Self::zone_bounds(zone_idx);

        let start = core::cmp::max(region_start, zone_start);
        let end = core::cmp::min(region_end, zone_end);
        if start >= end {
            return None;
        }

        // Reserve physical address 0 as sentinel/not-usable.
        let start = Self::align_up(core::cmp::max(start, PAGE_SIZE), PAGE_SIZE);
        let end = Self::align_down(end, PAGE_SIZE);
        if start >= end {
            None
        } else {
            Some((start, end))
        }
    }

    /// If `[start, end)` overlaps a protected module range, returns the
    /// page-aligned end of that range; otherwise `None`.
    fn protected_overlap_end(start: u64, end: u64) -> Option<u64> {
        for (base, size) in Self::protected_module_ranges().into_iter().flatten() {
            if size == 0 {
                continue;
            }
            let pstart = Self::align_down(base, PAGE_SIZE);
            let pend = Self::align_up(base.saturating_add(size), PAGE_SIZE);
            if end <= pstart || start >= pend {
                continue;
            }
            return Some(pend);
        }
        None
    }

    /// Returns the boot allocator's snapshot of protected module ranges.
    fn protected_module_ranges() -> [Option<(u64, u64)>; boot_alloc::MAX_PROTECTED_RANGES] {
        boot_alloc::protected_ranges_snapshot()
    }

    /// Number of buddy pairs at `order` needed to cover `span_pages`.
    #[inline]
    fn pairs_for_order(span_pages: usize, order: u8) -> usize {
        let pair_span = 1usize << (order as usize + 1);
        span_pages.div_ceil(pair_span)
    }

    /// Rounds a bit count up to whole bytes.
    #[inline]
    fn bits_to_bytes(bits: usize) -> usize {
        bits.div_ceil(8)
    }

    /// Exact bitmap byte count for one contiguous span of `span_pages` pages:
    /// parity bits for every order, the debug-only alloc bitmap, and
    /// pageblock tags.
    fn bitmap_bytes_for_span(span_pages: usize) -> usize {
        let mut bytes = 0usize;
        for order in 0..=MAX_ORDER as u8 {
            bytes += Self::bits_to_bytes(Self::pairs_for_order(span_pages, order));
        }
        #[cfg(debug_assertions)]
        {
            bytes += Self::bits_to_bytes(span_pages);
        }
        bytes += Self::pageblock_tag_bytes_for_span(span_pages);
        bytes
    }

    /// Upper bound for bitmap storage over any segmentation of `page_count` pages.
    ///
    /// For one page, every order contributes at most one parity bit. Summing that
    /// pessimistic bound across all pages yields a simple safe allocation bound,
    /// even if bitmap-pool reservations split ranges further.
    fn bitmap_bytes_upper_bound_for_pages(page_count: usize) -> usize {
        let bits = page_count.saturating_mul(MAX_ORDER + 1);
        #[cfg(debug_assertions)]
        let bits = bits.saturating_add(page_count);
        Self::bits_to_bytes(bits)
            .saturating_add(Self::pageblock_tag_bytes_upper_bound_for_pages(page_count))
    }
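
    // Worked example (release build; MAX_ORDER = 10 assumed purely for
    // illustration): 1024 pages bound to 1024 * 11 = 11264 parity bits
    // -> 1408 bytes, plus 1024 bytes of pageblock-tag headroom = 2432 bytes,
    // valid no matter how those pages end up split into segments.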

    /// Exact byte count required for pageblock migratetype tags over one contiguous span.
    #[inline]
    fn pageblock_tag_bytes_for_span(span_pages: usize) -> usize {
        span_pages.div_ceil(PAGEBLOCK_PAGES)
    }

    /// Safe upper bound for pageblock-tag storage across any segmentation of `page_count` pages.
    #[inline]
    fn pageblock_tag_bytes_upper_bound_for_pages(page_count: usize) -> usize {
        page_count
    }

    /// Rounds `value` up to the next multiple of `align` (a power of two).
    #[inline]
    fn align_up(value: u64, align: u64) -> u64 {
        debug_assert!(align.is_power_of_two());
        (value + align - 1) & !(align - 1)
    }

    /// Rounds `value` down to a multiple of `align` (a power of two).
    #[inline]
    fn align_down(value: u64, align: u64) -> u64 {
        debug_assert!(align.is_power_of_two());
        value & !(align - 1)
    }
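
    // Example: align_up(0x1234, 0x1000) = 0x2000 and
    // align_down(0x1234, 0x1000) = 0x1000; the power-of-two requirement makes
    // !(align - 1) a mask that clears exactly the low bits.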

    /// Default pageblock migratetype assigned at bootstrap for one zone.
    #[inline]
    fn default_pageblock_migratetype(zone_type: ZoneType) -> Migratetype {
        match zone_type {
            ZoneType::HighMem => Migratetype::Movable,
            ZoneType::DMA | ZoneType::Normal => Migratetype::Unmovable,
        }
    }

    /// Returns the pageblock index covering `phys` inside `segment`.
    #[inline]
    fn pageblock_index(segment: &ZoneSegment, phys: u64) -> usize {
        Self::page_index(segment, phys) / PAGEBLOCK_PAGES
    }

    /// Decode one pageblock tag byte into a migratetype.
    #[inline]
    fn decode_pageblock_tag(tag: u8) -> Migratetype {
        match tag {
            x if x == Migratetype::Movable as u8 => Migratetype::Movable,
            _ => Migratetype::Unmovable,
        }
    }

    /// Returns the current pageblock migratetype for a block start.
    #[inline]
    fn pageblock_migratetype(
        segment: &ZoneSegment,
        phys: u64,
        fallback: Migratetype,
    ) -> Migratetype {
        if segment.pageblock_count == 0 || segment.pageblock_tags.is_null() {
            return fallback;
        }
        let idx = Self::pageblock_index(segment, phys);
        debug_assert!(idx < segment.pageblock_count);
        unsafe { Self::decode_pageblock_tag(*segment.pageblock_tags.add(idx)) }
    }

    /// Retag every pageblock overlapped by the buddy block `[phys, phys + 2^order * PAGE_SIZE)`.
    fn retag_pageblock_range(
        segment: &mut ZoneSegment,
        phys: u64,
        order: u8,
        migratetype: Migratetype,
    ) {
        if segment.pageblock_count == 0 || segment.pageblock_tags.is_null() {
            return;
        }

        let start_page = Self::page_index(segment, phys);
        let end_page_exclusive = start_page.saturating_add(1usize << order);
        let start_idx = start_page / PAGEBLOCK_PAGES;
        let end_idx = end_page_exclusive.saturating_sub(1) / PAGEBLOCK_PAGES;
        debug_assert!(end_idx < segment.pageblock_count);

        for idx in start_idx..=end_idx {
            unsafe {
                *segment.pageblock_tags.add(idx) = migratetype as u8;
            }
        }
    }
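
    // Example (assuming PAGEBLOCK_PAGES = 512 for illustration): a block at
    // segment page 768 with order 10 covers pages 768..1792, so start_idx =
    // 768 / 512 = 1 and end_idx = 1791 / 512 = 3; pageblocks 1..=3 are all
    // retagged even though 1 and 3 are only partially covered.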

    /// Count pageblocks by migratetype for one zone.
    fn zone_pageblock_counts(zone: &Zone) -> [usize; Migratetype::COUNT] {
        let mut counts = [0usize; Migratetype::COUNT];
        for segment in zone.segments().iter().take(zone.segment_count) {
            if segment.pageblock_count == 0 || segment.pageblock_tags.is_null() {
                continue;
            }
            for idx in 0..segment.pageblock_count {
                let migratetype =
                    unsafe { Self::decode_pageblock_tag(*segment.pageblock_tags.add(idx)) };
                counts[migratetype.index()] = counts[migratetype.index()].saturating_add(1);
            }
        }
        counts
    }

    fn zone_effective_free_pages(zone: &Zone, zone_idx: usize) -> usize {
        zone.available_pages()
            .saturating_add(LOCAL_CACHED_ZONE_FRAMES[zone_idx].load(AtomicOrdering::Relaxed))
    }

    /// Returns whether the zone should be considered for the current request.
    fn zone_allows_allocation(
        zone: &Zone,
        zone_idx: usize,
        order: u8,
        honor_watermarks: bool,
    ) -> bool {
        if zone.page_count == 0 {
            return false;
        }

        if !honor_watermarks {
            return true;
        }

        let requested_pages = 1usize << order;
        let floor = zone.watermark_min.saturating_add(zone.lowmem_reserve_pages);
        Self::zone_effective_free_pages(zone, zone_idx) >= requested_pages.saturating_add(floor)
    }

    /// Returns whether a buddy block is free and coalescible with `migratetype`.
    fn can_merge_with_buddy(phys: u64, order: u8, migratetype: Migratetype) -> bool {
        let meta = get_meta(PhysAddr::new(phys));
        let flags = meta.get_flags();
        flags & frame_flags::FREE != 0
            && meta.get_order() == order
            && Self::migratetype_from_flags(flags) == migratetype
            && !crate::memory::frame::block_phys_has_poison_guard(phys, order)
    }

    /// Decode the block migratetype stored in frame metadata flags.
    fn block_migratetype(frame_phys: u64) -> Migratetype {
        Self::migratetype_from_flags(get_meta(PhysAddr::new(frame_phys)).get_flags())
    }

    /// Decode a migratetype from frame flags.
    #[inline]
    fn migratetype_from_flags(flags: u32) -> Migratetype {
        if flags & frame_flags::MOVABLE != 0 {
            Migratetype::Movable
        } else {
            Migratetype::Unmovable
        }
    }

    /// Encode the metadata flags for a free block of the given migratetype.
    #[inline]
    fn free_flags_for(migratetype: Migratetype) -> u32 {
        match migratetype {
            Migratetype::Unmovable => frame_flags::FREE,
            Migratetype::Movable => frame_flags::FREE | frame_flags::MOVABLE,
        }
    }

    /// Encode the metadata flags for an allocated block of the given migratetype.
    #[inline]
    fn allocated_flags_for(migratetype: Migratetype) -> u32 {
        match migratetype {
            Migratetype::Unmovable => frame_flags::ALLOCATED,
            Migratetype::Movable => frame_flags::ALLOCATED | frame_flags::MOVABLE,
        }
    }

    /// Try to allocate from the supplied zone order, first honoring reserves and then bypassing them.
    fn alloc_in_zone_order(
        &mut self,
        order: u8,
        migratetype: Migratetype,
        zone_order: &[usize],
        token: &IrqDisabledToken,
    ) -> Option<PhysFrame> {
        for honor_watermarks in [true, false] {
            for &zi in zone_order {
                if let Some(frame) = Self::alloc_from_zone(
                    &mut self.zones[zi],
                    zi,
                    order,
                    migratetype,
                    honor_watermarks,
                    token,
                ) {
                    return Some(frame);
                }
            }
        }
        None
    }

    /// Returns the preferred zone scan order for one migratetype.
    ///
    /// Unmovable allocations still prefer `Normal` first because the current
    /// kernel hot-touches those pages directly. Movable allocations instead
    /// prefer `HighMem` first to preserve scarce low memory for pinned kernel
    /// structures and emergency paths.
    #[inline]
    fn preferred_zone_order(migratetype: Migratetype) -> &'static [usize; ZoneType::COUNT] {
        match migratetype {
            Migratetype::Unmovable => &UNMOVABLE_ZONE_ORDER,
            Migratetype::Movable => &MOVABLE_ZONE_ORDER,
        }
    }

    #[inline]
    fn zone_pressure_for_free_pages(zone: &Zone, free_pages: usize) -> ZonePressure {
        let reserve_floor = zone.watermark_min.saturating_add(zone.lowmem_reserve_pages);
        let low_floor = zone.watermark_low.saturating_add(zone.lowmem_reserve_pages);
        let high_floor = zone
            .watermark_high
            .saturating_add(zone.lowmem_reserve_pages);

        if free_pages <= reserve_floor {
            ZonePressure::Min
        } else if free_pages <= low_floor {
            ZonePressure::Low
        } else if free_pages <= high_floor {
            ZonePressure::High
        } else {
            ZonePressure::Healthy
        }
    }
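
    // Example (hypothetical watermarks: min = 512, low = 1024, high = 1536,
    // lowmem_reserve_pages = 256): free <= 768 reports Min, <= 1280 Low,
    // <= 1792 High, and anything above that Healthy.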
1363
1364    fn compaction_candidate(
1365        &self,
1366        order: u8,
1367        migratetype: Migratetype,
1368        zone_order: &[usize],
1369    ) -> Option<CompactionCandidate> {
1370        if order == 0 {
1371            return None;
1372        }
1373
1374        let requested_pages = 1usize << order;
1375        let mut best: Option<CompactionCandidate> = None;
1376
1377        for &zone_idx in zone_order {
1378            let zone = &self.zones[zone_idx];
1379            if zone.page_count == 0 {
1380                continue;
1381            }
1382
1383            let cached_pages = LOCAL_CACHED_ZONE_FRAMES[zone_idx].load(AtomicOrdering::Relaxed);
1384            if cached_pages == 0 {
1385                continue;
1386            }
1387
1388            let effective_free = Self::zone_effective_free_pages(zone, zone_idx);
1389            let available_pages = effective_free
1390                .saturating_sub(zone.watermark_min.saturating_add(zone.lowmem_reserve_pages));
1391            if available_pages < requested_pages {
1392                continue;
1393            }
1394
1395            let usable_pages = zone.free_pages_at_or_above_order(order);
1396            if usable_pages >= requested_pages {
1397                continue;
1398            }
1399
1400            let fragmentation_score = zone.fragmentation_score(order, cached_pages);
1401            if fragmentation_score < COMPACTION_FRAGMENTATION_THRESHOLD {
1402                continue;
1403            }
1404
1405            let pageblocks = Self::zone_pageblock_counts(zone);
1406            let candidate = CompactionCandidate {
1407                zone_idx,
1408                zone_type: zone.zone_type,
1409                order,
1410                migratetype,
1411                pressure: Self::zone_pressure_for_free_pages(zone, effective_free),
1412                fragmentation_score,
1413                requested_pages,
1414                available_pages,
1415                usable_pages,
1416                cached_pages,
1417                pageblock_count: pageblocks[Migratetype::Unmovable.index()]
1418                    .saturating_add(pageblocks[Migratetype::Movable.index()]),
1419                matching_pageblocks: pageblocks[migratetype.index()],
1420            };
1421
1422            let replace = match best {
1423                None => true,
1424                Some(current) => {
1425                    candidate.fragmentation_score > current.fragmentation_score
1426                        || (candidate.fragmentation_score == current.fragmentation_score
1427                            && candidate.cached_pages > current.cached_pages)
1428                        || (candidate.fragmentation_score == current.fragmentation_score
1429                            && candidate.cached_pages == current.cached_pages
1430                            && candidate.matching_pageblocks > current.matching_pageblocks)
1431                }
1432            };
1433
1434            if replace {
1435                best = Some(candidate);
1436            }
1437        }
1438
1439        best
1440    }
1441
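    /// Sizes the targeted drain. Worked example (hypothetical numbers,
    /// assuming `PAGEBLOCK_PAGES` is 512): an order-3 request (8 pages) in a
    /// zone with matching pageblocks aims for a whole pageblock, so the
    /// target is `max(8, 512) * 2 = 1024` pages, floored at
    /// `LOCAL_CACHE_FLUSH_BATCH` and clamped to the pages actually cached.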
1442    #[inline]
1443    fn compaction_drain_budget(candidate: CompactionCandidate) -> usize {
1444        let pageblock_goal = if candidate.matching_pageblocks != 0 {
1445            PAGEBLOCK_PAGES
1446        } else {
1447            candidate.requested_pages
1448        };
1449        let target_pages = core::cmp::max(candidate.requested_pages, pageblock_goal)
1450            .saturating_mul(2)
1451            .max(LOCAL_CACHE_FLUSH_BATCH);
1452        core::cmp::min(target_pages, candidate.cached_pages)
1453    }
1454
1455    /// Allocate while the caller already owns the global allocator lock.
1456    fn alloc_locked_with_migratetype(
1457        &mut self,
1458        order: u8,
1459        migratetype: Migratetype,
1460        token: &IrqDisabledToken,
1461    ) -> Result<PhysFrame, AllocError> {
1462        if order > MAX_ORDER as u8 {
1463            return Err(AllocError::InvalidOrder);
1464        }
1465
1466        let cpu_idx = crate::arch::x86_64::percpu::current_cpu_index();
1467        if ALLOC_IN_PROGRESS[cpu_idx].swap(true, core::sync::atomic::Ordering::Acquire) {
1468            panic!("Recursive allocation detected on CPU {}!", cpu_idx);
1469        }
1470
1471        let result = self
1472            .alloc_in_zone_order(
1473                order,
1474                migratetype,
1475                Self::preferred_zone_order(migratetype),
1476                token,
1477            )
1478            .ok_or_else(|| {
1479                crate::memory::buddy::record_buddy_alloc_fail(order);
1480                AllocError::OutOfMemory
1481            });
1482
1483        ALLOC_IN_PROGRESS[cpu_idx].store(false, core::sync::atomic::Ordering::Release);
1484        result
1485    }
1486
1487    /// Allocate from one explicit zone while the caller already owns the global allocator lock.
1488    fn alloc_zone_locked(
1489        &mut self,
1490        order: u8,
1491        zone: ZoneType,
1492        migratetype: Migratetype,
1493        token: &IrqDisabledToken,
1494    ) -> Result<PhysFrame, AllocError> {
1495        if order > MAX_ORDER as u8 {
1496            return Err(AllocError::InvalidOrder);
1497        }
1498
1499        let cpu_idx = crate::arch::x86_64::percpu::current_cpu_index();
1500        if ALLOC_IN_PROGRESS[cpu_idx].swap(true, core::sync::atomic::Ordering::Acquire) {
1501            panic!("Recursive allocation detected on CPU {}!", cpu_idx);
1502        }
1503
1504        let zone_idx = zone as usize;
1505        let zone_order = [zone_idx];
1506        let result = self
1507            .alloc_in_zone_order(order, migratetype, &zone_order, token)
1508            .ok_or_else(|| {
1509                crate::memory::buddy::record_buddy_alloc_fail(order);
1510                AllocError::OutOfMemory
1511            });
1512
1513        ALLOC_IN_PROGRESS[cpu_idx].store(false, core::sync::atomic::Ordering::Release);
1514        result
1515    }
1516
1517    fn mark_block_allocated(frame_phys: u64, order: u8, migratetype: Migratetype) {
1518        let page_count = 1usize << order;
1519        for page_idx in 0..page_count {
1520            let phys = frame_phys + page_idx as u64 * PAGE_SIZE;
1521            let meta = get_meta(PhysAddr::new(phys));
1522            // Sentinel must still be intact at this point: if not, the frame
1523            // was never on the free list (double-alloc or metadata corruption).
1524            debug_assert_eq!(
1525                meta.get_refcount(),
1526                crate::memory::frame::REFCOUNT_UNUSED,
1527                "buddy: mark_block_allocated on frame {:#x} with unexpected refcount (corruption?)",
1528                phys,
1529            );
1530            meta.set_flags(Self::allocated_flags_for(migratetype));
1531            meta.set_order(order);
1532            // Leave refcount as REFCOUNT_UNUSED; FrameAllocOptions::allocate()
1533            // will perform CAS(REFCOUNT_UNUSED → 1) as the fail-fast handoff.
1534        }
1535    }
1536
1537    fn mark_block_free(frame_phys: u64, order: u8, migratetype: Migratetype) {
1538        Self::set_block_meta(
1539            frame_phys,
1540            order,
1541            Self::free_flags_for(migratetype),
1542            crate::memory::frame::REFCOUNT_UNUSED,
1543        );
1544    }
1545
1546    /// Stamp every 4 KiB [`MetaSlot`] in the buddy block (flags, order, free-list links, refcount).
1547    ///
1548    /// [`MetaSlot::reset_with_free_list_meta`] runs on **each** page, including non-head pages
1549    /// of a multi-page block: the whole block returns to the buddy as one unit, so vtable and
1550    /// guard bits are cleared on every constituent frame (poison bits are preserved per-slot).
1551    fn set_block_meta(frame_phys: u64, order: u8, flags: u32, refcount: u32) {
1552        let page_count = 1usize << order;
1553        for page_idx in 0..page_count {
1554            let phys = frame_phys + page_idx as u64 * PAGE_SIZE;
1555            let meta = get_meta(PhysAddr::new(phys));
1556            meta.set_flags(flags);
1557            meta.set_order(order);
1558            meta.set_next(FRAME_META_LINK_NONE);
1559            meta.set_prev(FRAME_META_LINK_NONE);
1560            meta.set_refcount(refcount);
1561            meta.reset_with_free_list_meta();
1562        }
1563    }
1564}
1565
1566static BUDDY_ALLOCATOR: SpinLock<Option<BuddyAllocator>> = SpinLock::new(None);
1567
1568/// Per-order allocation failure counters.
1569///
1570/// `BUDDY_ALLOC_FAIL_COUNTS[order]` counts how many times a request for
1571/// `order` failed to find a free block at `order` or any higher order.
1572/// These are incremented in `alloc_from_zone` when the loop exhausts all
1573/// orders without finding a free block.
1574///
1575/// Read via `buddy_alloc_fail_counts_snapshot()` for diagnostics.
1576static BUDDY_ALLOC_FAIL_COUNTS: [core::sync::atomic::AtomicUsize;
1577    crate::memory::zone::MAX_ORDER + 1] =
1578    [const { core::sync::atomic::AtomicUsize::new(0) }; crate::memory::zone::MAX_ORDER + 1];
1579
1580static COMPACTION_ATTEMPTS: AtomicUsize = AtomicUsize::new(0);
1581static COMPACTION_SUCCESSES: AtomicUsize = AtomicUsize::new(0);
1582static COMPACTION_LAST_ORDER: AtomicUsize = AtomicUsize::new(COMPACTION_SNAPSHOT_NONE);
1583static COMPACTION_LAST_MIGRATETYPE: AtomicUsize = AtomicUsize::new(COMPACTION_SNAPSHOT_NONE);
1584static COMPACTION_LAST_ZONE: AtomicUsize = AtomicUsize::new(COMPACTION_SNAPSHOT_NONE);
1585static COMPACTION_LAST_PRESSURE: AtomicUsize = AtomicUsize::new(ZonePressure::SNAPSHOT_COUNT);
1586static COMPACTION_LAST_FRAGMENTATION: AtomicUsize = AtomicUsize::new(0);
1587static COMPACTION_LAST_REQUESTED_PAGES: AtomicUsize = AtomicUsize::new(0);
1588static COMPACTION_LAST_AVAILABLE_PAGES: AtomicUsize = AtomicUsize::new(0);
1589static COMPACTION_LAST_USABLE_PAGES: AtomicUsize = AtomicUsize::new(0);
1590static COMPACTION_LAST_CACHED_PAGES: AtomicUsize = AtomicUsize::new(0);
1591static COMPACTION_LAST_DRAINED_PAGES: AtomicUsize = AtomicUsize::new(0);
1592static COMPACTION_LAST_PAGEBLOCK_COUNT: AtomicUsize = AtomicUsize::new(0);
1593static COMPACTION_LAST_MATCHING_PAGEBLOCKS: AtomicUsize = AtomicUsize::new(0);
1594
1595/// Records a buddy allocation failure for the given order.
1596///
1597/// Called from `alloc_from_zone` when no free block is available at any
1598/// order >= `order`. Increments the per-order counter for diagnostics.
1599pub(crate) fn record_buddy_alloc_fail(order: u8) {
1600    let idx = order as usize;
1601    if idx <= crate::memory::zone::MAX_ORDER {
1602        BUDDY_ALLOC_FAIL_COUNTS[idx].fetch_add(1, core::sync::atomic::Ordering::Relaxed);
1603    }
1604}
1605
1606/// Returns the buddy allocation failure counts by order.
1607///
1608/// Use this for diagnostics: e.g., to determine whether a heap panic is
1609/// caused by genuine memory pressure or by high-order fragmentation.
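///
/// Illustrative use (hypothetical diagnostics loop; `ignore`d because kernel
/// doctests are not executed):
///
/// ```ignore
/// let fails = buddy_alloc_fail_counts_snapshot();
/// for (order, count) in fails.iter().enumerate() {
///     if *count != 0 {
///         serial_println!("order {}: {} failed allocations", order, count);
///     }
/// }
/// ```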
1610pub fn buddy_alloc_fail_counts_snapshot() -> [usize; crate::memory::zone::MAX_ORDER + 1] {
1611    let mut out = [0usize; crate::memory::zone::MAX_ORDER + 1];
1612    for (i, counter) in BUDDY_ALLOC_FAIL_COUNTS.iter().enumerate() {
1613        out[i] = counter.load(core::sync::atomic::Ordering::Relaxed);
1614    }
1615    out
1616}
1617
1618fn snapshot_zone_type(value: usize) -> Option<ZoneType> {
1619    match value {
1620        x if x == ZoneType::DMA as usize => Some(ZoneType::DMA),
1621        x if x == ZoneType::Normal as usize => Some(ZoneType::Normal),
1622        x if x == ZoneType::HighMem as usize => Some(ZoneType::HighMem),
1623        _ => None,
1624    }
1625}
1626
1627fn snapshot_migratetype(value: usize) -> Option<Migratetype> {
1628    match value {
1629        x if x == Migratetype::Unmovable as usize => Some(Migratetype::Unmovable),
1630        x if x == Migratetype::Movable as usize => Some(Migratetype::Movable),
1631        _ => None,
1632    }
1633}
1634
1635fn record_compaction_attempt(candidate: CompactionCandidate, drained_pages: usize, success: bool) {
1636    COMPACTION_ATTEMPTS.fetch_add(1, AtomicOrdering::Relaxed);
1637    if success {
1638        COMPACTION_SUCCESSES.fetch_add(1, AtomicOrdering::Relaxed);
1639    }
1640
1641    COMPACTION_LAST_ORDER.store(candidate.order as usize, AtomicOrdering::Relaxed);
1642    COMPACTION_LAST_MIGRATETYPE.store(candidate.migratetype as usize, AtomicOrdering::Relaxed);
1643    COMPACTION_LAST_ZONE.store(candidate.zone_type as usize, AtomicOrdering::Relaxed);
1644    COMPACTION_LAST_PRESSURE.store(candidate.pressure.as_snapshot(), AtomicOrdering::Relaxed);
1645    COMPACTION_LAST_FRAGMENTATION.store(candidate.fragmentation_score, AtomicOrdering::Relaxed);
1646    COMPACTION_LAST_REQUESTED_PAGES.store(candidate.requested_pages, AtomicOrdering::Relaxed);
1647    COMPACTION_LAST_AVAILABLE_PAGES.store(candidate.available_pages, AtomicOrdering::Relaxed);
1648    COMPACTION_LAST_USABLE_PAGES.store(candidate.usable_pages, AtomicOrdering::Relaxed);
1649    COMPACTION_LAST_CACHED_PAGES.store(candidate.cached_pages, AtomicOrdering::Relaxed);
1650    COMPACTION_LAST_DRAINED_PAGES.store(drained_pages, AtomicOrdering::Relaxed);
1651    COMPACTION_LAST_PAGEBLOCK_COUNT.store(candidate.pageblock_count, AtomicOrdering::Relaxed);
1652    COMPACTION_LAST_MATCHING_PAGEBLOCKS
1653        .store(candidate.matching_pageblocks, AtomicOrdering::Relaxed);
1654}
1655
1656/// Snapshot compaction-assist telemetry without locking the allocator.
1657pub fn compaction_stats_snapshot() -> CompactionStats {
1658    let last_order = COMPACTION_LAST_ORDER.load(AtomicOrdering::Relaxed);
1659    let last_migratetype = COMPACTION_LAST_MIGRATETYPE.load(AtomicOrdering::Relaxed);
1660    let last_zone = COMPACTION_LAST_ZONE.load(AtomicOrdering::Relaxed);
1661    let last_pressure = COMPACTION_LAST_PRESSURE.load(AtomicOrdering::Relaxed);
1662
1663    CompactionStats {
1664        attempts: COMPACTION_ATTEMPTS.load(AtomicOrdering::Relaxed),
1665        successes: COMPACTION_SUCCESSES.load(AtomicOrdering::Relaxed),
1666        last_order: if last_order == COMPACTION_SNAPSHOT_NONE {
1667            None
1668        } else {
1669            Some(last_order as u8)
1670        },
1671        last_migratetype: snapshot_migratetype(last_migratetype),
1672        last_zone: snapshot_zone_type(last_zone),
1673        last_pressure: ZonePressure::from_snapshot(last_pressure),
1674        last_fragmentation_score: COMPACTION_LAST_FRAGMENTATION.load(AtomicOrdering::Relaxed),
1675        last_requested_pages: COMPACTION_LAST_REQUESTED_PAGES.load(AtomicOrdering::Relaxed),
1676        last_available_pages: COMPACTION_LAST_AVAILABLE_PAGES.load(AtomicOrdering::Relaxed),
1677        last_usable_pages: COMPACTION_LAST_USABLE_PAGES.load(AtomicOrdering::Relaxed),
1678        last_cached_pages: COMPACTION_LAST_CACHED_PAGES.load(AtomicOrdering::Relaxed),
1679        last_drained_pages: COMPACTION_LAST_DRAINED_PAGES.load(AtomicOrdering::Relaxed),
1680        last_pageblock_count: COMPACTION_LAST_PAGEBLOCK_COUNT.load(AtomicOrdering::Relaxed),
1681        last_matching_pageblocks: COMPACTION_LAST_MATCHING_PAGEBLOCKS.load(AtomicOrdering::Relaxed),
1682    }
1683}
1684
1685/// Pages permanently withheld from the buddy free lists due to [`meta_guard::POISONED`].
1686static POISON_QUARANTINE_PAGES: AtomicUsize = AtomicUsize::new(0);
1687
1688/// Snapshot of pages quarantined (not recycled) because frame metadata reported poison.
1689pub fn poison_quarantine_pages_snapshot() -> usize {
1690    POISON_QUARANTINE_PAGES.load(AtomicOrdering::Relaxed)
1691}
1692
1693/// Returns the global buddy lock address for deadlock tracing.
1694pub fn debug_buddy_lock_addr() -> usize {
1695    &BUDDY_ALLOCATOR as *const _ as usize
1696}
1697
1698/// Per-CPU flag to detect recursive allocation (deadlock via re-entrant logging or interrupt paths).
1699static ALLOC_IN_PROGRESS: [core::sync::atomic::AtomicBool; crate::arch::x86_64::percpu::MAX_CPUS] =
1700    [const { core::sync::atomic::AtomicBool::new(false) }; crate::arch::x86_64::percpu::MAX_CPUS];
1701
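/// Fixed-capacity LIFO of order-0 frame physical addresses; one instance per
/// (migratetype, CPU) slot in `LOCAL_FRAME_CACHES`.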
1702struct LocalFrameCache {
1703    len: usize,
1704    frames: [u64; LOCAL_CACHE_CAPACITY],
1705}
1706
1707impl LocalFrameCache {
1708    const fn new() -> Self {
1709        Self {
1710            len: 0,
1711            frames: [0; LOCAL_CACHE_CAPACITY],
1712        }
1713    }
1714
1715    fn clear(&mut self) {
1716        self.len = 0;
1717    }
1718
1719    fn pop(&mut self) -> Option<PhysFrame> {
1720        if self.len == 0 {
1721            return None;
1722        }
1723        self.len -= 1;
1724        Some(PhysFrame {
1725            start_address: PhysAddr::new(self.frames[self.len]),
1726        })
1727    }
1728
1729    fn push(&mut self, frame: PhysFrame) -> Result<(), PhysFrame> {
1730        if self.len >= LOCAL_CACHE_CAPACITY {
1731            return Err(frame);
1732        }
1733        self.frames[self.len] = frame.start_address.as_u64();
1734        self.len += 1;
1735        Ok(())
1736    }
1737
1738    fn pop_many(&mut self, out: &mut [u64]) -> usize {
1739        let count = core::cmp::min(self.len, out.len());
1740        for slot in out.iter_mut().take(count) {
1741            self.len -= 1;
1742            *slot = self.frames[self.len];
1743        }
1744        count
1745    }
1746
1747    fn pop_many_for_zone(&mut self, out: &mut [u64], zone_idx: usize) -> usize {
1748        let mut written = 0usize;
1749        let mut idx = 0usize;
1750
1751        while idx < self.len && written < out.len() {
1752            let phys = self.frames[idx];
1753            if zone_index_for_phys(phys) != zone_idx {
1754                idx += 1;
1755                continue;
1756            }
1757
1758            self.len -= 1;
1759            out[written] = phys;
1760            written += 1;
1761            self.frames[idx] = self.frames[self.len];
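            // Swap-remove: the old tail now occupies `idx`, so `idx` is
            // intentionally not advanced and the swapped-in frame is
            // examined on the next iteration.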
1762        }
1763
1764        written
1765    }
1766}
1767
1768static LOCAL_FRAME_CACHES: [SpinLock<LocalFrameCache>; LOCAL_CACHE_SLOTS] =
1769    [const { SpinLock::new(LocalFrameCache::new()) }; LOCAL_CACHE_SLOTS];
1770static LOCAL_CACHED_FRAMES: AtomicUsize = AtomicUsize::new(0);
1771static LOCAL_CACHED_ZONE_FRAMES: [AtomicUsize; ZoneType::COUNT] =
1772    [const { AtomicUsize::new(0) }; ZoneType::COUNT];
1773static LOCAL_CACHED_ZONE_MIGRATETYPE_FRAMES: [AtomicUsize; LOCAL_CACHED_ZONE_MIGRATETYPE_SLOTS] =
1774    [const { AtomicUsize::new(0) }; LOCAL_CACHED_ZONE_MIGRATETYPE_SLOTS];
1775
1776type GlobalGuard = SpinLockGuard<'static, Option<BuddyAllocator>>;
1777
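/// Lazily acquired handle on the global buddy lock: the guard is taken on
/// first use and can be dropped mid-sequence via `unlock()` so that long
/// drain loops never hold the lock across a whole iteration.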
1778struct OnDemandGlobalLock {
1779    guard: Option<GlobalGuard>,
1780}
1781
1782impl OnDemandGlobalLock {
1783    fn new() -> Self {
1784        Self { guard: None }
1785    }
1786
1787    fn unlock(&mut self) {
1788        self.guard = None;
1789    }
1790
1791    fn with_allocator<R>(
1792        &mut self,
1793        f: impl FnOnce(&mut BuddyAllocator, &IrqDisabledToken) -> R,
1794    ) -> Option<R> {
1795        let guard = self.guard.get_or_insert_with(|| BUDDY_ALLOCATOR.lock());
1796        guard.with_mut_and_token(|slot, token| slot.as_mut().map(|allocator| f(allocator, token)))
1797    }
1798
1799    fn alloc_with_migratetype(
1800        &mut self,
1801        order: u8,
1802        migratetype: Migratetype,
1803    ) -> Result<PhysFrame, AllocError> {
1804        self.with_allocator(|allocator, token| {
1805            allocator.alloc_locked_with_migratetype(order, migratetype, token)
1806        })
1807        .unwrap_or(Err(AllocError::OutOfMemory))
1808    }
1809
1810    fn free(&mut self, frame: PhysFrame, order: u8) {
1811        let _ = self.with_allocator(|allocator, token| allocator.free(frame, order, token));
1812    }
1813
1814    fn free_phys_batch(&mut self, phys_batch: &[u64], count: usize) {
1815        if count == 0 {
1816            return;
1817        }
1818        let _ = self.with_allocator(|allocator, token| {
1819            for phys in phys_batch.iter().take(count).copied() {
1820                allocator.free(
1821                    PhysFrame {
1822                        start_address: PhysAddr::new(phys),
1823                    },
1824                    0,
1825                    token,
1826                );
1827            }
1828        });
1829    }
1830}
1831
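/// Maps a physical address to its zone index using the fixed DMA/Normal
/// boundaries. For example: `0xFF_FFFF` (just under 16 MiB) -> `DMA`,
/// `0x100_0000` -> `Normal`, `0x3800_0000` (896 MiB) -> `HighMem`.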
1832#[inline]
1833fn zone_index_for_phys(phys: u64) -> usize {
1834    if phys < DMA_MAX {
1835        ZoneType::DMA as usize
1836    } else if phys < NORMAL_MAX {
1837        ZoneType::Normal as usize
1838    } else {
1839        ZoneType::HighMem as usize
1840    }
1841}
1842
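/// Flattens a (migratetype, cpu) pair into a `LOCAL_FRAME_CACHES` index.
/// Hypothetical example, assuming `MAX_CPUS == 64` and
/// `Migratetype::Movable.index() == 1`: `(Movable, cpu 3)` maps to slot
/// `1 * 64 + 3 = 67`.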
1843#[inline]
1844fn local_cache_slot(cpu_idx: usize, migratetype: Migratetype) -> usize {
1845    migratetype.index() * crate::arch::x86_64::percpu::MAX_CPUS + cpu_idx
1846}
1847
1848#[inline]
1849fn local_cached_zone_migratetype_slot(zone_idx: usize, migratetype: Migratetype) -> usize {
1850    migratetype.index() * ZoneType::COUNT + zone_idx
1851}
1852
1853#[inline]
1854fn is_cacheable_phys_for(phys: u64, migratetype: Migratetype) -> bool {
1855    match migratetype {
1856        Migratetype::Unmovable => zone_index_for_phys(phys) == ZoneType::Normal as usize,
1857        Migratetype::Movable => zone_index_for_phys(phys) != ZoneType::DMA as usize,
1858    }
1859}
1860
1861#[inline]
1862fn local_cached_zone_migratetype_count(zone_idx: usize, migratetype: Migratetype) -> usize {
1863    LOCAL_CACHED_ZONE_MIGRATETYPE_FRAMES[local_cached_zone_migratetype_slot(zone_idx, migratetype)]
1864        .load(AtomicOrdering::Relaxed)
1865}
1866
1867#[inline]
1868fn local_cached_inc_phys(phys: u64, migratetype: Migratetype) {
1869    let zone_idx = zone_index_for_phys(phys);
1870    LOCAL_CACHED_FRAMES.fetch_add(1, AtomicOrdering::Relaxed);
1871    LOCAL_CACHED_ZONE_FRAMES[zone_idx].fetch_add(1, AtomicOrdering::Relaxed);
1872    LOCAL_CACHED_ZONE_MIGRATETYPE_FRAMES[local_cached_zone_migratetype_slot(zone_idx, migratetype)]
1873        .fetch_add(1, AtomicOrdering::Relaxed);
1874}
1875
1876#[inline]
1877fn local_cached_dec_phys(phys: u64, migratetype: Migratetype) {
1878    let prev_total = LOCAL_CACHED_FRAMES.fetch_sub(1, AtomicOrdering::Relaxed);
1879    debug_assert!(prev_total > 0);
1880    let zone = zone_index_for_phys(phys);
1881    let prev_zone = LOCAL_CACHED_ZONE_FRAMES[zone].fetch_sub(1, AtomicOrdering::Relaxed);
1882    debug_assert!(prev_zone > 0);
1883    let prev_zone_type = LOCAL_CACHED_ZONE_MIGRATETYPE_FRAMES
1884        [local_cached_zone_migratetype_slot(zone, migratetype)]
1885    .fetch_sub(1, AtomicOrdering::Relaxed);
1886    debug_assert!(prev_zone_type > 0);
1887}
1888
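/// Flushes up to `max_pages` frames from all per-CPU caches back to the
/// buddy, releasing the global lock between batches to bound hold time.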
1889fn drain_local_caches_to_global(max_pages: usize, global: &mut OnDemandGlobalLock) -> usize {
1890    if max_pages == 0 {
1891        return 0;
1892    }
1893
1894    let mut drained = 0usize;
1895    let mut batch = [0u64; LOCAL_CACHE_FLUSH_BATCH];
1896    for migratetype in Migratetype::ALL {
1897        for cpu in 0..crate::arch::x86_64::percpu::MAX_CPUS {
1898            if drained >= max_pages {
1899                break;
1900            }
1901            let target = core::cmp::min(batch.len(), max_pages.saturating_sub(drained));
1902            if target == 0 {
1903                break;
1904            }
1905
1906            let popped = {
1907                let mut cache = LOCAL_FRAME_CACHES[local_cache_slot(cpu, migratetype)].lock();
1908                cache.pop_many(&mut batch[..target])
1909            };
1910            if popped == 0 {
1911                continue;
1912            }
1913
1914            for phys in batch.iter().take(popped).copied() {
1915                local_cached_dec_phys(phys, migratetype);
1916            }
1917            global.free_phys_batch(&batch, popped);
1918
1919            // Keep lock acquisition on-demand during cross-CPU draining.
1920            global.unlock();
1921            drained += popped;
1922        }
1923    }
1924
1925    drained
1926}
1927
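/// Drains up to `max_pages` cached frames, preferring frames that belong to
/// `zone_idx` (scanning caches in `primary_migratetype`'s fallback order),
/// then topping up from arbitrary caches if the zone-targeted pass falls short.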
1928fn drain_local_caches_for_zone(
1929    max_pages: usize,
1930    zone_idx: usize,
1931    primary_migratetype: Migratetype,
1932    global: &mut OnDemandGlobalLock,
1933) -> usize {
1934    if max_pages == 0 {
1935        return 0;
1936    }
1937
1938    let mut drained = 0usize;
1939    let mut batch = [0u64; LOCAL_CACHE_FLUSH_BATCH];
1940
1941    for migratetype in primary_migratetype.fallback_order() {
1942        for cpu in 0..crate::arch::x86_64::percpu::MAX_CPUS {
1943            if drained >= max_pages {
1944                return drained;
1945            }
1946
1947            let target = core::cmp::min(batch.len(), max_pages.saturating_sub(drained));
1948            if target == 0 {
1949                break;
1950            }
1951
1952            let popped = {
1953                let mut cache = LOCAL_FRAME_CACHES[local_cache_slot(cpu, migratetype)].lock();
1954                cache.pop_many_for_zone(&mut batch[..target], zone_idx)
1955            };
1956            if popped == 0 {
1957                continue;
1958            }
1959
1960            for phys in batch.iter().take(popped).copied() {
1961                local_cached_dec_phys(phys, migratetype);
1962            }
1963            global.free_phys_batch(&batch, popped);
1964            global.unlock();
1965            drained += popped;
1966        }
1967    }
1968
1969    if drained < max_pages {
1970        drained = drained.saturating_add(drain_local_caches_to_global(
1971            max_pages.saturating_sub(drained),
1972            global,
1973        ));
1974    }
1975
1976    drained
1977}
1978
1979/// Initializes the global buddy allocator from the boot memory map and resets the per-CPU frame caches.
1980pub fn init_buddy_allocator(memory_regions: &[MemoryRegion]) {
1981    for cache in &LOCAL_FRAME_CACHES {
1982        cache.lock().clear();
1983    }
1984    LOCAL_CACHED_FRAMES.store(0, AtomicOrdering::Relaxed);
1985    for zone_cached in &LOCAL_CACHED_ZONE_FRAMES {
1986        zone_cached.store(0, AtomicOrdering::Relaxed);
1987    }
1988    for zone_cached in &LOCAL_CACHED_ZONE_MIGRATETYPE_FRAMES {
1989        zone_cached.store(0, AtomicOrdering::Relaxed);
1990    }
1991
1992    {
1993        let mut guard = BUDDY_ALLOCATOR.lock();
1994        *guard = Some(BuddyAllocator::new());
1995        guard.with_mut_and_token(|slot, _token| {
1996            if let Some(allocator) = slot.as_mut() {
1997                allocator.init(memory_regions);
1998            }
1999        });
2000    }
2001    // Race/corruption diagnostic: register buddy lock for E9 LOCK-A/LOCK-R traces.
2002    crate::sync::debug_set_trace_buddy_addr(debug_buddy_lock_addr());
2003}
2004
2005/// Returns the global buddy allocator lock.
2006pub fn get_allocator() -> &'static SpinLock<Option<BuddyAllocator>> {
2007    &BUDDY_ALLOCATOR
2008}
2009
2010fn refill_local_cache(
2011    cpu_idx: usize,
2012    global: &mut OnDemandGlobalLock,
2013    migratetype: Migratetype,
2014) -> Result<PhysFrame, AllocError> {
2015    // Critical path: refill in batches from the global allocator to amortize lock contention.
2016    let (base, order) = match global.alloc_with_migratetype(LOCAL_CACHE_REFILL_ORDER, migratetype) {
2017        Ok(frame) => (frame, LOCAL_CACHE_REFILL_ORDER),
2018        Err(AllocError::OutOfMemory) => (global.alloc_with_migratetype(0, migratetype)?, 0),
2019        Err(e) => return Err(e),
2020    };
2021    global.unlock();
2022
2023    let frame_count = 1usize << order;
2024    let mut overflow = [0u64; LOCAL_CACHE_REFILL_FRAMES];
2025    let mut overflow_len = 0usize;
2026    let mut ret = None;
2027
2028    {
2029        let mut cache = LOCAL_FRAME_CACHES[local_cache_slot(cpu_idx, migratetype)].lock();
2030        for idx in 0..frame_count {
2031            let phys = base.start_address.as_u64() + (idx as u64) * PAGE_SIZE;
2032            let frame = PhysFrame {
2033                start_address: PhysAddr::new(phys),
2034            };
2035            if !is_cacheable_phys_for(phys, migratetype) {
2036                overflow[overflow_len] = phys;
2037                overflow_len += 1;
2038                continue;
2039            }
2040            if ret.is_none() {
2041                // Re-publish the returned page as an allocated order-0 block.
2042                // The refcount must stay REFCOUNT_UNUSED so FrameAllocOptions
2043                // can still claim it via CAS(UNUSED -> 1).
2044                BuddyAllocator::mark_block_allocated(phys, 0, migratetype);
2045                ret = Some(frame);
2046                continue;
2047            }
2048            // Pages parked in the local cache are logically free and must
2049            // therefore carry the free-list sentinel invariant.
2050            BuddyAllocator::mark_block_free(phys, 0, migratetype);
2051            if cache.push(frame).is_ok() {
2052                local_cached_inc_phys(phys, migratetype);
2053            } else {
2054                overflow[overflow_len] = phys;
2055                overflow_len += 1;
2056            }
2057        }
2058    }
2059
2060    if overflow_len != 0 {
2061        global.free_phys_batch(&overflow, overflow_len);
2062    }
2063
2064    ret.ok_or(AllocError::OutOfMemory)
2065}
2066
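/// Work-steals one cached order-0 frame from a peer CPU, probing caches
/// round-robin starting at the calling CPU's neighbour.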
2067fn steal_from_other_caches(cpu_idx: usize, migratetype: Migratetype) -> Option<PhysFrame> {
2068    let cpu_count = crate::arch::x86_64::percpu::cpu_count()
2069        .max(1)
2070        .min(crate::arch::x86_64::percpu::MAX_CPUS);
2071
2072    for step in 1..cpu_count {
2073        let peer = (cpu_idx + step) % cpu_count;
2074        let mut cache = LOCAL_FRAME_CACHES[local_cache_slot(peer, migratetype)].lock();
2075        if let Some(frame) = cache.pop() {
2076            BuddyAllocator::mark_block_allocated(frame.start_address.as_u64(), 0, migratetype);
2077            local_cached_dec_phys(frame.start_address.as_u64(), migratetype);
2078            return Some(frame);
2079        }
2080    }
2081    None
2082}
2083
2084fn alloc_order0_cached(migratetype: Migratetype) -> Result<PhysFrame, AllocError> {
2085    let cpu_idx = crate::arch::x86_64::percpu::current_cpu_index();
2086
2087    {
2088        let mut cache = LOCAL_FRAME_CACHES[local_cache_slot(cpu_idx, migratetype)].lock();
2089        if let Some(frame) = cache.pop() {
2090            BuddyAllocator::mark_block_allocated(frame.start_address.as_u64(), 0, migratetype);
2091            local_cached_dec_phys(frame.start_address.as_u64(), migratetype);
2092            return Ok(frame);
2093        }
2094    }
2095
2096    let mut global = OnDemandGlobalLock::new();
2097
2098    if let Ok(frame) = refill_local_cache(cpu_idx, &mut global, migratetype) {
2099        return Ok(frame);
2100    }
2101    // Critical lock-order rule: never hold global while probing local caches.
2102    global.unlock();
2103
2104    if let Some(frame) = steal_from_other_caches(cpu_idx, migratetype) {
2105        return Ok(frame);
2106    }
2107
2108    global.alloc_with_migratetype(0, migratetype)
2109}
2110
2111fn free_order0_cached(frame: PhysFrame, migratetype: Migratetype) {
2112    // NOTE: O(2^order) MetaSlot scan: acceptable here because order is always 0
2113    // (single-page check) on this hot path.
2114    if crate::memory::frame::block_phys_has_poison_guard(frame.start_address.as_u64(), 0) {
2115        let mut global = OnDemandGlobalLock::new();
2116        global.free(frame, 0);
2117        return;
2118    }
2119
2120    if !is_cacheable_phys_for(frame.start_address.as_u64(), migratetype) {
2121        let mut global = OnDemandGlobalLock::new();
2122        global.free(frame, 0);
2123        return;
2124    }
2125
2126    let cpu_idx = crate::arch::x86_64::percpu::current_cpu_index();
2127    let mut spill = [0u64; LOCAL_CACHE_FLUSH_BATCH];
2128
2129    let spill_len = {
2130        let mut cache = LOCAL_FRAME_CACHES[local_cache_slot(cpu_idx, migratetype)].lock();
2131        if cache.push(frame).is_ok() {
2132            // Mark free only on the success path: the incoming frame transitions
2133            // from "caller-allocated" to "cache sentinel" (REFCOUNT_UNUSED).
2134            BuddyAllocator::mark_block_free(frame.start_address.as_u64(), 0, migratetype);
2135            local_cached_inc_phys(frame.start_address.as_u64(), migratetype);
2136            return;
2137        }
2138
2139        // Cache full: pop existing frames to spill to buddy, then retry the push.
2140        let mut spill_len = cache.pop_many(&mut spill);
2141        for phys in spill.iter().take(spill_len).copied() {
2142            local_cached_dec_phys(phys, migratetype);
2143        }
2144
2145        if cache.push(frame).is_ok() {
2146            BuddyAllocator::mark_block_free(frame.start_address.as_u64(), 0, migratetype);
2147            local_cached_inc_phys(frame.start_address.as_u64(), migratetype);
2148        } else {
2149        // Still full after spilling: the incoming frame joins the spill batch.
2150            // It will be marked free by free_phys_batch → free_to_zone.
2151            spill[spill_len] = frame.start_address.as_u64();
2152            spill_len += 1;
2153        }
2154        spill_len
2155    };
2156
2157    if spill_len != 0 {
2158        let mut global = OnDemandGlobalLock::new();
2159        global.free_phys_batch(&spill, spill_len);
2160    }
2161}
2162
2163/// Allocate frames with per-CPU caching on order-0 requests.
2164///
2165/// `_token` is a compile-time proof that interrupts are disabled on the calling CPU,
2166/// preventing re-entrant allocation through an interrupt handler on the same lock.
2167pub fn alloc(_token: &IrqDisabledToken, order: u8) -> Result<PhysFrame, AllocError> {
2168    alloc_migratetype(_token, order, Migratetype::Unmovable)
2169}
2170
2171/// Allocate frames with an explicit migratetype preference.
2172///
2173/// Order-0 allocations use a per-CPU cache partitioned by migratetype so the
2174/// fast path preserves the caller's mobility class.
2175pub fn alloc_migratetype(
2176    _token: &IrqDisabledToken,
2177    order: u8,
2178    migratetype: Migratetype,
2179) -> Result<PhysFrame, AllocError> {
2180    if crate::silo::debug_boot_reg_active() {
2181        crate::serial_println!(
2182            "[trace][buddy] alloc enter order={} migratetype={:?} buddy_lock={:#x}",
2183            order,
2184            migratetype,
2185            &BUDDY_ALLOCATOR as *const _ as usize
2186        );
2187    }
2188    if order == 0 {
2189        alloc_order0_cached(migratetype)
2190    } else {
2191        let mut global = OnDemandGlobalLock::new();
2192        match global.alloc_with_migratetype(order, migratetype) {
2193            Ok(frame) => Ok(frame),
2194            Err(AllocError::OutOfMemory) => {
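                // Slow path: no free block at or above `order`. First try a
                // targeted compaction assist (drain per-CPU caches into the
                // most fragmented candidate zone), then fall back to flushing
                // every local cache before the final retry.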
2195                let candidate = global
2196                    .with_allocator(|allocator, _token| {
2197                        allocator.compaction_candidate(
2198                            order,
2199                            migratetype,
2200                            BuddyAllocator::preferred_zone_order(migratetype),
2201                        )
2202                    })
2203                    .flatten();
2204
2205                if let Some(candidate) = candidate {
2206                    let budget = BuddyAllocator::compaction_drain_budget(candidate);
2207                    global.unlock();
2208                    let drained = drain_local_caches_for_zone(
2209                        budget,
2210                        candidate.zone_idx,
2211                        migratetype,
2212                        &mut global,
2213                    );
2214                    let retry = global.alloc_with_migratetype(order, migratetype);
2215                    record_compaction_attempt(candidate, drained, retry.is_ok());
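                    // If the targeted drain freed anything, the retry result is
                    // final either way; only a completely dry drain escalates to
                    // the full cache flush below.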
2216                    if retry.is_ok() || drained != 0 {
2217                        return retry;
2218                    }
2219                } else {
2220                    global.unlock();
2221                }
2222
2223                let _ = drain_local_caches_to_global(usize::MAX, &mut global);
2224                global.alloc_with_migratetype(order, migratetype)
2225            }
2226            Err(e) => Err(e),
2227        }
2228    }
2229}
2230
2231/// Free frames with per-CPU caching on order-0 requests.
2232///
2233/// `_token` is a compile-time proof that interrupts are disabled on the calling CPU.
2234pub fn free(_token: &IrqDisabledToken, frame: PhysFrame, order: u8) {
2235    let migratetype = BuddyAllocator::block_migratetype(frame.start_address.as_u64());
2236    if order == 0 {
2237        free_order0_cached(frame, migratetype);
2238    } else {
2239        let mut global = OnDemandGlobalLock::new();
2240        global.free(frame, order);
2241    }
2242}
2243
2244impl FrameAllocator for BuddyAllocator {
2245    /// Allocates a block of `1 << order` frames with the default `Unmovable` migratetype.
2246    fn alloc(&mut self, order: u8, token: &IrqDisabledToken) -> Result<PhysFrame, AllocError> {
2247        self.alloc_locked_with_migratetype(order, Migratetype::Unmovable, token)
2248    }
2249
2250    /// Frees a block back to its owning zone, quarantining it if any frame's metadata reports poison.
2251    fn free(&mut self, frame: PhysFrame, order: u8, token: &IrqDisabledToken) {
2252        let cpu_idx = crate::arch::x86_64::percpu::current_cpu_index();
2253        if ALLOC_IN_PROGRESS[cpu_idx].swap(true, core::sync::atomic::Ordering::Acquire) {
2254            panic!("Recursive deallocation detected on CPU {}!", cpu_idx);
2255        }
2256
2257        let frame_phys = frame.start_address.as_u64();
2258        let zi = Self::zone_index_for_addr(frame_phys);
2259        let zone = &mut self.zones[zi];
2260        // NOTE: O(2^order) MetaSlot scan. Acceptable for large-order frees
2261        // (kernel stacks, vmalloc) which are rare; order-0 path is handled
2262        // separately in free_order0_cached with a single-page check.
2263        if crate::memory::frame::block_phys_has_poison_guard(frame_phys, order) {
2264            Self::quarantine_poisoned_block_in_zone(zone, frame, order, token);
2265        } else {
2266            Self::free_to_zone(zone, frame, order, token);
2267        }
2268
2269        ALLOC_IN_PROGRESS[cpu_idx].store(false, core::sync::atomic::Ordering::Release);
2270    }
2271}
2272
2273impl BuddyAllocator {
2274    /// Allocate explicitly from one zone (e.g. DMA-only callers).
2275    pub fn alloc_zone(
2276        &mut self,
2277        order: u8,
2278        zone: ZoneType,
2279        token: &IrqDisabledToken,
2280    ) -> Result<PhysFrame, AllocError> {
2281        self.alloc_zone_locked(order, zone, Migratetype::Unmovable, token)
2282    }
2283
2284    /// Allocate explicitly from one zone with a migratetype hint.
2285    ///
2286    /// This keeps the target zone fixed but still selects the preferred
2287    /// free-list class and fallback donor order from `migratetype`.
2288    pub fn alloc_zone_migratetype(
2289        &mut self,
2290        order: u8,
2291        zone: ZoneType,
2292        migratetype: Migratetype,
2293        token: &IrqDisabledToken,
2294    ) -> Result<PhysFrame, AllocError> {
2295        self.alloc_zone_locked(order, zone, migratetype, token)
2296    }
2297}
2298
2299/// Derived pressure state for a zone snapshot.
2300///
2301/// Thresholds are evaluated against the zone's effective free pages, including
2302/// pages parked in order-0 per-CPU caches.
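///
/// The state is mirrored into relaxed atomics for compaction telemetry via
/// `as_snapshot` / `from_snapshot`; `SNAPSHOT_COUNT` doubles as the
/// "no snapshot recorded yet" sentinel.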
2303#[derive(Debug, Clone, Copy, PartialEq, Eq)]
2304pub enum ZonePressure {
2305    /// Free pages are above the high watermark.
2306    Healthy,
2307    /// Free pages dropped below the high watermark.
2308    High,
2309    /// Free pages dropped below the low watermark.
2310    Low,
2311    /// Free pages reached the minimum watermark plus reserve floor.
2312    Min,
2313}
2314
2315impl ZonePressure {
2316    const SNAPSHOT_COUNT: usize = 4;
2317
2318    #[inline]
2319    const fn as_snapshot(self) -> usize {
2320        match self {
2321            Self::Healthy => 0,
2322            Self::High => 1,
2323            Self::Low => 2,
2324            Self::Min => 3,
2325        }
2326    }
2327
2328    #[inline]
2329    const fn from_snapshot(value: usize) -> Option<Self> {
2330        match value {
2331            0 => Some(Self::Healthy),
2332            1 => Some(Self::High),
2333            2 => Some(Self::Low),
2334            3 => Some(Self::Min),
2335            _ => None,
2336        }
2337    }
2338}
2339
2340/// Snapshot statistics for a single memory zone.
2341///
2342/// The struct is plain data on purpose so low-level diagnostics and crash paths
2343/// can snapshot it onto the stack without heap allocation.
2344#[derive(Debug, Clone, Copy)]
2345pub struct ZoneStats {
2346    /// Zone classification.
2347    pub zone_type: ZoneType,
2348    /// Lowest physical address covered by the zone span.
2349    pub base: u64,
2350    /// Pages currently managed by buddy in this zone.
2351    pub managed_pages: usize,
2352    /// Pages reported as usable by the firmware map before reservations.
2353    pub present_pages: usize,
2354    /// Outer span in pages, including holes.
2355    pub spanned_pages: usize,
2356    /// Pages removed from management during bootstrap.
2357    pub reserved_pages: usize,
2358    /// Pages allocated to live callers.
2359    pub allocated_pages: usize,
2360    /// Order-0 pages currently parked in per-CPU caches.
2361    pub cached_pages: usize,
2362    /// Cached pages parked in unmovable per-CPU caches.
2363    pub cached_unmovable_pages: usize,
2364    /// Cached pages parked in movable per-CPU caches.
2365    pub cached_movable_pages: usize,
2366    /// Effective free pages, including cached pages.
2367    pub free_pages: usize,
2368    /// Free pages tracked in movable free lists.
2369    pub movable_free_pages: usize,
2370    /// Free pages tracked in unmovable free lists.
2371    pub unmovable_free_pages: usize,
2372    /// Number of populated contiguous segments.
2373    pub segment_count: usize,
2374    /// Reserved segment-table capacity.
2375    pub segment_capacity: usize,
2376    /// Total number of pageblocks tracked across all segments.
2377    pub pageblock_count: usize,
2378    /// Pageblocks currently tagged unmovable.
2379    pub unmovable_pageblocks: usize,
2380    /// Pageblocks currently tagged movable.
2381    pub movable_pageblocks: usize,
2382    /// Minimum watermark.
2383    pub watermark_min: usize,
2384    /// Low watermark.
2385    pub watermark_low: usize,
2386    /// High watermark.
2387    pub watermark_high: usize,
2388    /// Low-memory reserve kept for lower-priority paths.
2389    pub lowmem_reserve_pages: usize,
2390    /// Largest currently available free order.
2391    pub largest_free_order: Option<u8>,
2392}
2393
2394impl ZoneStats {
2395    /// Empty snapshot entry for stack-allocated arrays.
2396    pub const fn empty() -> Self {
2397        Self {
2398            zone_type: ZoneType::DMA,
2399            base: 0,
2400            managed_pages: 0,
2401            present_pages: 0,
2402            spanned_pages: 0,
2403            reserved_pages: 0,
2404            allocated_pages: 0,
2405            cached_pages: 0,
2406            cached_unmovable_pages: 0,
2407            cached_movable_pages: 0,
2408            free_pages: 0,
2409            movable_free_pages: 0,
2410            unmovable_free_pages: 0,
2411            segment_count: 0,
2412            segment_capacity: 0,
2413            pageblock_count: 0,
2414            unmovable_pageblocks: 0,
2415            movable_pageblocks: 0,
2416            watermark_min: 0,
2417            watermark_low: 0,
2418            watermark_high: 0,
2419            lowmem_reserve_pages: 0,
2420            largest_free_order: None,
2421        }
2422    }
2423
2424    /// Returns the number of hole pages inside the zone span.
2425    #[inline]
2426    pub fn hole_pages(&self) -> usize {
2427        self.spanned_pages.saturating_sub(self.managed_pages)
2428    }
2429
2430    /// Returns the effective reserve floor enforced by policy.
2431    #[inline]
2432    pub fn reserve_floor_pages(&self) -> usize {
2433        self.watermark_min.saturating_add(self.lowmem_reserve_pages)
2434    }
2435
2436    /// Returns the free pages remaining after the reserve floor is discounted.
2437    #[inline]
2438    pub fn available_after_reserve_pages(&self) -> usize {
2439        self.free_pages.saturating_sub(self.reserve_floor_pages())
2440    }
2441
2442    /// Returns the derived pressure state from the current zone watermarks.
2443    pub fn pressure(&self) -> ZonePressure {
2444        let reserve_floor = self.reserve_floor_pages();
2445        let low_floor = self.watermark_low.saturating_add(self.lowmem_reserve_pages);
2446        let high_floor = self
2447            .watermark_high
2448            .saturating_add(self.lowmem_reserve_pages);
2449
2450        if self.free_pages <= reserve_floor {
2451            ZonePressure::Min
2452        } else if self.free_pages <= low_floor {
2453            ZonePressure::Low
2454        } else if self.free_pages <= high_floor {
2455            ZonePressure::High
2456        } else {
2457            ZonePressure::Healthy
2458        }
2459    }
2460}
2461
2462/// Snapshot of the last fragmentation-driven compaction assist attempt.
2463///
2464/// The fields are intentionally plain data so crash dumps and shell commands
2465/// can read them without locking or heap allocation.
2466#[derive(Debug, Clone, Copy)]
2467pub struct CompactionStats {
2468    /// Number of targeted compaction assists attempted after an allocation miss.
2469    pub attempts: usize,
2470    /// Number of attempts that yielded a successful retry.
2471    pub successes: usize,
2472    /// Last requested buddy order that triggered a targeted drain.
2473    pub last_order: Option<u8>,
2474    /// Mobility class of the last assisted allocation.
2475    pub last_migratetype: Option<Migratetype>,
2476    /// Zone selected as the preferred compaction target.
2477    pub last_zone: Option<ZoneType>,
2478    /// Pressure state observed on that zone before draining caches.
2479    pub last_pressure: Option<ZonePressure>,
2480    /// Fragmentation score that justified the assist path.
2481    pub last_fragmentation_score: usize,
2482    /// Pages requested by the original allocation.
2483    pub last_requested_pages: usize,
2484    /// Effective free pages left above reserves in the chosen zone.
2485    pub last_available_pages: usize,
2486    /// Free pages already available at or above the requested order.
2487    pub last_usable_pages: usize,
2488    /// Order-0 pages parked in local caches for the chosen zone.
2489    pub last_cached_pages: usize,
2490    /// Pages actually drained from local caches during the last attempt.
2491    pub last_drained_pages: usize,
2492    /// Total pageblocks tracked in the selected zone.
2493    pub last_pageblock_count: usize,
2494    /// Pageblocks already tagged with the requested migratetype.
2495    pub last_matching_pageblocks: usize,
2496}
2497
2498impl CompactionStats {
2499    /// Empty snapshot used before any assisted drain happened.
2500    pub const fn empty() -> Self {
2501        Self {
2502            attempts: 0,
2503            successes: 0,
2504            last_order: None,
2505            last_migratetype: None,
2506            last_zone: None,
2507            last_pressure: None,
2508            last_fragmentation_score: 0,
2509            last_requested_pages: 0,
2510            last_available_pages: 0,
2511            last_usable_pages: 0,
2512            last_cached_pages: 0,
2513            last_drained_pages: 0,
2514            last_pageblock_count: 0,
2515            last_matching_pageblocks: 0,
2516        }
2517    }
2518}
2519
2520impl BuddyAllocator {
2521    /// Fast totals without heap allocation (safe in low-level paths).
2522    pub fn page_totals(&self) -> (usize, usize) {
2523        let mut total_pages = 0usize;
2524        let mut allocated_pages = 0usize;
2525        for zone in &self.zones {
2526            total_pages = total_pages.saturating_add(zone.page_count);
2527            allocated_pages = allocated_pages.saturating_add(zone.allocated);
2528        }
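        // Zone accounting treats cache-parked frames as allocated; subtract
        // them so the total reflects pages held by live callers only.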
2529        let cached_pages = LOCAL_CACHED_FRAMES.load(AtomicOrdering::Relaxed);
2530        allocated_pages = allocated_pages.saturating_sub(cached_pages);
2531        (total_pages, allocated_pages)
2532    }
2533
2534    /// Get a reference to a zone by index.
2535    pub fn get_zone(&self, idx: usize) -> &Zone {
2536        &self.zones[idx]
2537    }
2538
2539    /// Snapshot zones without heap allocation.
2540    /// Returns the number of entries written to `out`.
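    ///
    /// Illustrative use (`allocator` is assumed to be an initialized
    /// `BuddyAllocator`; `ignore`d because kernel doctests are not executed):
    ///
    /// ```ignore
    /// let mut stats = [ZoneStats::empty(); ZoneType::COUNT];
    /// let n = allocator.zone_snapshot(&mut stats);
    /// for zs in &stats[..n] {
    ///     serial_println!("{:?}: {} free pages", zs.zone_type, zs.free_pages);
    /// }
    /// ```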
2541    pub fn zone_snapshot(&self, out: &mut [ZoneStats]) -> usize {
2542        let n = core::cmp::min(out.len(), self.zones.len());
2543        for (i, zone) in self.zones.iter().take(n).enumerate() {
2544            let cached_unmovable = local_cached_zone_migratetype_count(i, Migratetype::Unmovable);
2545            let cached_movable = local_cached_zone_migratetype_count(i, Migratetype::Movable);
2546            let cached = cached_unmovable.saturating_add(cached_movable);
2547            let pageblocks = Self::zone_pageblock_counts(zone);
2548            let mut free_by_type = zone.free_pages_by_migratetype();
2549            free_by_type[Migratetype::Unmovable.index()] =
2550                free_by_type[Migratetype::Unmovable.index()].saturating_add(cached_unmovable);
2551            free_by_type[Migratetype::Movable.index()] =
2552                free_by_type[Migratetype::Movable.index()].saturating_add(cached_movable);
2553            out[i] = ZoneStats {
2554                zone_type: zone.zone_type,
2555                base: zone.base.as_u64(),
2556                managed_pages: zone.page_count,
2557                present_pages: zone.present_pages,
2558                spanned_pages: zone.span_pages,
2559                reserved_pages: zone.reserved_pages,
2560                allocated_pages: zone.allocated.saturating_sub(cached),
2561                cached_pages: cached,
2562                cached_unmovable_pages: cached_unmovable,
2563                cached_movable_pages: cached_movable,
2564                free_pages: Self::zone_effective_free_pages(zone, i),
2565                movable_free_pages: free_by_type[Migratetype::Movable.index()],
2566                unmovable_free_pages: free_by_type[Migratetype::Unmovable.index()],
2567                segment_count: zone.segment_count,
2568                segment_capacity: zone.segment_capacity,
2569                pageblock_count: pageblocks[Migratetype::Unmovable.index()]
2570                    .saturating_add(pageblocks[Migratetype::Movable.index()]),
2571                unmovable_pageblocks: pageblocks[Migratetype::Unmovable.index()],
2572                movable_pageblocks: pageblocks[Migratetype::Movable.index()],
2573                watermark_min: zone.watermark_min,
2574                watermark_low: zone.watermark_low,
2575                watermark_high: zone.watermark_high,
2576                lowmem_reserve_pages: zone.lowmem_reserve_pages,
2577                largest_free_order: zone.largest_free_order(),
2578            };
2579        }
2580        n
2581    }
2582}