// strat9_kernel/memory/buddy.rs
1// Buddy allocator implementation
2
3use crate::{
4    boot::entry::{MemoryKind, MemoryRegion},
5    memory::{
6        boot_alloc,
7        frame::{
8            frame_flags, get_meta, AllocError, FrameAllocator, PhysFrame, FRAME_META_LINK_NONE,
9        },
10        hhdm_offset, phys_to_virt,
11        zone::{BuddyBitmap, Zone, ZoneType, MAX_ORDER},
12    },
13    serial_println,
14    sync::{IrqDisabledToken, SpinLock, SpinLockGuard},
15};
16use core::sync::atomic::{AtomicUsize, Ordering as AtomicOrdering};
17use x86_64::PhysAddr;
18
/// Size of one physical page frame in bytes.
const PAGE_SIZE: u64 = 4096;
/// Exclusive upper bound of the DMA zone's physical address window (16 MiB).
const DMA_MAX: u64 = 16 * 1024 * 1024;
/// Exclusive upper bound of the Normal zone; everything above is HighMem.
const NORMAL_MAX: u64 = 896 * 1024 * 1024;
/// Maximum order-0 frames a per-CPU cache can hold.
const LOCAL_CACHE_CAPACITY: usize = 256;
/// Order of the block requested from the global allocator on cache refill.
const LOCAL_CACHE_REFILL_ORDER: u8 = 4;
/// Frames delivered per refill block (2^LOCAL_CACHE_REFILL_ORDER).
const LOCAL_CACHE_REFILL_FRAMES: usize = 1 << (LOCAL_CACHE_REFILL_ORDER as usize);
/// Frames returned to the global allocator per lock acquisition when draining.
const LOCAL_CACHE_FLUSH_BATCH: usize = 64;
26
/// Debug logging for the buddy allocator; forwards to `serial_println!`
/// only when the `selftest` feature is enabled.
#[cfg(feature = "selftest")]
macro_rules! buddy_dbg {
    ($($arg:tt)*) => {
        serial_println!($($arg)*);
    };
}

/// No-op twin of `buddy_dbg!` for normal builds: arguments are discarded
/// at macro-expansion time, so they cost nothing at runtime.
#[cfg(not(feature = "selftest"))]
macro_rules! buddy_dbg {
    ($($arg:tt)*) => {};
}
38
/// Physical page-frame allocator: one buddy system per memory zone
/// (DMA / Normal / HighMem).
pub struct BuddyAllocator {
    // One zone per `ZoneType` variant, indexed by `ZoneType as usize`.
    zones: [Zone; ZoneType::COUNT],
    /// Per-zone bitmap pool reserved from free memory: [start, end).
    bitmap_pool: [(u64, u64); ZoneType::COUNT],
}
44
45impl BuddyAllocator {
    /// Creates an empty allocator with zero-span zones; real state is
    /// established later by `init()`.
    pub const fn new() -> Self {
        BuddyAllocator {
            zones: [
                Zone::new(ZoneType::DMA),
                Zone::new(ZoneType::Normal),
                Zone::new(ZoneType::HighMem),
            ],
            bitmap_pool: [(0, 0); ZoneType::COUNT],
        }
    }
57
58    /// Performs the init operation.
59    pub fn init(&mut self, memory_regions: &[MemoryRegion]) {
60        #[cfg(debug_assertions)]
61        debug_assert!(
62            hhdm_offset() != u64::MAX,
63            "HHDM offset sanity check failed unexpectedly"
64        );
65
66        serial_println!(
67            "Buddy allocator: initializing with {} memory regions",
68            memory_regions.len()
69        );
70        for (_protected_base, _protected_size) in
71            Self::protected_module_ranges().into_iter().flatten()
72        {
73            buddy_dbg!(
74                "  Protected module range: phys=0x{:x}..0x{:x}",
75                Self::align_down(_protected_base, PAGE_SIZE),
76                Self::align_up(_protected_base.saturating_add(_protected_size), PAGE_SIZE)
77            );
78        }
79
80        // Pass 1: compute per-zone address span (base + span_pages)
81        self.pass_count(memory_regions);
82
83        // Pass 2: reserve bitmap storage via the boot allocator.
84        self.pass_boot_alloc_and_setup_bitmaps();
85
86        // Pass 3: populate free lists from the boot allocator's remaining ranges.
87        let mut remaining = [MemoryRegion {
88            base: 0,
89            size: 0,
90            kind: MemoryKind::Reserved,
91        }; boot_alloc::MAX_BOOT_ALLOC_REGIONS];
92        let remaining_len = boot_alloc::snapshot_free_regions(&mut remaining);
93        self.pass_populate(&remaining[..remaining_len]);
94
95        // Seal the boot allocator: all its remaining free regions are now managed
96        // by buddy.  Any later boot_alloc::alloc_stack() call would otherwise
97        // double-allocate pages that buddy already tracks in its free lists.
98        boot_alloc::seal();
99
100        for zone in &self.zones {
101            serial_println!(
102                "  Zone {:?}: managed={} pages span={} pages ({} MB managed)",
103                zone.zone_type,
104                zone.page_count,
105                zone.span_pages,
106                (zone.page_count * 4096) / (1024 * 1024)
107            );
108        }
109    }
110
    /// Pass 1: computes each zone's physical span (`base` + `span_pages`).
    ///
    /// Clamps every usable region to each zone's address window and records
    /// the minimum start / maximum end seen per zone. Counters and free
    /// lists are reset here; pages are inserted later, in pass 3.
    fn pass_count(&mut self, memory_regions: &[MemoryRegion]) {
        let mut min_base = [u64::MAX; ZoneType::COUNT];
        let mut max_end = [0u64; ZoneType::COUNT];

        for region in memory_regions {
            for zi in 0..ZoneType::COUNT {
                if let Some((start, end)) = Self::zone_intersection_aligned(region, zi) {
                    if start < min_base[zi] {
                        min_base[zi] = start;
                    }
                    if end > max_end[zi] {
                        max_end[zi] = end;
                    }
                }
            }
        }

        for zi in 0..ZoneType::COUNT {
            let zone = &mut self.zones[zi];
            zone.page_count = 0;
            zone.allocated = 0;
            zone.free_lists = [0; MAX_ORDER + 1];

            // No usable memory intersected this zone: mark it empty.
            if min_base[zi] == u64::MAX || max_end[zi] <= min_base[zi] {
                zone.base = PhysAddr::new(0);
                zone.span_pages = 0;
                continue;
            }

            zone.base = PhysAddr::new(min_base[zi]);
            zone.span_pages = ((max_end[zi] - min_base[zi]) / PAGE_SIZE) as usize;
        }
    }
145
146    /// Performs the pass steal and setup bitmaps operation.
147    fn pass_boot_alloc_and_setup_bitmaps(&mut self) {
148        for zi in 0..ZoneType::COUNT {
149            let zone_span = self.zones[zi].span_pages;
150            let needed_bytes = Self::bitmap_bytes_for_span(zone_span);
151            let reserved_bytes = Self::align_up(needed_bytes as u64, PAGE_SIZE);
152
153            if reserved_bytes == 0 {
154                self.bitmap_pool[zi] = (0, 0);
155                self.clear_zone_bitmaps(zi);
156                continue;
157            }
158
159            let pool_start = boot_alloc::alloc_bytes(needed_bytes, PAGE_SIZE as usize)
160                .unwrap_or_else(|| {
161                    panic!(
162                        "Buddy allocator: unable to reserve {} bytes for zone {:?} bitmaps",
163                        needed_bytes, self.zones[zi].zone_type
164                    )
165                })
166                .as_u64();
167            let pool_end = pool_start.saturating_add(reserved_bytes);
168            self.bitmap_pool[zi] = (pool_start, pool_end);
169            buddy_dbg!(
170                "  Zone {:?}: bitmap pool phys=0x{:x}..0x{:x} ({} bytes)",
171                self.zones[zi].zone_type,
172                pool_start,
173                pool_end,
174                needed_bytes
175            );
176
177            // Zero stolen pages to initialize all bitmaps to 0.
178            unsafe {
179                core::ptr::write_bytes(
180                    phys_to_virt(pool_start) as *mut u8,
181                    0,
182                    (pool_end - pool_start) as usize,
183                );
184            }
185
186            self.setup_zone_bitmaps(zi, pool_start, pool_end);
187        }
188    }
189
190    /// Performs the pass populate operation.
191    fn pass_populate(&mut self, memory_regions: &[MemoryRegion]) {
192        for region in memory_regions {
193            for zi in 0..ZoneType::COUNT {
194                let Some((start, end)) = Self::zone_intersection_aligned(region, zi) else {
195                    continue;
196                };
197                self.seed_range_as_free(zi, start, end);
198            }
199        }
200    }
201
    /// Resets all of a zone's buddy bitmaps (and, in debug builds, the
    /// allocation tracker) to the empty, unbacked state. Used for zones
    /// with no memory, where no bitmap storage was reserved.
    fn clear_zone_bitmaps(&mut self, zone_idx: usize) {
        let zone = &mut self.zones[zone_idx];
        zone.buddy_bitmaps = [BuddyBitmap::empty(); MAX_ORDER + 1];
        #[cfg(debug_assertions)]
        {
            zone.alloc_bitmap = BuddyBitmap::empty();
        }
    }
211
    /// Carves the reserved pool `[pool_start, pool_end)` into one parity
    /// bitmap per order, plus (debug builds only) a per-page allocation
    /// bitmap. The sequential layout walked by `cursor` must mirror the
    /// size computed by `bitmap_bytes_for_span()`.
    fn setup_zone_bitmaps(&mut self, zone_idx: usize, pool_start: u64, pool_end: u64) {
        let zone = &mut self.zones[zone_idx];
        let mut cursor = pool_start;

        for order in 0..=MAX_ORDER {
            // One bit per buddy pair at this order.
            let num_bits = Self::pairs_for_order(zone.span_pages, order as u8);
            let num_bytes = Self::bits_to_bytes(num_bits) as u64;
            if num_bits == 0 {
                zone.buddy_bitmaps[order] = BuddyBitmap::empty();
                continue;
            }
            debug_assert!(cursor + num_bytes <= pool_end);
            // Bitmap storage is accessed through the higher-half (HHDM)
            // mapping of the physical pool.
            zone.buddy_bitmaps[order] = BuddyBitmap {
                data: phys_to_virt(cursor) as *mut u8,
                num_bits,
            };
            cursor += num_bytes;
        }

        #[cfg(debug_assertions)]
        {
            // One bit per page, used to catch double alloc/free.
            let num_bits = zone.span_pages;
            let num_bytes = Self::bits_to_bytes(num_bits) as u64;
            if num_bits == 0 {
                zone.alloc_bitmap = BuddyBitmap::empty();
            } else {
                debug_assert!(cursor + num_bytes <= pool_end);
                zone.alloc_bitmap = BuddyBitmap {
                    data: phys_to_virt(cursor) as *mut u8,
                    num_bits,
                };
                cursor += num_bytes;
            }
        }

        debug_assert!(cursor <= pool_end);
    }
250
    /// Inserts every non-protected page of `[start, end)` into the zone as
    /// an order-0 free block; `insert_free_block()` coalesces adjacent
    /// pages into larger orders as they arrive. `page_count` counts only
    /// the pages actually handed to the allocator.
    fn seed_range_as_free(&mut self, zone_idx: usize, start: u64, end: u64) {
        if start >= end {
            return;
        }
        let zone = &mut self.zones[zone_idx];
        let mut addr = start;
        while addr < end {
            // Pages inside protected module images are never handed out.
            if Self::is_protected_module_page(addr) {
                buddy_dbg!(
                    "  Zone {:?}: skip protected page 0x{:x}",
                    zone.zone_type,
                    addr
                );
                addr += PAGE_SIZE;
                continue;
            }
            Self::insert_free_block(zone, addr, 0);
            zone.page_count += 1;
            addr += PAGE_SIZE;
        }
    }
273
    /// Allocates one block of 2^`order` pages from `zone`, or `None` if no
    /// free block of that order (or any larger, splittable order) exists.
    ///
    /// Scans free lists from `order` upward; the first hit is removed and
    /// split back down, pushing each split-off upper half onto the next
    /// lower order's free list. Every claim/split toggles the parity bit of
    /// the affected buddy pair so `insert_free_block()` can coalesce later.
    /// `_token` is caller-supplied evidence that IRQs are disabled while
    /// the zone is mutated.
    fn alloc_from_zone(zone: &mut Zone, order: u8, _token: &IrqDisabledToken) -> Option<PhysFrame> {
        for cur_order in order..=MAX_ORDER as u8 {
            let Some(frame_phys) = Self::free_list_pop(zone, cur_order) else {
                continue;
            };
            let block_size = PAGE_SIZE << cur_order;
            let block_end = frame_phys.saturating_add(block_size);
            if Self::protected_overlap_end(frame_phys, block_end).is_some() {
                // Inconsistency: a free block overlaps with protected kernel memory.
                // This means seed_range_as_free() was incorrect.
                panic!("Buddy allocator inconsistency: free block 0x{:x} order {} overlaps protected memory", frame_phys, cur_order);
            }

            // One block of this order transitions free -> allocated.
            let _ = Self::toggle_pair(zone, frame_phys, cur_order);

            // Split down to requested order.
            let mut split_order = cur_order;
            while split_order > order {
                split_order -= 1;
                let buddy_phys = frame_phys + ((1u64 << split_order) * PAGE_SIZE);
                Self::mark_block_free(buddy_phys, split_order);
                Self::free_list_push(zone, buddy_phys, split_order);
                // Pair at split_order becomes (allocated, free).
                let _ = Self::toggle_pair(zone, frame_phys, split_order);
            }

            zone.allocated += 1usize << order;
            Self::mark_block_allocated(frame_phys, order);

            #[cfg(debug_assertions)]
            Self::mark_allocated(zone, frame_phys, order, true);

            return PhysFrame::from_start_address(PhysAddr::new(frame_phys)).ok();
        }
        None
    }
312
    /// Returns a block of 2^`order` pages to `zone`, coalescing with free
    /// buddies via `insert_free_block()`.
    ///
    /// Blocks overlapping protected module memory are deliberately dropped
    /// (leaked) instead of re-entering the free lists, so protected pages
    /// can never be re-allocated.
    fn free_to_zone(zone: &mut Zone, frame: PhysFrame, order: u8, _token: &IrqDisabledToken) {
        let frame_phys = frame.start_address.as_u64();
        let block_size = PAGE_SIZE << order;
        let block_end = frame_phys.saturating_add(block_size);

        debug_assert!(order <= MAX_ORDER as u8);
        debug_assert!(frame.start_address.is_aligned(PAGE_SIZE << order));
        debug_assert!(zone.contains_address(frame.start_address));

        if Self::protected_overlap_end(frame_phys, block_end).is_some() {
            buddy_dbg!(
                "  Zone {:?}: drop free overlap-protected 0x{:x}..0x{:x} order={}",
                zone.zone_type,
                frame_phys,
                block_end,
                order
            );
            return;
        }

        // Debug tracker first: asserts on double free before state changes.
        #[cfg(debug_assertions)]
        Self::mark_allocated(zone, frame_phys, order, false);

        Self::mark_block_free(frame_phys, order);
        Self::insert_free_block(zone, frame_phys, order);
        zone.allocated = zone.allocated.saturating_sub(1usize << order);
    }
341
    /// Linux-style parity-map coalescing insertion.
    ///
    /// One bitmap bit covers each buddy pair and is toggled on every
    /// alloc/free inside the pair. When the toggle reports a set bit, the
    /// buddy half is still in use, so the block is pushed at the current
    /// order; a clear bit means both halves are now free, so the buddy is
    /// unlinked and the merged block retried one order higher, up to
    /// `MAX_ORDER`. Bitmap/list disagreement is tolerated in release
    /// builds by pushing without merging (debug builds assert).
    fn insert_free_block(zone: &mut Zone, frame_phys: u64, initial_order: u8) {
        let mut current = frame_phys;
        let mut order = initial_order;

        loop {
            let bit_is_set = Self::toggle_pair(zone, current, order);
            if bit_is_set || order == MAX_ORDER as u8 {
                Self::mark_block_free(current, order);
                Self::free_list_push(zone, current, order);
                break;
            }

            // The buddy can fall outside the zone span near its top edge.
            let Some(buddy) = Self::buddy_phys(zone, current, order) else {
                Self::mark_block_free(current, order);
                Self::free_list_push(zone, current, order);
                break;
            };

            let removed = Self::free_list_remove(zone, buddy, order);
            if !removed {
                // Inconsistency fallback: keep allocator consistent.
                debug_assert!(false, "buddy bitmap/list inconsistency while freeing");
                Self::mark_block_free(current, order);
                Self::free_list_push(zone, current, order);
                break;
            }

            // The merged block starts at the lower of the two buddies.
            current = core::cmp::min(current, buddy);
            order += 1;
        }
    }
374
    /// Converts a physical address into a page index relative to the zone
    /// base. The caller must pass a page-aligned address at or above the
    /// zone base (debug-asserted).
    #[inline]
    fn page_index(zone: &Zone, phys: u64) -> usize {
        debug_assert!(zone.span_pages > 0);
        let base = zone.base.as_u64();
        debug_assert!(phys >= base);
        debug_assert!((phys - base).is_multiple_of(PAGE_SIZE));
        ((phys - base) / PAGE_SIZE) as usize
    }

    /// Index of the buddy-pair bit covering `phys` at `order`; each bit
    /// spans 2^(order+1) pages.
    #[inline]
    fn pair_index(zone: &Zone, phys: u64, order: u8) -> usize {
        Self::page_index(zone, phys) >> (order as usize + 1)
    }
390
    /// Toggles the parity bit of the buddy pair containing `phys` at
    /// `order`, returning the value reported by `BuddyBitmap::toggle`.
    /// For unbacked (empty) bitmaps it returns `true`, which both call
    /// sites treat as "do not merge".
    #[inline]
    fn toggle_pair(zone: &mut Zone, phys: u64, order: u8) -> bool {
        let bitmap = zone.buddy_bitmaps[order as usize];
        if bitmap.is_empty() {
            return true;
        }
        let idx = Self::pair_index(zone, phys, order);
        debug_assert!(idx < bitmap.num_bits);
        bitmap.toggle(idx)
    }
402
403    /// Performs the buddy phys operation.
404    #[inline]
405    fn buddy_phys(zone: &Zone, phys: u64, order: u8) -> Option<u64> {
406        let base = zone.base.as_u64();
407        if phys < base {
408            return None;
409        }
410        let offset = phys - base;
411        let block_size = PAGE_SIZE << order;
412        let buddy_offset = offset ^ block_size;
413        let buddy_page = (buddy_offset / PAGE_SIZE) as usize;
414        if buddy_page >= zone.span_pages {
415            return None;
416        }
417        Some(base + buddy_offset)
418    }
419
    /// Debug-only per-page allocation tracker: sets (on alloc) or clears
    /// (on free) one bit per page of the block, asserting on double
    /// allocation and double free. No-op for zones without a tracker
    /// bitmap.
    #[cfg(debug_assertions)]
    fn mark_allocated(zone: &mut Zone, frame_phys: u64, order: u8, allocated: bool) {
        if zone.alloc_bitmap.is_empty() {
            return;
        }
        let start = Self::page_index(zone, frame_phys);
        let count = 1usize << order;
        for i in 0..count {
            let bit = start + i;
            debug_assert!(bit < zone.alloc_bitmap.num_bits);
            if allocated {
                debug_assert!(!zone.alloc_bitmap.test(bit), "double allocation detected");
                zone.alloc_bitmap.set(bit);
            } else {
                debug_assert!(zone.alloc_bitmap.test(bit), "double free detected");
                zone.alloc_bitmap.clear(bit);
            }
        }
    }
440
    /// Pushes `phys` onto the front of the order's free list. The list is
    /// an intrusive doubly-linked list threaded through per-frame metadata;
    /// `0` acts as the null link (physical page 0 is never managed — see
    /// `zone_intersection_aligned()`).
    fn free_list_push(zone: &mut Zone, phys: u64, order: u8) {
        let head = zone.free_lists[order as usize];
        Self::write_free_prev(phys, 0);
        Self::write_free_next(phys, head);
        if head != 0 {
            Self::write_free_prev(head, phys);
        }
        zone.free_lists[order as usize] = phys;
    }

    /// Pops the head of the order's free list, clearing its links;
    /// `None` when the list is empty.
    fn free_list_pop(zone: &mut Zone, order: u8) -> Option<u64> {
        let head = zone.free_lists[order as usize];
        if head == 0 {
            return None;
        }
        let next = Self::read_free_next(head);
        zone.free_lists[order as usize] = next;
        if next != 0 {
            Self::write_free_prev(next, 0);
        }
        // Scrub the popped frame's links so stale pointers never leak.
        Self::write_free_next(head, 0);
        Self::write_free_prev(head, 0);
        Some(head)
    }

    /// Unlinks `phys` from the order's free list. Returns `false` without
    /// modifying anything when `phys` is provably not on this list (no
    /// prev link and not the list head).
    fn free_list_remove(zone: &mut Zone, phys: u64, order: u8) -> bool {
        let prev = Self::read_free_prev(phys);
        let next = Self::read_free_next(phys);

        if prev == 0 {
            // No prev link: `phys` must be the head to be on this list.
            if zone.free_lists[order as usize] != phys {
                return false;
            }
            zone.free_lists[order as usize] = next;
        } else {
            Self::write_free_next(prev, next);
        }

        if next != 0 {
            Self::write_free_prev(next, prev);
        }

        Self::write_free_next(phys, 0);
        Self::write_free_prev(phys, 0);
        true
    }
490
491    /// Reads free next.
492    #[inline]
493    fn read_free_next(phys: u64) -> u64 {
494        let next = get_meta(PhysAddr::new(phys)).next();
495        if next == FRAME_META_LINK_NONE {
496            0
497        } else {
498            next
499        }
500    }
501
502    /// Writes free next.
503    #[inline]
504    fn write_free_next(phys: u64, next: u64) {
505        get_meta(PhysAddr::new(phys)).set_next(if next == 0 {
506            FRAME_META_LINK_NONE
507        } else {
508            next
509        });
510    }
511
512    /// Reads free prev.
513    #[inline]
514    fn read_free_prev(phys: u64) -> u64 {
515        let prev = get_meta(PhysAddr::new(phys)).prev();
516        if prev == FRAME_META_LINK_NONE {
517            0
518        } else {
519            prev
520        }
521    }
522
523    /// Writes free prev.
524    #[inline]
525    fn write_free_prev(phys: u64, prev: u64) {
526        get_meta(PhysAddr::new(phys)).set_prev(if prev == 0 {
527            FRAME_META_LINK_NONE
528        } else {
529            prev
530        });
531    }
532
533    /// Performs the zone index for addr operation.
534    fn zone_index_for_addr(addr: u64) -> usize {
535        if addr < DMA_MAX {
536            ZoneType::DMA as usize
537        } else if addr < NORMAL_MAX {
538            ZoneType::Normal as usize
539        } else {
540            ZoneType::HighMem as usize
541        }
542    }
543
    /// Physical address window `[start, end)` of a zone:
    /// DMA = [0, DMA_MAX), Normal = [DMA_MAX, NORMAL_MAX),
    /// HighMem = everything above NORMAL_MAX.
    fn zone_bounds(zone_idx: usize) -> (u64, u64) {
        match zone_idx {
            x if x == ZoneType::DMA as usize => (0, DMA_MAX),
            x if x == ZoneType::Normal as usize => (DMA_MAX, NORMAL_MAX),
            _ => (NORMAL_MAX, u64::MAX),
        }
    }
552
    /// Intersects a usable (Free/Reclaim) region with a zone's address
    /// window and returns the overlap shrunk inward to whole pages
    /// (start rounded up, end rounded down). Returns `None` for unusable
    /// region kinds and empty intersections.
    fn zone_intersection_aligned(region: &MemoryRegion, zone_idx: usize) -> Option<(u64, u64)> {
        if !matches!(region.kind, MemoryKind::Free | MemoryKind::Reclaim) {
            return None;
        }

        let region_start = region.base;
        let region_end = region.base.saturating_add(region.size);
        let (zone_start, zone_end) = Self::zone_bounds(zone_idx);

        let start = core::cmp::max(region_start, zone_start);
        let end = core::cmp::min(region_end, zone_end);
        if start >= end {
            return None;
        }

        // Reserve physical address 0 as sentinel/not-usable.
        let start = Self::align_up(core::cmp::max(start, PAGE_SIZE), PAGE_SIZE);
        let end = Self::align_down(end, PAGE_SIZE);
        if start >= end {
            None
        } else {
            Some((start, end))
        }
    }
578
    /// If `[start, end)` overlaps any protected module range, returns the
    /// page-aligned end of the first overlapping range; otherwise `None`.
    /// Ranges are widened to page granularity before the overlap test.
    fn protected_overlap_end(start: u64, end: u64) -> Option<u64> {
        for (base, size) in Self::protected_module_ranges().into_iter().flatten() {
            if size == 0 {
                continue;
            }
            let pstart = Self::align_down(base, PAGE_SIZE);
            let pend = Self::align_up(base.saturating_add(size), PAGE_SIZE);
            if end <= pstart || start >= pend {
                continue;
            }
            return Some(pend);
        }
        None
    }
594
595    /// Returns whether protected module page.
596    fn is_protected_module_page(phys: u64) -> bool {
597        let page = Self::align_down(phys, PAGE_SIZE);
598        for (base, size) in Self::protected_module_ranges().into_iter().flatten() {
599            if size == 0 {
600                continue;
601            }
602            let pstart = Self::align_down(base, PAGE_SIZE);
603            let pend = Self::align_up(base.saturating_add(size), PAGE_SIZE);
604            if page >= pstart && page < pend {
605                return true;
606            }
607        }
608        false
609    }
610
    /// Protected module ranges `(base, size)` as reported by the boot
    /// allocator; `None` entries are unused slots in the fixed array.
    fn protected_module_ranges() -> [Option<(u64, u64)>; boot_alloc::MAX_PROTECTED_RANGES] {
        boot_alloc::protected_module_ranges()
    }
615
616    /// Performs the pairs for order operation.
617    #[inline]
618    fn pairs_for_order(span_pages: usize, order: u8) -> usize {
619        let pair_span = 1usize << (order as usize + 1);
620        span_pages.div_ceil(pair_span)
621    }
622
623    /// Performs the bits to bytes operation.
624    #[inline]
625    fn bits_to_bytes(bits: usize) -> usize {
626        bits.div_ceil(8)
627    }
628
629    /// Performs the bitmap bytes for span operation.
630    fn bitmap_bytes_for_span(span_pages: usize) -> usize {
631        let mut bytes = 0usize;
632        for order in 0..=MAX_ORDER as u8 {
633            bytes += Self::bits_to_bytes(Self::pairs_for_order(span_pages, order));
634        }
635        #[cfg(debug_assertions)]
636        {
637            bytes += Self::bits_to_bytes(span_pages);
638        }
639        bytes
640    }
641
642    /// Performs the align up operation.
643    #[inline]
644    fn align_up(value: u64, align: u64) -> u64 {
645        debug_assert!(align.is_power_of_two());
646        (value + align - 1) & !(align - 1)
647    }
648
649    /// Performs the align down operation.
650    #[inline]
651    fn align_down(value: u64, align: u64) -> u64 {
652        debug_assert!(align.is_power_of_two());
653        value & !(align - 1)
654    }
655
    /// Stamps every page of the block as ALLOCATED in frame metadata.
    fn mark_block_allocated(frame_phys: u64, order: u8) {
        Self::set_block_meta(frame_phys, order, frame_flags::ALLOCATED);
    }

    /// Stamps every page of the block as FREE in frame metadata.
    fn mark_block_free(frame_phys: u64, order: u8) {
        Self::set_block_meta(frame_phys, order, frame_flags::FREE);
    }

    /// Resets per-frame metadata (flags, order, links, refcount) for all
    /// 2^`order` pages of a block. Every page records the block's order,
    /// not its own position within the block.
    fn set_block_meta(frame_phys: u64, order: u8, flags: u32) {
        let page_count = 1usize << order;
        for page_idx in 0..page_count {
            let phys = frame_phys + page_idx as u64 * PAGE_SIZE;
            let meta = get_meta(PhysAddr::new(phys));
            meta.set_flags(flags);
            meta.set_order(order);
            meta.set_next(FRAME_META_LINK_NONE);
            meta.set_prev(FRAME_META_LINK_NONE);
            meta.reset_refcount();
        }
    }
676}
677
/// Global buddy allocator instance; `None` until `init_buddy_allocator()`
/// installs the initialized allocator.
static BUDDY_ALLOCATOR: SpinLock<Option<BuddyAllocator>> = SpinLock::new(None);

/// Returns the global buddy lock address for deadlock tracing.
pub fn debug_buddy_lock_addr() -> usize {
    &BUDDY_ALLOCATOR as *const _ as usize
}
684
/// Per-CPU flag to detect recursive allocations (deadlocks from logs/interrupts)
// NOTE(review): not referenced anywhere in this part of the file —
// presumably set/checked by allocation entry points further down; confirm
// before assuming it is dead.
static ALLOC_IN_PROGRESS: [core::sync::atomic::AtomicBool; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { core::sync::atomic::AtomicBool::new(false) }; crate::arch::x86_64::percpu::MAX_CPUS];
688
/// Fixed-capacity LIFO stack of order-0 physical frame addresses, kept per
/// CPU so most single-page alloc/free calls avoid the global buddy lock.
struct LocalFrameCache {
    // Number of valid entries in `frames`.
    len: usize,
    // Frame start addresses; only `frames[..len]` is meaningful.
    frames: [u64; LOCAL_CACHE_CAPACITY],
}
693
694impl LocalFrameCache {
695    const fn new() -> Self {
696        Self {
697            len: 0,
698            frames: [0; LOCAL_CACHE_CAPACITY],
699        }
700    }
701
702    fn clear(&mut self) {
703        self.len = 0;
704    }
705
706    fn pop(&mut self) -> Option<PhysFrame> {
707        if self.len == 0 {
708            return None;
709        }
710        self.len -= 1;
711        Some(PhysFrame {
712            start_address: PhysAddr::new(self.frames[self.len]),
713        })
714    }
715
716    fn push(&mut self, frame: PhysFrame) -> Result<(), PhysFrame> {
717        if self.len >= LOCAL_CACHE_CAPACITY {
718            return Err(frame);
719        }
720        self.frames[self.len] = frame.start_address.as_u64();
721        self.len += 1;
722        Ok(())
723    }
724
725    fn pop_many(&mut self, out: &mut [u64]) -> usize {
726        let count = core::cmp::min(self.len, out.len());
727        for slot in out.iter_mut().take(count) {
728            self.len -= 1;
729            *slot = self.frames[self.len];
730        }
731        count
732    }
733}
734
/// Per-CPU order-0 frame caches (see `LocalFrameCache`).
static LOCAL_FRAME_CACHES: [SpinLock<LocalFrameCache>; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { SpinLock::new(LocalFrameCache::new()) }; crate::arch::x86_64::percpu::MAX_CPUS];
/// Total frames currently held across all per-CPU caches.
static LOCAL_CACHED_FRAMES: AtomicUsize = AtomicUsize::new(0);
/// The same count broken down per zone, indexed by `ZoneType as usize`.
static LOCAL_CACHED_ZONE_FRAMES: [AtomicUsize; ZoneType::COUNT] =
    [const { AtomicUsize::new(0) }; ZoneType::COUNT];
740
/// Guard type for the global buddy allocator lock.
type GlobalGuard = SpinLockGuard<'static, Option<BuddyAllocator>>;

// Takes the global buddy lock lazily on first use and allows dropping it
// early, so long operations can release the lock between batches.
struct OnDemandGlobalLock {
    // `Some` while the global lock is currently held.
    guard: Option<GlobalGuard>,
}
746
impl OnDemandGlobalLock {
    /// Creates a handle that does not yet hold the lock.
    fn new() -> Self {
        Self { guard: None }
    }

    /// Drops the global lock if held; a later operation re-acquires it.
    fn unlock(&mut self) {
        self.guard = None;
    }

    /// Runs `f` on the global allocator under the lock, acquiring the lock
    /// on demand. Returns `None` when the allocator has not been
    /// initialized yet.
    fn with_allocator<R>(
        &mut self,
        f: impl FnOnce(&mut BuddyAllocator, &IrqDisabledToken) -> R,
    ) -> Option<R> {
        let guard = self.guard.get_or_insert_with(|| BUDDY_ALLOCATOR.lock());
        guard.with_mut_and_token(|slot, token| slot.as_mut().map(|allocator| f(allocator, token)))
    }

    /// Allocates a 2^`order`-page block; `OutOfMemory` when the allocator
    /// is uninitialized.
    fn alloc(&mut self, order: u8) -> Result<PhysFrame, AllocError> {
        self.with_allocator(|allocator, token| allocator.alloc(order, token))
            .unwrap_or(Err(AllocError::OutOfMemory))
    }

    /// Frees a 2^`order`-page block (no-op when uninitialized).
    fn free(&mut self, frame: PhysFrame, order: u8) {
        let _ = self.with_allocator(|allocator, token| allocator.free(frame, order, token));
    }

    /// Frees the first `count` order-0 frames of `phys_batch` under a
    /// single lock acquisition.
    fn free_phys_batch(&mut self, phys_batch: &[u64], count: usize) {
        if count == 0 {
            return;
        }
        let _ = self.with_allocator(|allocator, token| {
            for phys in phys_batch.iter().take(count).copied() {
                allocator.free(
                    PhysFrame {
                        start_address: PhysAddr::new(phys),
                    },
                    0,
                    token,
                );
            }
        });
    }
}
790
791#[inline]
792fn zone_index_for_phys(phys: u64) -> usize {
793    if phys < DMA_MAX {
794        ZoneType::DMA as usize
795    } else if phys < NORMAL_MAX {
796        ZoneType::Normal as usize
797    } else {
798        ZoneType::HighMem as usize
799    }
800}
801
/// Returns whether a frame may live in the per-CPU caches: only
/// Normal-zone frames are cached; DMA and HighMem frames always go
/// straight back to the global allocator.
#[inline]
fn is_cacheable_phys(phys: u64) -> bool {
    zone_index_for_phys(phys) == ZoneType::Normal as usize
}
806
/// Bumps the global and per-zone cached-frame counters for `phys`.
/// Relaxed ordering: the counters are not used to synchronize memory.
#[inline]
fn local_cached_inc_phys(phys: u64) {
    LOCAL_CACHED_FRAMES.fetch_add(1, AtomicOrdering::Relaxed);
    LOCAL_CACHED_ZONE_FRAMES[zone_index_for_phys(phys)].fetch_add(1, AtomicOrdering::Relaxed);
}

/// Decrements the global and per-zone cached-frame counters for `phys`,
/// debug-asserting that neither counter underflows.
#[inline]
fn local_cached_dec_phys(phys: u64) {
    let prev_total = LOCAL_CACHED_FRAMES.fetch_sub(1, AtomicOrdering::Relaxed);
    debug_assert!(prev_total > 0);
    let zone = zone_index_for_phys(phys);
    let prev_zone = LOCAL_CACHED_ZONE_FRAMES[zone].fetch_sub(1, AtomicOrdering::Relaxed);
    debug_assert!(prev_zone > 0);
}
821
/// Flushes up to `max_pages` frames from all per-CPU caches back to the
/// global allocator, in batches of at most `LOCAL_CACHE_FLUSH_BATCH`.
/// The global lock is released after each per-CPU batch to bound hold
/// time. Returns the number of frames actually drained.
fn drain_local_caches_to_global(max_pages: usize, global: &mut OnDemandGlobalLock) -> usize {
    if max_pages == 0 {
        return 0;
    }

    let mut drained = 0usize;
    let mut batch = [0u64; LOCAL_CACHE_FLUSH_BATCH];
    for cpu in 0..crate::arch::x86_64::percpu::MAX_CPUS {
        if drained >= max_pages {
            break;
        }
        let target = core::cmp::min(batch.len(), max_pages.saturating_sub(drained));
        if target == 0 {
            break;
        }

        // Pop under the per-CPU lock only; the global lock is acquired
        // afterwards, so the two locks are never nested here.
        let popped = {
            let mut cache = LOCAL_FRAME_CACHES[cpu].lock();
            cache.pop_many(&mut batch[..target])
        };
        if popped == 0 {
            continue;
        }

        for phys in batch.iter().take(popped).copied() {
            local_cached_dec_phys(phys);
        }
        global.free_phys_batch(&batch, popped);

        // Keep lock acquisition on-demand during cross-CPU draining.
        global.unlock();
        drained += popped;
    }

    drained
}
858
/// Initializes the global buddy allocator from the boot memory map.
///
/// Resets every per-CPU cache and the cached-frame counters first, then
/// builds a fresh allocator and installs it under the global lock.
pub fn init_buddy_allocator(memory_regions: &[MemoryRegion]) {
    for cache in &LOCAL_FRAME_CACHES {
        cache.lock().clear();
    }
    LOCAL_CACHED_FRAMES.store(0, AtomicOrdering::Relaxed);
    for zone_cached in &LOCAL_CACHED_ZONE_FRAMES {
        zone_cached.store(0, AtomicOrdering::Relaxed);
    }

    let mut allocator = BuddyAllocator::new();
    allocator.init(memory_regions);
    *BUDDY_ALLOCATOR.lock() = Some(allocator);
}
873
/// Returns the lock around the global allocator; the contained `Option`
/// is `None` until `init_buddy_allocator()` has run.
pub fn get_allocator() -> &'static SpinLock<Option<BuddyAllocator>> {
    &BUDDY_ALLOCATOR
}
878
/// Refills the current CPU's cache from the global allocator and returns
/// one frame to the caller.
///
/// Allocates a 2^`LOCAL_CACHE_REFILL_ORDER` block (falling back to a
/// single order-0 frame on OutOfMemory), keeps the first cacheable frame
/// as the return value, caches the cacheable remainder, and frees
/// everything else (non-Normal-zone frames and cache overflow) back to
/// the global allocator.
///
/// NOTE(review): if the whole block lands outside the Normal zone, every
/// frame is freed back and this returns OutOfMemory even though the
/// global allocation succeeded — confirm whether DMA/HighMem frames are
/// intentionally never returned from this path.
fn refill_local_cache(
    cpu_idx: usize,
    global: &mut OnDemandGlobalLock,
) -> Result<PhysFrame, AllocError> {
    // Critical path: refill in batches from the global allocator to amortize lock contention.
    let (base, order) = match global.alloc(LOCAL_CACHE_REFILL_ORDER) {
        Ok(frame) => (frame, LOCAL_CACHE_REFILL_ORDER),
        Err(AllocError::OutOfMemory) => (global.alloc(0)?, 0),
        Err(e) => return Err(e),
    };
    global.unlock();

    let frame_count = 1usize << order;
    let mut overflow = [0u64; LOCAL_CACHE_REFILL_FRAMES];
    let mut overflow_len = 0usize;
    let mut ret = None;

    {
        let mut cache = LOCAL_FRAME_CACHES[cpu_idx].lock();
        for idx in 0..frame_count {
            let phys = base.start_address.as_u64() + (idx as u64) * PAGE_SIZE;
            let frame = PhysFrame {
                start_address: PhysAddr::new(phys),
            };
            // Non-cacheable frames are returned to the global allocator.
            if !is_cacheable_phys(phys) {
                overflow[overflow_len] = phys;
                overflow_len += 1;
                continue;
            }
            if ret.is_none() {
                ret = Some(frame);
                continue;
            }
            if cache.push(frame).is_ok() {
                local_cached_inc_phys(phys);
            } else {
                overflow[overflow_len] = phys;
                overflow_len += 1;
            }
        }
    }

    // Free leftovers outside the per-CPU cache lock.
    if overflow_len != 0 {
        global.free_phys_batch(&overflow, overflow_len);
    }

    ret.ok_or(AllocError::OutOfMemory)
}
927
928fn steal_from_other_caches(cpu_idx: usize) -> Option<PhysFrame> {
929    let cpu_count = crate::arch::x86_64::percpu::cpu_count()
930        .max(1)
931        .min(crate::arch::x86_64::percpu::MAX_CPUS);
932
933    for step in 1..cpu_count {
934        let peer = (cpu_idx + step) % cpu_count;
935        let mut cache = LOCAL_FRAME_CACHES[peer].lock();
936        if let Some(frame) = cache.pop() {
937            local_cached_dec_phys(frame.start_address.as_u64());
938            return Some(frame);
939        }
940    }
941    None
942}
943
944fn alloc_order0_cached() -> Result<PhysFrame, AllocError> {
945    let cpu_idx = crate::arch::x86_64::percpu::current_cpu_index();
946
947    {
948        let mut cache = LOCAL_FRAME_CACHES[cpu_idx].lock();
949        if let Some(frame) = cache.pop() {
950            local_cached_dec_phys(frame.start_address.as_u64());
951            return Ok(frame);
952        }
953    }
954
955    let mut global = OnDemandGlobalLock::new();
956
957    if let Ok(frame) = refill_local_cache(cpu_idx, &mut global) {
958        return Ok(frame);
959    }
960    // Critical lock-order rule: never hold global while probing local caches.
961    global.unlock();
962
963    if let Some(frame) = steal_from_other_caches(cpu_idx) {
964        return Ok(frame);
965    }
966
967    global.alloc(0)
968}
969
970fn free_order0_cached(frame: PhysFrame) {
971    if !is_cacheable_phys(frame.start_address.as_u64()) {
972        let mut global = OnDemandGlobalLock::new();
973        global.free(frame, 0);
974        return;
975    }
976
977    let cpu_idx = crate::arch::x86_64::percpu::current_cpu_index();
978    let mut spill = [0u64; LOCAL_CACHE_FLUSH_BATCH];
979
980    let spill_len = {
981        let mut cache = LOCAL_FRAME_CACHES[cpu_idx].lock();
982        if cache.push(frame).is_ok() {
983            local_cached_inc_phys(frame.start_address.as_u64());
984            return;
985        }
986
987        let mut spill_len = cache.pop_many(&mut spill);
988        for phys in spill.iter().take(spill_len).copied() {
989            local_cached_dec_phys(phys);
990        }
991
992        if cache.push(frame).is_ok() {
993            local_cached_inc_phys(frame.start_address.as_u64());
994        } else {
995            spill[spill_len] = frame.start_address.as_u64();
996            spill_len += 1;
997        }
998        spill_len
999    };
1000
1001    if spill_len != 0 {
1002        let mut global = OnDemandGlobalLock::new();
1003        global.free_phys_batch(&spill, spill_len);
1004    }
1005}
1006
1007/// Allocate frames with per-CPU caching on order-0 requests.
1008///
1009/// `_token` is a compile-time proof that interrupts are disabled on the calling CPU,
1010/// preventing re-entrant allocation through an interrupt handler on the same lock.
1011pub fn alloc(_token: &IrqDisabledToken, order: u8) -> Result<PhysFrame, AllocError> {
1012    if crate::silo::debug_boot_reg_active() {
1013        crate::serial_println!(
1014            "[trace][buddy] alloc enter order={} buddy_lock={:#x}",
1015            order,
1016            &BUDDY_ALLOCATOR as *const _ as usize
1017        );
1018    }
1019    if order == 0 {
1020        alloc_order0_cached()
1021    } else {
1022        let mut global = OnDemandGlobalLock::new();
1023        match global.alloc(order) {
1024            Ok(frame) => Ok(frame),
1025            Err(AllocError::OutOfMemory) => {
1026                // Critical lock-order rule: release global before draining local caches.
1027                global.unlock();
1028                let _ = drain_local_caches_to_global(usize::MAX, &mut global);
1029                global.alloc(order)
1030            }
1031            Err(e) => Err(e),
1032        }
1033    }
1034}
1035
1036/// Free frames with per-CPU caching on order-0 requests.
1037///
1038/// `_token` is a compile-time proof that interrupts are disabled on the calling CPU.
1039pub fn free(_token: &IrqDisabledToken, frame: PhysFrame, order: u8) {
1040    if order == 0 {
1041        free_order0_cached(frame);
1042    } else {
1043        let mut global = OnDemandGlobalLock::new();
1044        global.free(frame, order);
1045    }
1046}
1047
1048impl FrameAllocator for BuddyAllocator {
1049    /// Performs the alloc operation.
1050    fn alloc(&mut self, order: u8, token: &IrqDisabledToken) -> Result<PhysFrame, AllocError> {
1051        if order > MAX_ORDER as u8 {
1052            return Err(AllocError::InvalidOrder);
1053        }
1054
1055        let cpu_idx = crate::arch::x86_64::percpu::current_cpu_index();
1056        if ALLOC_IN_PROGRESS[cpu_idx].swap(true, core::sync::atomic::Ordering::Acquire) {
1057            panic!("Recursive allocation detected on CPU {}!", cpu_idx);
1058        }
1059
1060        let result = (|| {
1061            for zi in [
1062                ZoneType::Normal as usize,
1063                ZoneType::HighMem as usize,
1064                ZoneType::DMA as usize,
1065            ] {
1066                if let Some(frame) = Self::alloc_from_zone(&mut self.zones[zi], order, token) {
1067                    return Ok(frame);
1068                }
1069            }
1070            Err(AllocError::OutOfMemory)
1071        })();
1072
1073        ALLOC_IN_PROGRESS[cpu_idx].store(false, core::sync::atomic::Ordering::Release);
1074        result
1075    }
1076
1077    /// Performs the free operation.
1078    fn free(&mut self, frame: PhysFrame, order: u8, token: &IrqDisabledToken) {
1079        let cpu_idx = crate::arch::x86_64::percpu::current_cpu_index();
1080        if ALLOC_IN_PROGRESS[cpu_idx].swap(true, core::sync::atomic::Ordering::Acquire) {
1081            panic!("Recursive deallocation detected on CPU {}!", cpu_idx);
1082        }
1083
1084        let frame_phys = frame.start_address.as_u64();
1085        let zi = Self::zone_index_for_addr(frame_phys);
1086        let zone = &mut self.zones[zi];
1087        Self::free_to_zone(zone, frame, order, token);
1088
1089        ALLOC_IN_PROGRESS[cpu_idx].store(false, core::sync::atomic::Ordering::Release);
1090    }
1091}
1092
1093impl BuddyAllocator {
1094    /// Allocate explicitly from one zone (e.g. DMA-only callers).
1095    pub fn alloc_zone(
1096        &mut self,
1097        order: u8,
1098        zone: ZoneType,
1099        token: &IrqDisabledToken,
1100    ) -> Result<PhysFrame, AllocError> {
1101        if order > MAX_ORDER as u8 {
1102            return Err(AllocError::InvalidOrder);
1103        }
1104
1105        let cpu_idx = crate::arch::x86_64::percpu::current_cpu_index();
1106        if ALLOC_IN_PROGRESS[cpu_idx].swap(true, core::sync::atomic::Ordering::Acquire) {
1107            panic!("Recursive allocation detected on CPU {}!", cpu_idx);
1108        }
1109
1110        let result = Self::alloc_from_zone(&mut self.zones[zone as usize], order, token)
1111            .ok_or(AllocError::OutOfMemory);
1112
1113        ALLOC_IN_PROGRESS[cpu_idx].store(false, core::sync::atomic::Ordering::Release);
1114        result
1115    }
1116}
1117
/// Statistics for a single memory zone
#[derive(Debug, Clone, Copy)]
pub struct ZoneStats {
    /// Which zone (DMA / Normal / HighMem) these figures describe.
    pub zone_type: ZoneType,
    /// Physical base address of the zone.
    pub base: u64,
    /// Total number of pages managed by the zone.
    pub page_count: usize,
    /// Pages currently accounted as allocated in the zone.
    pub allocated: usize,
}
1126
1127impl BuddyAllocator {
1128    /// Fast totals without heap allocation (safe in low-level paths).
1129    pub fn page_totals(&self) -> (usize, usize) {
1130        let mut total_pages = 0usize;
1131        let mut allocated_pages = 0usize;
1132        for zone in &self.zones {
1133            total_pages = total_pages.saturating_add(zone.page_count);
1134            allocated_pages = allocated_pages.saturating_add(zone.allocated);
1135        }
1136        let cached_pages = LOCAL_CACHED_FRAMES.load(AtomicOrdering::Relaxed);
1137        allocated_pages = allocated_pages.saturating_sub(cached_pages);
1138        (total_pages, allocated_pages)
1139    }
1140
1141    /// Snapshot zones without heap allocation.
1142    /// Returns the number of entries written to `out`.
1143    pub fn zone_snapshot(&self, out: &mut [(u8, u64, usize, usize)]) -> usize {
1144        let n = core::cmp::min(out.len(), self.zones.len());
1145        for (i, zone) in self.zones.iter().take(n).enumerate() {
1146            let cached = LOCAL_CACHED_ZONE_FRAMES[i].load(AtomicOrdering::Relaxed);
1147            out[i] = (
1148                zone.zone_type as u8,
1149                zone.base.as_u64(),
1150                zone.page_count,
1151                zone.allocated.saturating_sub(cached),
1152            );
1153        }
1154        n
1155    }
1156}