// strat9_kernel/shell/commands/sys/heap.rs
//! Allocator inspection and stress tests for the kernel shell.
//!
//! Commands such as `heap live` print task ids, addresses, and source locations
//! from vmalloc attribution: useful for debugging but **sensitive** if the
//! serial console is exposed; restrict shell access accordingly.

use core::fmt::Write;

use crate::{
    alloc::string::String,
    shell::{output::format_bytes, ShellError},
    shell_println,
};
12
/// Usage string printed whenever `heap` arguments are invalid.
const HEAP_USAGE: &str = "Usage: heap [summary|vmalloc|live [limit]|fail|diag|stress [rounds]]";
/// Hard cap on the `heap live [limit]` argument, bounding the size of the
/// snapshot buffer allocated for live-allocation rows.
const MAX_LIVE_LIMIT: usize = 4096;

/// Maximum number of pointers held on the stack during a slab stress test.
/// Sized to cover the densest class (8 B, 509 blocks/page) over 1 page.
const STRESS_MAX_BLOCKS: usize = 512;
/// Scheduler ticks for which the userspace stress workload is observed.
const STRESS_WORKLOAD_OBSERVE_TICKS: u64 = 100;
/// Scheduler ticks allowed for the userspace stress workload to quiesce
/// before the sub-test reports a failure.
const STRESS_WORKLOAD_EXIT_TIMEOUT_TICKS: u64 = 1_000;
21
/// Result of a single stress sub-test.
enum StressOutcome {
    /// The sub-test ran and all of its assertions held.
    Pass,
    /// The sub-test detected a genuine problem; the message says what.
    Fail(&'static str),
    /// The sub-test could not run meaningfully (lock busy, interrupted,
    /// missing prerequisite); the message says why.
    Skip(&'static str),
}
27
28/// `heap` : allocator telemetry and diagnostics.
29pub fn cmd_heap(args: &[String]) -> Result<(), ShellError> {
30    match args.first().map(|s| s.as_str()) {
31        None | Some("summary") => cmd_heap_summary(),
32        Some("vmalloc") => cmd_heap_vmalloc(),
33        Some("live") => cmd_heap_live(args.get(1)),
34        Some("fail") => cmd_heap_fail(),
35        Some("diag") => {
36            crate::memory::heap::dump_diagnostics();
37            Ok(())
38        }
39        Some("stress") => cmd_heap_stress(args.get(1)),
40        Some(_) => {
41            shell_println!("{}", HEAP_USAGE);
42            Err(ShellError::InvalidArguments)
43        }
44    }
45}
46
47fn cmd_heap_summary() -> Result<(), ShellError> {
48    let (total_pages, allocated_pages) = {
49        let Some(guard) = crate::memory::buddy::get_allocator().try_lock() else {
50            shell_println!("heap: buddy allocator busy, retry");
51            return Ok(());
52        };
53        let Some(alloc) = guard.as_ref() else {
54            shell_println!("heap: allocator not initialized");
55            return Ok(());
56        };
57        alloc.page_totals()
58    };
59
60    let free_pages = total_pages.saturating_sub(allocated_pages);
61    let fail_counts = crate::memory::buddy::buddy_alloc_fail_counts_snapshot();
62    let slab = crate::memory::heap::slab_diag_snapshot();
63    let phys = crate::memory::phys_contiguous_diag();
64
65    let (total_val, total_unit) = format_bytes(total_pages.saturating_mul(4096));
66    let (used_val, used_unit) = format_bytes(allocated_pages.saturating_mul(4096));
67    let (free_val, free_unit) = format_bytes(free_pages.saturating_mul(4096));
68
69    shell_println!("Heap summary:");
70    shell_println!(
71        "  Buddy: total={} {} used={} {} free={} {}",
72        total_val,
73        total_unit,
74        used_val,
75        used_unit,
76        free_val,
77        free_unit
78    );
79    shell_println!(
80        "  Slab: pages_allocated={} pages_reclaimed={} pages_live={}",
81        slab.pages_allocated,
82        slab.pages_reclaimed,
83        slab.pages_live
84    );
85    shell_println!(
86        "  Phys-contig: live_pages={} alloc_failures={}",
87        phys.pages_live,
88        phys.alloc_fail_count
89    );
90
91    let mut any_fail = false;
92    for count in fail_counts {
93        if count != 0 {
94            any_fail = true;
95            break;
96        }
97    }
98    if any_fail {
99        let mut line = alloc::string::String::from("  Buddy failures:");
100        for (order, count) in fail_counts.iter().enumerate() {
101            if *count != 0 {
102                use core::fmt::Write;
103                let _ = write!(line, " o{}={} ", order, count);
104            }
105        }
106        shell_println!("{}", line);
107    }
108
109    Ok(())
110}
111
112fn cmd_heap_vmalloc() -> Result<(), ShellError> {
113    let Some(diag) = crate::memory::vmalloc::diag_snapshot() else {
114        shell_println!("vmalloc: not initialized");
115        return Ok(());
116    };
117
118    shell_println!("Vmalloc:");
119    shell_println!("  Arena: 0x{:x}..0x{:x}", diag.arena_start, diag.arena_end);
120    shell_println!(
121        "  Usage: allocs={} alloc_pages={} free_pages={} peak_pages={}",
122        diag.alloc_count,
123        diag.allocated_pages,
124        diag.free_pages,
125        diag.peak_pages
126    );
127    shell_println!(
128        "  Meta: extents={} largest_free_pages={} metadata_pages={} node_pool_free={}",
129        diag.free_extent_count,
130        diag.largest_free_pages,
131        diag.metadata_pages,
132        diag.node_pool_free
133    );
134    shell_println!(
135        "  Failures: vmalloc={} policy_rejects={} total_seq={}",
136        diag.fail_count,
137        diag.policy_rejects,
138        diag.total_seq
139    );
140    if let Some(last) = diag.last_failure {
141        shell_println!(
142            "  Last failure: size={} pages={} error={:?}",
143            last.size,
144            last.pages,
145            last.error
146        );
147    }
148
149    Ok(())
150}
151
152fn cmd_heap_live(limit_arg: Option<&String>) -> Result<(), ShellError> {
153    let limit = match limit_arg {
154        None => 32usize,
155        Some(raw) => raw
156            .parse::<usize>()
157            .map_err(|_| ShellError::InvalidArguments)?
158            .min(MAX_LIVE_LIMIT),
159    };
160    if limit == 0 {
161        shell_println!("{}", HEAP_USAGE);
162        return Err(ShellError::InvalidArguments);
163    }
164
165    let mut rows = alloc::vec::Vec::new();
166    rows.resize(
167        limit,
168        crate::memory::vmalloc::VmallocLiveAllocationSnapshot {
169            seq: 0,
170            task_id: 0,
171            pid: 0,
172            tid: 0,
173            silo_id: 0,
174            size: 0,
175            pages: 0,
176            vaddr: 0,
177            backend: crate::memory::vmalloc::VmallocAllocBackend::KernelVirtual,
178            caller_file: "",
179            caller_line: 0,
180            caller_column: 0,
181        },
182    );
183
184    let Some(diag) = crate::memory::vmalloc::diag_snapshot() else {
185        shell_println!("vmalloc: not initialized");
186        return Ok(());
187    };
188    let count = crate::memory::vmalloc::live_allocations_snapshot(&mut rows[..]);
189    // Non-atomic diagnostic read: alloc_count and live rows are obtained under
190    // separate VMALLOC acquisitions, so they may reflect slightly different
191    // moments in time under concurrent allocation traffic.
192    let total_live = diag.alloc_count;
193
194    if count == 0 {
195        shell_println!("heap live: no active vmalloc allocations");
196        return Ok(());
197    }
198
199    shell_println!("Live vmalloc allocations:");
200    for entry in rows.iter().take(count) {
201        shell_println!(
202            "  seq={} task={} pid={} tid={} silo={} size={} pages={} vaddr=0x{:x} caller={}:{}:{}",
203            entry.seq,
204            entry.task_id,
205            entry.pid,
206            entry.tid,
207            entry.silo_id,
208            entry.size,
209            entry.pages,
210            entry.vaddr,
211            entry.caller_file,
212            entry.caller_line,
213            entry.caller_column
214        );
215    }
216    let hidden = total_live.saturating_sub(count);
217    if hidden != 0 {
218        shell_println!("  ... {} more allocation(s) not shown", hidden);
219    }
220
221    Ok(())
222}
223
224fn cmd_heap_fail() -> Result<(), ShellError> {
225    match crate::memory::heap::last_heap_failure_snapshot() {
226        Some(failure) => shell_println!(
227            "Last heap failure: backend={:?} requested={} align={} effective={} error={:?}",
228            failure.backend,
229            failure.requested_size,
230            failure.align,
231            failure.effective_size,
232            failure.error
233        ),
234        None => shell_println!("Last heap failure: none"),
235    }
236
237    match crate::memory::vmalloc::last_failure_snapshot() {
238        Some(last) => shell_println!(
239            "Last vmalloc failure: size={} pages={} error={:?}",
240            last.size,
241            last.pages,
242            last.error
243        ),
244        None => shell_println!("Last vmalloc failure: none"),
245    }
246
247    Ok(())
248}
249
// =============================================================================
// Stress tests
// =============================================================================
253
254/// `heap stress [rounds]` : exercise allocator smoke paths and one bounded
255/// userspace workload path.
256///
257/// Each round runs:
258///   slab_reclaim[S]    : fill/drain slab classes without leaking.
259///   slab_frag[256]     : verify a page becomes partial after partial free.
260///   vmalloc_cycle      : alloc/free vmalloc ranges and verify live tracking.
261///   vmalloc_frag       : random-size allocs freed in random order; checks that
262///                        virtual fragmentation is observable and all pages are
263///                        returned after drain.
264///   telemetry          : sanity-check counters are self-consistent.
265///   userspace_workload : launch `/initfs/test_mem_stressed` in a silo and
266///                        observe its lifecycle for a bounded period.
267fn cmd_heap_stress(rounds_arg: Option<&String>) -> Result<(), ShellError> {
268    let rounds = match rounds_arg {
269        None => 1,
270        Some(r) => r
271            .parse::<usize>()
272            .map_err(|_| ShellError::InvalidArguments)?
273            .max(1)
274            .min(32),
275    };
276
277    shell_println!("heap stress: {} round(s)", rounds);
278
279    let mut total_pass = 0usize;
280    let mut total_fail = 0usize;
281    let mut total_skip = 0usize;
282
283    for round in 0..rounds {
284        if crate::shell::is_interrupted() {
285            shell_println!("^C");
286            return Ok(());
287        }
288        if rounds > 1 {
289            shell_println!("--- round {}/{} ---", round + 1, rounds);
290        }
291
292        let mut run = |name: &str, result: StressOutcome| match result {
293            StressOutcome::Pass => {
294                shell_println!("  {:<36} PASS", name);
295                total_pass += 1;
296            }
297            StressOutcome::Fail(msg) => {
298                shell_println!("  {:<36} FAIL  {}", name, msg);
299                total_fail += 1;
300            }
301            StressOutcome::Skip(msg) => {
302                shell_println!("  {:<36} SKIP  {}", name, msg);
303                total_skip += 1;
304            }
305        };
306
307        run("slab_reclaim[2048 ci=25 p=4]", stress_slab_reclaim(25, 4));
308        run("slab_reclaim[512 ci=17 p=3]", stress_slab_reclaim(17, 3));
309        run("slab_reclaim[64 ci=5 p=2]", stress_slab_reclaim(5, 2));
310        run("slab_reclaim[8 ci=0 p=1]", stress_slab_reclaim(0, 1));
311        run("slab_frag[256 ci=13]", stress_slab_frag(13));
312        run("vmalloc_cycle", stress_vmalloc_cycle());
313        run("vmalloc_frag", stress_vmalloc_frag());
314        run("telemetry_consistency", stress_telemetry());
315        run("userspace_workload", stress_userspace_workload());
316    }
317
318    shell_println!(
319        "heap stress: {} passed, {} failed, {} skipped",
320        total_pass,
321        total_fail,
322        total_skip
323    );
324    if total_fail == 0 {
325        Ok(())
326    } else {
327        Err(ShellError::ExecutionFailed)
328    }
329}
330
331// ---------------------------------------------------------------------------
332// Sub-test: slab_reclaim
333//
334// Allocate `pages` worth of slab objects for class `ci`, then free them all.
335// This is a smoke test for fill/drain behavior, not an isolated proof about
336// global reclaim counters under concurrent allocator activity.
337// ---------------------------------------------------------------------------
338fn stress_slab_reclaim(ci: usize, pages: usize) -> StressOutcome {
339    use alloc::alloc::{alloc, dealloc, Layout};
340
341    let class_size = crate::memory::heap::slab_class_size(ci);
342    let blocks_per_page = crate::memory::heap::slab_blocks_per_page(ci);
343    let total = pages * blocks_per_page;
344
345    if total > STRESS_MAX_BLOCKS {
346        return StressOutcome::Skip("test config exceeds STRESS_MAX_BLOCKS");
347    }
348
349    let layout = match Layout::from_size_align(class_size, 8) {
350        Ok(layout) => layout,
351        Err(_) => return StressOutcome::Fail("Layout::from_size_align failed"),
352    };
353
354    let before = crate::memory::heap::slab_diag_snapshot();
355
356    let mut ptrs = [core::ptr::null_mut::<u8>(); STRESS_MAX_BLOCKS];
357    let mut count = 0usize;
358
359    for i in 0..total {
360        if crate::shell::is_interrupted() {
361            for j in 0..count {
362                unsafe { dealloc(ptrs[j], layout) };
363            }
364            return StressOutcome::Skip("interrupted");
365        }
366        let ptr = unsafe { alloc(layout) };
367        if ptr.is_null() {
368            for j in 0..count {
369                unsafe { dealloc(ptrs[j], layout) };
370            }
371            return StressOutcome::Fail("OOM during slab reclaim fill phase");
372        }
373        ptrs[i] = ptr;
374        count += 1;
375    }
376
377    for i in 0..count {
378        unsafe { dealloc(ptrs[i], layout) };
379        ptrs[i] = core::ptr::null_mut();
380    }
381
382    let after = crate::memory::heap::slab_diag_snapshot();
383
384    // Check 1: pages_live must return to (at most) baseline + 1.
385    // Tolerates ±1 for atomic snapshot races between the two load() calls.
386    if after.pages_live > before.pages_live.saturating_add(1) {
387        return StressOutcome::Fail("slab pages not fully reclaimed after freeing all blocks");
388    }
389
390    // Check 2: pages_reclaimed must have grown by at least `pages`.
391    // Catches bugs where pages are silently lost (neither live nor reclaimed):
392    // pages_live alone cannot distinguish between correct reclaim and a leak
393    // that happens to match the baseline by coincidence.
394    if after.pages_reclaimed < before.pages_reclaimed.saturating_add(pages) {
395        return StressOutcome::Fail("slab pages_reclaimed did not increase by expected count");
396    }
397
398    // Sanity: allocator must still be functional after a full fill/drain cycle.
399    let ptr = unsafe { alloc(layout) };
400    if ptr.is_null() {
401        return StressOutcome::Fail("slab unusable after fill/drain cycle");
402    }
403    unsafe { dealloc(ptr, layout) };
404
405    StressOutcome::Pass
406}
407
// ---------------------------------------------------------------------------
// Sub-test: slab_frag
//
// Allocate one page worth of blocks for class `ci`, free half of them, and
// verify the backing page becomes visible in the partial list. This test is
// skipped when the allocator is already using multiple pages for the sample,
// because that means concurrent or pre-existing activity breaks the single-page
// assumption required for a precise page-local assertion.
// ---------------------------------------------------------------------------
fn stress_slab_frag(ci: usize) -> StressOutcome {
    use alloc::alloc::{alloc, dealloc, Layout};

    let class_size = crate::memory::heap::slab_class_size(ci);
    let bpp = crate::memory::heap::slab_blocks_per_page(ci);

    // A partial page needs at least one used and one free block.
    if bpp < 2 {
        return StressOutcome::Skip("class has < 2 blocks/page");
    }
    if bpp > STRESS_MAX_BLOCKS {
        return StressOutcome::Skip("blocks_per_page > STRESS_MAX_BLOCKS");
    }

    let layout = match Layout::from_size_align(class_size, 8) {
        Ok(layout) => layout,
        Err(_) => return StressOutcome::Fail("Layout::from_size_align failed"),
    };

    let mut ptrs = [core::ptr::null_mut::<u8>(); STRESS_MAX_BLOCKS];

    // Fill exactly one page worth of blocks; unwind on Ctrl-C or OOM.
    for i in 0..bpp {
        if crate::shell::is_interrupted() {
            for j in 0..i {
                unsafe { dealloc(ptrs[j], layout) };
            }
            return StressOutcome::Skip("interrupted");
        }
        let ptr = unsafe { alloc(layout) };
        if ptr.is_null() {
            for j in 0..i {
                unsafe { dealloc(ptrs[j], layout) };
            }
            return StressOutcome::Fail("OOM during slab frag alloc phase");
        }
        ptrs[i] = ptr;
    }

    // All blocks must share one backing page; otherwise the page-local
    // partial-list assertions below would be meaningless.
    let first_page = page_base(ptrs[0]);
    for ptr in ptrs.iter().take(bpp) {
        if page_base(*ptr) != first_page {
            for ptr in ptrs.iter().take(bpp) {
                unsafe { dealloc(*ptr, layout) };
            }
            return StressOutcome::Skip("allocator not quiescent for single-page frag check");
        }
    }

    // Free every even-indexed block: the page is now half used, half free.
    for i in (0..bpp).step_by(2) {
        unsafe { dealloc(ptrs[i], layout) };
        ptrs[i] = core::ptr::null_mut();
    }

    // The page must now be on the partial list. `None` means the slab lock
    // could not be taken, so nothing can be asserted either way.
    let Some(partial_seen) = crate::memory::heap::slab_page_in_partial_list(ci, first_page) else {
        for i in (1..bpp).step_by(2) {
            if !ptrs[i].is_null() {
                unsafe { dealloc(ptrs[i], layout) };
            }
        }
        return StressOutcome::Skip("slab lock busy");
    };
    if !partial_seen {
        for i in (1..bpp).step_by(2) {
            if !ptrs[i].is_null() {
                unsafe { dealloc(ptrs[i], layout) };
            }
        }
        return StressOutcome::Fail("page did not appear in partial list after partial free");
    }

    // Free the remaining odd-indexed blocks; the page should become fully
    // free and drop off the partial list.
    for i in (1..bpp).step_by(2) {
        if !ptrs[i].is_null() {
            unsafe { dealloc(ptrs[i], layout) };
            ptrs[i] = core::ptr::null_mut();
        }
    }

    let Some(still_partial) = crate::memory::heap::slab_page_in_partial_list(ci, first_page) else {
        return StressOutcome::Skip("slab lock busy after free");
    };
    if still_partial {
        // Another task may have allocated into this page between our frees
        // and this check; treat as inconclusive rather than a failure.
        return StressOutcome::Skip(
            "page still visible as partial after full free; concurrent same-class activity suspected",
        );
    }

    StressOutcome::Pass
}
504
505// ---------------------------------------------------------------------------
506// Sub-test: vmalloc_cycle
507//
508// Allocate and free several vmalloc regions of increasing size. Validation is
509// based on the presence/absence of the specific ranges in the live set, not on
510// global counters that can move under concurrent allocator traffic.
511// ---------------------------------------------------------------------------
512fn stress_vmalloc_cycle() -> StressOutcome {
513    const SIZES: [usize; 4] = [4096, 16384, 65536, 262144];
514    let mut vptrs = [core::ptr::null_mut::<u8>(); 4];
515
516    if crate::memory::vmalloc::diag_snapshot().is_none() {
517        return StressOutcome::Skip("vmalloc not initialised");
518    }
519
520    for (i, &size) in SIZES.iter().enumerate() {
521        if crate::shell::is_interrupted() {
522            for ptr in vptrs.iter_mut() {
523                if !ptr.is_null() {
524                    crate::sync::with_irqs_disabled(|token| {
525                        crate::memory::free_kernel_virtual(*ptr, token);
526                    });
527                    *ptr = core::ptr::null_mut();
528                }
529            }
530            return StressOutcome::Skip("interrupted");
531        }
532        let ptr = crate::sync::with_irqs_disabled(|token| {
533            crate::memory::allocate_kernel_virtual(size, token).ok()
534        });
535        match ptr {
536            Some(p) if !p.is_null() => vptrs[i] = p,
537            _ => {
538                // OOM or arena exhausted: free what we have.
539                for j in 0..i {
540                    if !vptrs[j].is_null() {
541                        crate::sync::with_irqs_disabled(|token| {
542                            crate::memory::free_kernel_virtual(vptrs[j], token);
543                        });
544                    }
545                }
546                return StressOutcome::Fail("vmalloc returned null during cycle alloc phase");
547            }
548        }
549
550        match crate::memory::vmalloc::is_live_allocation(vptrs[i]) {
551            Some(true) => {}
552            Some(false) => {
553                for ptr in vptrs.iter_mut().take(i + 1) {
554                    if !ptr.is_null() {
555                        crate::sync::with_irqs_disabled(|token| {
556                            crate::memory::free_kernel_virtual(*ptr, token);
557                        });
558                        *ptr = core::ptr::null_mut();
559                    }
560                }
561                return StressOutcome::Fail("vmalloc allocation missing from live set");
562            }
563            None => {
564                for ptr in vptrs.iter_mut().take(i + 1) {
565                    if !ptr.is_null() {
566                        crate::sync::with_irqs_disabled(|token| {
567                            crate::memory::free_kernel_virtual(*ptr, token);
568                        });
569                        *ptr = core::ptr::null_mut();
570                    }
571                }
572                return StressOutcome::Skip("VMALLOC lock busy during live-set check");
573            }
574        }
575    }
576
577    for ptr in vptrs.iter_mut() {
578        if !ptr.is_null() {
579            crate::sync::with_irqs_disabled(|token| {
580                crate::memory::free_kernel_virtual(*ptr, token);
581            });
582            match crate::memory::vmalloc::is_live_allocation(*ptr) {
583                Some(false) => {}
584                Some(true) => return StressOutcome::Fail("freed vmalloc range still marked live"),
585                None => return StressOutcome::Skip("VMALLOC lock busy during post-free check"),
586            }
587            *ptr = core::ptr::null_mut();
588        }
589    }
590
591    StressOutcome::Pass
592}
593
// ---------------------------------------------------------------------------
// Sub-test: vmalloc_frag
//
// Stress the vmalloc extent allocator with random-size allocations freed in
// a random order to verify:
//
//  1. Virtual fragmentation is observable: after freeing half the regions in
//     shuffled order while the other half remain live, free_extent_count must
//     exceed the pre-test baseline (the arena was a single large free extent).
//  2. No silent leaks: after a full drain, free_pages returns to baseline.
//  3. Coherence: largest_free_pages ≤ free_pages.
//
// Uses an Xorshift64 PRNG seeded from the tick counter: no heap allocation
// for PRNG state (stack-only bookkeeping, sizes and order vary across runs).
//
// Note B (ticket #49): SMP contention is not exercised here.
// That requires a dedicated benchmark harness, not a shell sub-test.
// ---------------------------------------------------------------------------
const VMALLOC_FRAG_COUNT: usize = 32;

fn stress_vmalloc_frag() -> StressOutcome {
    // Xorshift64 step; a zero state would be a fixed point, hence the
    // `| 1` applied to the seed below.
    #[inline(always)]
    fn xorshift64(s: &mut u64) -> u64 {
        *s ^= *s << 13;
        *s ^= *s >> 7;
        *s ^= *s << 17;
        *s
    }

    let before = match crate::memory::vmalloc::diag_snapshot() {
        Some(s) => s,
        None => return StressOutcome::Skip("vmalloc not initialised"),
    };

    // Non-zero seed so sizes and shuffle order vary across runs.
    let mut rng: u64 = crate::process::scheduler::ticks() | 1;

    let mut ptrs = [core::ptr::null_mut::<u8>(); VMALLOC_FRAG_COUNT];
    let mut sizes = [0usize; VMALLOC_FRAG_COUNT];
    let mut allocated = 0usize;

    // Phase 1: allocate VMALLOC_FRAG_COUNT regions of random sizes (1-16 pages).
    for i in 0..VMALLOC_FRAG_COUNT {
        if crate::shell::is_interrupted() {
            for j in 0..allocated {
                if !ptrs[j].is_null() {
                    crate::sync::with_irqs_disabled(|token| {
                        crate::memory::free_kernel_virtual(ptrs[j], token);
                    });
                }
            }
            return StressOutcome::Skip("interrupted");
        }
        let pages = (xorshift64(&mut rng) as usize % 16) + 1; // 1..=16 pages
        let size = pages * 4096;
        let ptr = crate::sync::with_irqs_disabled(|token| {
            crate::memory::allocate_kernel_virtual(size, token).ok()
        });
        match ptr {
            Some(p) if !p.is_null() => {
                ptrs[i] = p;
                sizes[i] = size;
                allocated += 1;
            }
            _ => break, // arena exhausted: test with however many we got
        }
    }

    // Too few regions would make the fragmentation assertion unreliable.
    if allocated < 8 {
        for j in 0..allocated {
            if !ptrs[j].is_null() {
                crate::sync::with_irqs_disabled(|token| {
                    crate::memory::free_kernel_virtual(ptrs[j], token);
                });
            }
        }
        return StressOutcome::Skip("vmalloc arena too small for frag test (< 8 regions)");
    }

    // Phase 2: Fisher-Yates shuffle of indices [0..allocated].
    let mut order = [0usize; VMALLOC_FRAG_COUNT];
    for i in 0..allocated {
        order[i] = i;
    }
    for i in (1..allocated).rev() {
        let j = (xorshift64(&mut rng) as usize) % (i + 1);
        order.swap(i, j);
    }

    // Phase 3: free the first half of the shuffled order.
    // The remaining half stays live, creating non-contiguous holes in the arena.
    let half = allocated / 2;
    for &idx in &order[..half] {
        if !ptrs[idx].is_null() {
            crate::sync::with_irqs_disabled(|token| {
                crate::memory::free_kernel_virtual(ptrs[idx], token);
            });
            ptrs[idx] = core::ptr::null_mut();
        }
    }

    // Phase 4: verify fragmentation is visible.
    // With `half` regions freed in random order and `allocated - half` still
    // live, the free space is split into non-contiguous holes →
    // free_extent_count must exceed the pre-test baseline.
    let mid = match crate::memory::vmalloc::diag_snapshot() {
        Some(s) => s,
        None => {
            for &idx in &order[half..allocated] {
                if !ptrs[idx].is_null() {
                    crate::sync::with_irqs_disabled(|token| {
                        crate::memory::free_kernel_virtual(ptrs[idx], token);
                    });
                }
            }
            return StressOutcome::Skip("VMALLOC lock busy during mid-frag snapshot");
        }
    };
    // Capture result before freeing remaining regions so cleanup always runs.
    let frag_ok = mid.free_extent_count > before.free_extent_count;

    // Phase 5: free the second half of the shuffled order.
    for &idx in &order[half..allocated] {
        if !ptrs[idx].is_null() {
            crate::sync::with_irqs_disabled(|token| {
                crate::memory::free_kernel_virtual(ptrs[idx], token);
            });
            ptrs[idx] = core::ptr::null_mut();
        }
    }

    // Phase 6: final coherence checks.
    let after = match crate::memory::vmalloc::diag_snapshot() {
        Some(s) => s,
        None => return StressOutcome::Skip("VMALLOC lock busy during final frag snapshot"),
    };

    // All allocated pages must be returned (±1 for snapshot race).
    let total_pages: usize = sizes.iter().take(allocated).map(|&s| s / 4096).sum();
    let expected_free = before.free_pages.saturating_add(total_pages);
    if after.free_pages.saturating_add(1) < expected_free {
        return StressOutcome::Fail("vmalloc_frag: pages not fully returned after drain");
    }

    // Coherence: largest free extent must fit within total free pages.
    if after.largest_free_pages > after.free_pages.saturating_add(1) {
        return StressOutcome::Fail("vmalloc_frag: largest_free_pages > free_pages (incoherent)");
    }

    if !frag_ok {
        return StressOutcome::Fail("vmalloc_frag: fragmentation not visible after half-drain");
    }

    StressOutcome::Pass
}
749
750// ---------------------------------------------------------------------------
751// Sub-test: telemetry_consistency
752//
753// Sanity-check that all allocator counters are internally consistent.
754// Does not allocate anything : pure read of existing state.
755// ---------------------------------------------------------------------------
756fn stress_telemetry() -> StressOutcome {
757    // Slab: reclaimed must not exceed allocated.
758    let slab = crate::memory::heap::slab_diag_snapshot();
759    if slab.pages_reclaimed > slab.pages_allocated {
760        return StressOutcome::Fail("slab: pages_reclaimed > pages_allocated (counter corruption)");
761    }
762
763    // Phys-contiguous: freed must not exceed allocated.
764    let phys = crate::memory::phys_contiguous_diag();
765    if phys.pages_freed > phys.pages_allocated {
766        return StressOutcome::Fail(
767            "phys_contiguous: pages_freed > pages_allocated (counter corruption)",
768        );
769    }
770
771    // vmalloc: allocated_pages must fit in the arena; peak must be a watermark.
772    if let Some(vm) = crate::memory::vmalloc::diag_snapshot() {
773        let arena_pages = (vm.arena_end.saturating_sub(vm.arena_start)) as usize / 4096;
774
775        if vm.allocated_pages > arena_pages {
776            return StressOutcome::Fail("vmalloc: allocated_pages > arena capacity (impossible)");
777        }
778        if vm.allocated_pages.saturating_add(vm.free_pages) > arena_pages.saturating_add(1) {
779            // ±1 for the ARENA_START_PAGE reservation
780            return StressOutcome::Fail(
781                "vmalloc: allocated + free > arena capacity (accounting error)",
782            );
783        }
784        if (vm.peak_pages as usize) < vm.allocated_pages {
785            return StressOutcome::Fail(
786                "vmalloc: peak_pages < allocated_pages (watermark regression)",
787            );
788        }
789    }
790
791    StressOutcome::Pass
792}
793
794fn stress_userspace_workload() -> StressOutcome {
795    let path = "/initfs/test_mem_stressed";
796    let fd = match crate::vfs::open(path, crate::vfs::OpenFlags::READ) {
797        Ok(fd) => fd,
798        Err(_) => return StressOutcome::Skip("userspace stress binary not present in initfs"),
799    };
800    let data = match crate::vfs::read_all(fd) {
801        Ok(d) => d,
802        Err(_) => {
803            let _ = crate::vfs::close(fd);
804            return StressOutcome::Fail("failed to read userspace stress binary");
805        }
806    };
807    let _ = crate::vfs::close(fd);
808
809    let label = alloc::format!("heap-stress-{}", crate::process::scheduler::ticks());
810    let silo_id = match crate::silo::kernel_spawn_strate(&data, Some(label.as_str()), None) {
811        Ok(sid) => sid,
812        Err(_) => return StressOutcome::Fail("failed to spawn userspace stress workload"),
813    };
814
815    let appeared = match stress_wait_until(STRESS_WORKLOAD_OBSERVE_TICKS / 2, || {
816        crate::silo::list_silos_snapshot()
817            .iter()
818            .any(|s| s.id == silo_id && s.task_count > 0)
819    }) {
820        Ok(v) => v,
821        Err(msg) => {
822            let _ = crate::silo::kernel_destroy_silo(label.as_str());
823            return StressOutcome::Skip(msg);
824        }
825    };
826    if !appeared {
827        let _ = crate::silo::kernel_destroy_silo(label.as_str());
828        return StressOutcome::Fail("userspace workload never became runnable");
829    }
830
831    let observed = match stress_wait_until(STRESS_WORKLOAD_OBSERVE_TICKS, || {
832        crate::silo::list_silos_snapshot()
833            .iter()
834            .any(|s| s.id == silo_id && s.task_count > 0)
835    }) {
836        Ok(v) => v,
837        Err(msg) => {
838            let _ = crate::silo::kernel_destroy_silo(label.as_str());
839            return StressOutcome::Skip(msg);
840        }
841    };
842    if !observed {
843        let _ = crate::silo::kernel_destroy_silo(label.as_str());
844        return StressOutcome::Fail("userspace workload exited too early");
845    }
846
847    let quiesced = match stress_wait_until(STRESS_WORKLOAD_EXIT_TIMEOUT_TICKS, || {
848        let silos = crate::silo::list_silos_snapshot();
849        !silos.iter().any(|s| s.id == silo_id && s.task_count > 0)
850    }) {
851        Ok(v) => v,
852        Err(msg) => {
853            let _ = crate::silo::kernel_destroy_silo(label.as_str());
854            return StressOutcome::Skip(msg);
855        }
856    };
857
858    if !quiesced {
859        let _ = crate::silo::kernel_destroy_silo(label.as_str());
860        return StressOutcome::Fail("userspace workload did not quiesce before timeout");
861    }
862
863    let _ = crate::silo::kernel_destroy_silo(label.as_str());
864    StressOutcome::Pass
865}
866
867fn stress_wait_until(
868    timeout_ticks: u64,
869    mut cond: impl FnMut() -> bool,
870) -> Result<bool, &'static str> {
871    let start = crate::process::scheduler::ticks();
872    loop {
873        if cond() {
874            return Ok(true);
875        }
876        if crate::shell::is_interrupted() {
877            return Err("interrupted");
878        }
879        if crate::process::scheduler::ticks().saturating_sub(start) > timeout_ticks {
880            return Ok(false);
881        }
882        crate::process::yield_task();
883    }
884}
885
/// Round `ptr` down to the base address of its 4 KiB page.
#[inline]
fn page_base(ptr: *mut u8) -> u64 {
    const OFFSET_MASK: u64 = 0xfff;
    ptr as u64 & !OFFSET_MASK
}