// strat9_kernel/process/elf.rs

//! ELF64 loader for Strat9-OS.
//!
//! Parses ELF64 headers and loads PT_LOAD segments into a user address space,
//! then creates a kernel task that trampolines into Ring 3 via IRETQ.
//!
//! Supports:
//!   - ET_EXEC
//!   - ET_DYN (PIE/static-PIE)
//!   - ELF64 little-endian x86_64 binaries.
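//!
//! A minimal usage sketch (illustrative only; `initrd::file` is a hypothetical
//! way to obtain the raw image bytes, not part of this module):
//!
//! ```ignore
//! let bytes = initrd::file("init.elf").ok_or("init.elf missing")?;
//! let tid = crate::process::elf::load_and_run_elf(bytes, "init")?;
//! log::info!("spawned init as task {}", tid.as_u64());
//! ```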

use alloc::{sync::Arc, vec::Vec};
use x86_64::{
    structures::paging::{Mapper, Page, Size4KiB},
    VirtAddr,
};

use crate::{
    capability::Capability,
    memory::address_space::{AddressSpace, VmaFlags, VmaPageSize, VmaType},
    process::{
        task::{CpuContext, KernelStack, ResumeKind, SyncUnsafeCell, Task},
        TaskId, TaskPriority, TaskState,
    },
};

// ---------------------------------------------------------------------------
// ELF64 constants
// ---------------------------------------------------------------------------

const ELF_MAGIC: [u8; 4] = [0x7F, b'E', b'L', b'F'];
const ELFCLASS64: u8 = 2;
const ELFDATA2LSB: u8 = 1;
const ET_EXEC: u16 = 2;
const ET_DYN: u16 = 3;
const EV_CURRENT: u32 = 1;
const EM_X86_64: u16 = 62;
const PT_LOAD: u32 = 1;
const PT_DYNAMIC: u32 = 2;
const PT_INTERP: u32 = 3;
const PT_TLS: u32 = 7;
const PF_X: u32 = 1;
const PF_W: u32 = 2;
const PF_R: u32 = 4;
const DT_NULL: i64 = 0;
const DT_RELA: i64 = 7;
const DT_RELASZ: i64 = 8;
const DT_RELAENT: i64 = 9;
const DT_STRTAB: i64 = 5;
const DT_SYMTAB: i64 = 6;
const DT_SYMENT: i64 = 11;
const DT_JMPREL: i64 = 23;
const DT_PLTRELSZ: i64 = 2;
const DT_PLTREL: i64 = 20;
const DT_RELACOUNT: i64 = 0x6fff_fff9;
const DT_RELR: i64 = 36;
const DT_RELRSZ: i64 = 35;
const DT_RELRENT: i64 = 37;
const R_X86_64_RELATIVE: u32 = 8;
const R_X86_64_64: u32 = 1;
const R_X86_64_COPY: u32 = 5;
const R_X86_64_GLOB_DAT: u32 = 6;
const R_X86_64_JUMP_SLOT: u32 = 7;
const R_X86_64_TPOFF64: u32 = 18;
const R_X86_64_IRELATIVE: u32 = 37;

/// Maximum virtual address we accept for user-space mappings.
pub const USER_ADDR_MAX: u64 = 0x0000_8000_0000_0000;
/// Preferred base when placing ET_DYN (PIE) images.
const PIE_BASE_ADDR: u64 = 0x0000_0001_0000_0000;

/// User stack location (below the non-canonical gap).
pub const USER_STACK_BASE: u64 = 0x0000_7FFF_F000_0000;
/// Number of 4 KiB pages for the user stack (16 pages = 64 KiB).
pub const USER_STACK_PAGES: usize = 16;
/// Top of the user stack (stack grows down).
pub const USER_STACK_TOP: u64 = USER_STACK_BASE + (USER_STACK_PAGES as u64) * 4096;
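// With 16 pages the stack occupies 0x0000_7FFF_F000_0000..0x0000_7FFF_F001_0000,
// so USER_STACK_TOP evaluates to 0x0000_7FFF_F001_0000.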

/// Result of loading an ELF image into an address space.
#[derive(Debug, Clone, Copy)]
pub struct LoadedElfInfo {
    pub runtime_entry: u64,
    pub program_entry: u64,
    pub phdr_vaddr: u64,
    pub phent: u16,
    pub phnum: u16,
    pub interp_base: Option<u64>,
    pub tls_vaddr: u64,
    pub tls_filesz: u64,
    pub tls_memsz: u64,
    pub tls_align: u64,
}

// ---------------------------------------------------------------------------
// ELF64 header structures
// ---------------------------------------------------------------------------

/// ELF64 file header (64 bytes).
#[repr(C, packed)]
#[derive(Debug, Clone, Copy)]
struct Elf64Header {
    e_ident: [u8; 16],
    e_type: u16,
    e_machine: u16,
    e_version: u32,
    e_entry: u64,
    e_phoff: u64,
    e_shoff: u64,
    e_flags: u32,
    e_ehsize: u16,
    e_phentsize: u16,
    e_phnum: u16,
    e_shentsize: u16,
    e_shnum: u16,
    e_shstrndx: u16,
}

/// ELF64 program header (56 bytes).
#[repr(C, packed)]
#[derive(Debug, Clone, Copy)]
struct Elf64Phdr {
    p_type: u32,
    p_flags: u32,
    p_offset: u64,
    p_vaddr: u64,
    p_paddr: u64,
    p_filesz: u64,
    p_memsz: u64,
    p_align: u64,
}

#[repr(C, packed)]
#[derive(Debug, Clone, Copy)]
struct Elf64Dyn {
    d_tag: i64,
    d_val: u64,
}

#[repr(C, packed)]
#[derive(Debug, Clone, Copy)]
struct Elf64Rela {
    r_offset: u64,
    r_info: u64,
    r_addend: i64,
}

#[repr(C, packed)]
#[derive(Debug, Clone, Copy)]
struct Elf64Sym {
    st_name: u32,
    st_info: u8,
    st_other: u8,
    st_shndx: u16,
    st_value: u64,
    st_size: u64,
}

// ---------------------------------------------------------------------------
// Parsing
// ---------------------------------------------------------------------------

/// Parse and validate the ELF64 file header from raw bytes.
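///
/// Illustrative sketch of the validation behaviour (hypothetical buffer;
/// `ignore`d because this module is kernel-internal):
///
/// ```ignore
/// let mut img = [0u8; 64];
/// img[..4].copy_from_slice(&[0x7F, b'E', b'L', b'F']);
/// // Magic alone is not enough: class/endianness/machine are still zero.
/// assert!(parse_header(&img).is_err());
/// ```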
fn parse_header(data: &[u8]) -> Result<Elf64Header, &'static str> {
    if data.len() < core::mem::size_of::<Elf64Header>() {
        return Err("ELF data too small for header");
    }

    // SAFETY: data is large enough and Elf64Header is repr(C, packed) with no
    // alignment requirements beyond 1.
    let header: Elf64Header =
        unsafe { core::ptr::read_unaligned(data.as_ptr() as *const Elf64Header) };

    // Validate magic
    if header.e_ident[0..4] != ELF_MAGIC {
        return Err("Bad ELF magic");
    }

    // Class: 64-bit
    if header.e_ident[4] != ELFCLASS64 {
        return Err("Not ELF64");
    }

    // Data: little-endian
    if header.e_ident[5] != ELFDATA2LSB {
        return Err("Not little-endian ELF");
    }

    // Machine: x86_64
    if header.e_machine != EM_X86_64 {
        return Err("Not x86_64 ELF");
    }

    // Type: executable or shared object (PIE/static-PIE executable image)
    if header.e_type != ET_EXEC && header.e_type != ET_DYN {
        return Err("Unsupported ELF type (expected ET_EXEC or ET_DYN)");
    }

    // ELF version
    if header.e_version != EV_CURRENT {
        return Err("Unsupported ELF version");
    }

    // The entry point must lie in the canonical user range (for ET_DYN it is
    // image-relative and validated again after relocation).
    if header.e_entry >= USER_ADDR_MAX {
        return Err("Entry point outside user address range");
    }
    // Some toolchains/images can emit ET_EXEC with e_entry=0; that case is
    // handled later by deriving a fallback entry from the first PT_LOAD|PF_X
    // segment.

    // Sanity-check program headers
    if header.e_phentsize as usize != core::mem::size_of::<Elf64Phdr>() {
        return Err("Unexpected phentsize");
    }

    let ph_end = (header.e_phoff as usize)
        .checked_add((header.e_phnum as usize) * (header.e_phentsize as usize))
        .ok_or("Program header table size overflows")?;
    if ph_end > data.len() {
        return Err("Program headers extend past file");
    }

    Ok(header)
}

/// Iterate over program headers in the ELF.
fn program_headers<'a>(
    data: &'a [u8],
    header: &Elf64Header,
) -> impl Iterator<Item = Elf64Phdr> + 'a {
    let phoff = header.e_phoff as usize;
    let phsize = header.e_phentsize as usize;
    let phnum = header.e_phnum as usize;

    (0..phnum).map(move |i| {
        let offset = phoff + i * phsize;
        // SAFETY: parse_header already validated that all program headers fit
        // within `data`, and Elf64Phdr is packed (align 1).
        unsafe { core::ptr::read_unaligned(data.as_ptr().add(offset) as *const Elf64Phdr) }
    })
}
/// Parse the PT_INTERP interpreter path, if present.
fn parse_interp_path<'a>(
    elf_data: &'a [u8],
    phdrs: &[Elf64Phdr],
) -> Result<Option<&'a str>, &'static str> {
    let Some(interp) = phdrs.iter().find(|ph| ph.p_type == PT_INTERP) else {
        return Ok(None);
    };
    if interp.p_filesz == 0 {
        return Err("PT_INTERP has empty path");
    }
    let start = interp.p_offset as usize;
    let end = start
        .checked_add(interp.p_filesz as usize)
        .ok_or("PT_INTERP range overflow")?;
    if end > elf_data.len() {
        return Err("PT_INTERP extends past file");
    }
    let raw = &elf_data[start..end];
    let nul = raw
        .iter()
        .position(|&b| b == 0)
        .ok_or("PT_INTERP path is not NUL terminated")?;
    let s = core::str::from_utf8(&raw[..nul]).map_err(|_| "PT_INTERP path is not UTF-8")?;
    if s.is_empty() {
        return Err("PT_INTERP path is empty");
    }
    Ok(Some(s))
}

/// Find the user-space virtual address of the program header table after
/// relocation.
fn find_relocated_phdr_vaddr(
    header: &Elf64Header,
    phdrs: &[Elf64Phdr],
    load_bias: u64,
) -> Result<u64, &'static str> {
    let phoff = header.e_phoff;
    for ph in phdrs {
        if ph.p_type != PT_LOAD || ph.p_filesz == 0 {
            continue;
        }
        let file_start = ph.p_offset;
        let file_end = ph
            .p_offset
            .checked_add(ph.p_filesz)
            .ok_or("PHDR location overflow")?;
        if phoff >= file_start && phoff < file_end {
            let delta = phoff - file_start;
            let vaddr = ph
                .p_vaddr
                .checked_add(delta)
                .and_then(|v| v.checked_add(load_bias))
                .ok_or("Relocated PHDR address overflow")?;
            if vaddr >= USER_ADDR_MAX {
                return Err("Relocated PHDR outside user address space");
            }
            return Ok(vaddr);
        }
    }
    Err("Program headers are not covered by a PT_LOAD segment")
}

/// Read an entire ELF file from the VFS into memory (capped at MAX_ELF_SIZE).
fn read_elf_from_vfs(path: &str) -> Result<Vec<u8>, &'static str> {
    const MAX_ELF_SIZE: usize = 64 * 1024 * 1024;
    let fd =
        crate::vfs::open(path, crate::vfs::OpenFlags::READ).map_err(|_| "PT_INTERP open failed")?;
    let mut out = Vec::new();
    let mut buf = [0u8; 4096];
    loop {
        let n = match crate::vfs::read(fd, &mut buf) {
            Ok(n) => n,
            Err(_) => {
                let _ = crate::vfs::close(fd);
                return Err("PT_INTERP read failed");
            }
        };
        if n == 0 {
            break;
        }
        if out.len().saturating_add(n) > MAX_ELF_SIZE {
            let _ = crate::vfs::close(fd);
            return Err("PT_INTERP file too large");
        }
        out.extend_from_slice(&buf[..n]);
    }
    let _ = crate::vfs::close(fd);
    if out.is_empty() {
        return Err("PT_INTERP file is empty");
    }
    Ok(out)
}

/// Compute total mapped bounds for all PT_LOAD segments.
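///
/// Bounds are rounded out to 4 KiB pages; a sketch (not run as a doctest):
///
/// ```ignore
/// let ph = Elf64Phdr {
///     p_type: PT_LOAD, p_flags: PF_R, p_offset: 0x234, p_vaddr: 0x1234,
///     p_paddr: 0, p_filesz: 0x10, p_memsz: 0x10, p_align: 0x1000,
/// };
/// assert_eq!(compute_load_bounds(&[ph]), Ok((0x1000, 0x2000)));
/// ```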
fn compute_load_bounds(phdrs: &[Elf64Phdr]) -> Result<(u64, u64), &'static str> {
    let mut min_vaddr = u64::MAX;
    let mut max_vaddr = 0u64;
    let mut saw_load = false;

    for phdr in phdrs {
        if phdr.p_type != PT_LOAD {
            continue;
        }
        if phdr.p_memsz == 0 {
            continue;
        }
        saw_load = true;

        if phdr.p_memsz < phdr.p_filesz {
            return Err("PT_LOAD memsz < filesz");
        }

        // ELF requires p_vaddr % page == p_offset % page for PT_LOAD.
        if ((phdr.p_vaddr ^ phdr.p_offset) & 0xFFF) != 0 {
            return Err("PT_LOAD alignment mismatch (vaddr/offset)");
        }

        let seg_end = phdr
            .p_vaddr
            .checked_add(phdr.p_memsz)
            .ok_or("PT_LOAD vaddr+memsz overflow")?;
        if seg_end > USER_ADDR_MAX {
            return Err("PT_LOAD exceeds user address space");
        }

        let seg_start_page = phdr.p_vaddr & !0xFFF;
        let seg_end_page = (seg_end + 0xFFF) & !0xFFF;
        min_vaddr = min_vaddr.min(seg_start_page);
        max_vaddr = max_vaddr.max(seg_end_page);
    }

    if !saw_load {
        return Err("ELF has no PT_LOAD segments");
    }
    Ok((min_vaddr, max_vaddr))
}

/// Compute load bias and relocated entry for ET_EXEC / ET_DYN.
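///
/// For ET_DYN, if the image's lowest PT_LOAD page is `min_vaddr = 0` and the
/// chosen base is `0x1_0000_0000`, the bias is `0x1_0000_0000`, so an
/// `e_entry` of `0x12a0` relocates to `0x1_0000_12a0`.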
fn compute_load_bias_and_entry(
    user_as: &AddressSpace,
    header: &Elf64Header,
    phdrs: &[Elf64Phdr],
) -> Result<(u64, u64), &'static str> {
    let (min_vaddr, max_vaddr) = compute_load_bounds(phdrs)?;
    let span = max_vaddr
        .checked_sub(min_vaddr)
        .ok_or("Invalid PT_LOAD bounds")?;

    let load_bias = if header.e_type == ET_EXEC {
        0
    } else {
        let n_pages = (span as usize).div_ceil(4096);
        let load_base = user_as
            .find_free_vma_range(PIE_BASE_ADDR, n_pages, VmaPageSize::Small)
            .or_else(|| {
                user_as.find_free_vma_range(0x0000_0000_1000_0000, n_pages, VmaPageSize::Small)
            })
            .ok_or("No virtual range for ET_DYN image")?;
        load_base
            .checked_sub(min_vaddr)
            .ok_or("ET_DYN load bias underflow")?
    };

    let relocated_end = max_vaddr
        .checked_add(load_bias)
        .ok_or("Relocated PT_LOAD range overflow")?;
    if relocated_end > USER_ADDR_MAX {
        return Err("Relocated PT_LOAD range exceeds user space");
    }

    let entry_raw = if header.e_type == ET_EXEC && header.e_entry == 0 {
        let fallback = phdrs
            .iter()
            .find(|ph| ph.p_type == PT_LOAD && ph.p_memsz != 0 && (ph.p_flags & PF_X) != 0)
            .map(|ph| ph.p_vaddr)
            .ok_or("ET_EXEC has null entry and no executable PT_LOAD")?;
        log::warn!(
            "[elf] ET_EXEC has null entry, using fallback executable segment vaddr={:#x}",
            fallback
        );
        fallback
    } else {
        header.e_entry
    };

    let relocated_entry = entry_raw
        .checked_add(load_bias)
        .ok_or("Relocated entry overflow")?;
    if relocated_entry == 0 || relocated_entry >= USER_ADDR_MAX {
        return Err("Relocated entry outside user space");
    }

    Ok((load_bias, relocated_entry))
}

/// Apply the final ELF segment permissions to already-mapped pages.
fn apply_segment_permissions(
    user_as: &AddressSpace,
    page_start: u64,
    page_count: usize,
    flags: VmaFlags,
) -> Result<(), &'static str> {
    use x86_64::registers::control::Cr3;

    let pte_flags = flags.to_page_flags();
    // SAFETY: loader owns this AddressSpace during image construction.
    let mut mapper = unsafe { user_as.mapper() };
    for i in 0..page_count {
        let vaddr = page_start
            .checked_add((i as u64) * 4096)
            .ok_or("Permission update address overflow")?;
        let page = Page::<Size4KiB>::from_start_address(VirtAddr::new(vaddr))
            .map_err(|_| "Invalid page while updating segment flags")?;
        // SAFETY: the page is already mapped by map_region for this segment.
        let _ = unsafe {
            mapper
                .update_flags(page, pte_flags)
                .map_err(|_| "Failed to update segment page flags")?
        };
        // We ignore the flush here and make a targeted flush decision below.
    }

    // During ELF loading we update a freshly-created user address space that is
    // not active on other CPUs.  Cross-CPU shootdowns here only add boot-time
    // latency and can time out while APs are not yet servicing IPIs.
    // If this address space is currently active on this CPU, local invalidation
    // is enough for the loader path.
    let (current_cr3, _) = Cr3::read();
    if current_cr3.start_address() == user_as.cr3() {
        let end = page_start + (page_count as u64) * 4096;
        crate::arch::x86_64::tlb::local_range(VirtAddr::new(page_start), VirtAddr::new(end));
    }

    Ok(())
}

/// Copy bytes out of a mapped user-space range via the HHDM.
fn read_user_mapped_bytes(
    user_as: &AddressSpace,
    mut vaddr: u64,
    out: &mut [u8],
) -> Result<(), &'static str> {
    let end = vaddr
        .checked_add(out.len() as u64)
        .ok_or("Read range overflow")?;
    if end > USER_ADDR_MAX {
        return Err("Read range outside user space");
    }
    let mut copied = 0usize;
    while copied < out.len() {
        let page_off = (vaddr & 0xFFF) as usize;
        let chunk = core::cmp::min(out.len() - copied, 4096 - page_off);
        let phys = user_as
            .translate(VirtAddr::new(vaddr))
            .ok_or("Failed to translate mapped user bytes")?;
        let src = crate::memory::phys_to_virt(phys.as_u64()) as *const u8;
        // SAFETY: src points to mapped physical memory via HHDM.
        unsafe { core::ptr::copy_nonoverlapping(src, out.as_mut_ptr().add(copied), chunk) };
        copied += chunk;
        vaddr = vaddr
            .checked_add(chunk as u64)
            .ok_or("Virtual address overflow while reading mapped bytes")?;
    }
    Ok(())
}

/// Copy bytes into a mapped user-space range via the HHDM.
fn write_user_mapped_bytes(
    user_as: &AddressSpace,
    mut vaddr: u64,
    src: &[u8],
) -> Result<(), &'static str> {
    let end = vaddr
        .checked_add(src.len() as u64)
        .ok_or("Write range overflow")?;
    if end > USER_ADDR_MAX {
        return Err("Write range outside user space");
    }
    let mut written = 0usize;
    while written < src.len() {
        let page_off = (vaddr & 0xFFF) as usize;
        let chunk = core::cmp::min(src.len() - written, 4096 - page_off);
        let phys = user_as
            .translate(VirtAddr::new(vaddr))
            .ok_or("Failed to translate relocation target")?;
        let dst = crate::memory::phys_to_virt(phys.as_u64()) as *mut u8;
        // SAFETY: destination points to mapped user frame through HHDM.
        unsafe { core::ptr::copy_nonoverlapping(src.as_ptr().add(written), dst, chunk) };
        written += chunk;
        vaddr = vaddr
            .checked_add(chunk as u64)
            .ok_or("Virtual address overflow while writing mapped bytes")?;
    }
    Ok(())
}

/// Read a little-endian u64 from mapped user memory.
fn read_user_u64(user_as: &AddressSpace, vaddr: u64) -> Result<u64, &'static str> {
    let mut raw = [0u8; 8];
    read_user_mapped_bytes(user_as, vaddr, &mut raw)?;
    Ok(u64::from_le_bytes(raw))
}

/// Write a little-endian u64 into mapped user memory.
fn write_user_u64(user_as: &AddressSpace, vaddr: u64, value: u64) -> Result<(), &'static str> {
    write_user_mapped_bytes(user_as, vaddr, &value.to_le_bytes())
}

/// Apply DT_RELR (compressed relative) relocations.
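///
/// RELR encoding (a brief recap of the standard format): an even entry is the
/// address of the next slot to relocate; an odd entry is a 63-bit bitmap
/// covering the 63 slots following the current cursor. E.g. a bitmap entry of
/// `(0b101 << 1) | 1` relocates the slots at `+0*8` and `+2*8`.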
fn apply_relr_relocations(
    user_as: &AddressSpace,
    load_bias: u64,
    relr_base: u64,
    relr_size: usize,
    relr_ent: usize,
) -> Result<usize, &'static str> {
    if relr_size == 0 {
        return Ok(0);
    }
    if relr_ent != core::mem::size_of::<u64>() {
        return Err("Unsupported DT_RELRENT size");
    }
    if relr_size % relr_ent != 0 {
        return Err("DT_RELR table size is not aligned");
    }

    let count = relr_size / relr_ent;
    let mut applied = 0usize;
    let mut where_addr = 0u64;

    for i in 0..count {
        let entry_addr = relr_base
            .checked_add((i * relr_ent) as u64)
            .ok_or("DT_RELR walk overflow")?;
        let entry = read_user_u64(user_as, entry_addr)?;

        if (entry & 1) == 0 {
            where_addr = load_bias
                .checked_add(entry)
                .ok_or("DT_RELR absolute relocation overflow")?;
            if where_addr >= USER_ADDR_MAX {
                return Err("DT_RELR target outside user space");
            }
            let cur = read_user_u64(user_as, where_addr)?;
            write_user_u64(
                user_as,
                where_addr,
                cur.checked_add(load_bias)
                    .ok_or("DT_RELR relocated value overflow")?,
            )?;
            where_addr = where_addr
                .checked_add(8)
                .ok_or("DT_RELR where pointer overflow")?;
            applied += 1;
        } else {
            let mut bitmap = entry >> 1;
            for bit in 0..63u64 {
                if (bitmap & 1) != 0 {
                    let slot = where_addr
                        .checked_add(bit * 8)
                        .ok_or("DT_RELR bitmap target overflow")?;
                    if slot >= USER_ADDR_MAX {
                        return Err("DT_RELR bitmap target outside user space");
                    }
                    let cur = read_user_u64(user_as, slot)?;
                    write_user_u64(
                        user_as,
                        slot,
                        cur.checked_add(load_bias)
                            .ok_or("DT_RELR bitmap relocated value overflow")?,
                    )?;
                    applied += 1;
                }
                bitmap >>= 1;
                if bitmap == 0 {
                    break;
                }
            }
            where_addr = where_addr
                .checked_add(63 * 8)
                .ok_or("DT_RELR where advance overflow")?;
        }
    }
    Ok(applied)
}

/// Apply PT_DYNAMIC relocations (RELA, JMPREL, RELR) for ET_DYN images.
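///
/// For the common `R_X86_64_RELATIVE` case the patched value is simply
/// `load_bias + r_addend`; e.g. a bias of `0x1_0000_0000` and an addend of
/// `0x12920` store `0x1_0001_2920` at `load_bias + r_offset`.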
fn apply_dynamic_relocations(
    user_as: &AddressSpace,
    phdrs: &[Elf64Phdr],
    elf_type: u16,
    load_bias: u64,
) -> Result<(), &'static str> {
    if elf_type != ET_DYN {
        return Ok(());
    }

    let dynamic = phdrs.iter().find(|ph| ph.p_type == PT_DYNAMIC);
    let Some(dynamic_ph) = dynamic else {
        return Ok(());
    };
    if dynamic_ph.p_filesz == 0 {
        return Ok(());
    }

    let dyn_addr = dynamic_ph
        .p_vaddr
        .checked_add(load_bias)
        .ok_or("PT_DYNAMIC relocated address overflow")?;
    let dyn_count = (dynamic_ph.p_filesz as usize) / core::mem::size_of::<Elf64Dyn>();

    let mut rela_addr: Option<u64> = None;
    let mut rela_size: usize = 0;
    let mut rela_ent: usize = core::mem::size_of::<Elf64Rela>();
    let mut jmprel_addr: Option<u64> = None;
    let mut jmprel_size: usize = 0;
    let mut pltrel_kind: Option<u64> = None;
    let mut symtab_addr: Option<u64> = None;
    let mut sym_ent: usize = core::mem::size_of::<Elf64Sym>();
    let _strtab_addr: Option<u64> = None;
    let mut rela_count_hint: Option<usize> = None;
    let mut relr_addr: Option<u64> = None;
    let mut relr_size: usize = 0;
    let mut relr_ent: usize = 0;

    for i in 0..dyn_count {
        let entry_addr = dyn_addr
            .checked_add((i * core::mem::size_of::<Elf64Dyn>()) as u64)
            .ok_or("PT_DYNAMIC walk overflow")?;
        let mut raw = [0u8; core::mem::size_of::<Elf64Dyn>()];
        read_user_mapped_bytes(user_as, entry_addr, &mut raw)?;
        // SAFETY: raw has exact size of Elf64Dyn; read_unaligned handles packing.
        let dyn_entry = unsafe { core::ptr::read_unaligned(raw.as_ptr() as *const Elf64Dyn) };

        match dyn_entry.d_tag {
            DT_NULL => break,
            DT_RELA => {
                rela_addr = Some(
                    dyn_entry
                        .d_val
                        .checked_add(load_bias)
                        .ok_or("DT_RELA relocated address overflow")?,
                )
            }
            DT_RELASZ => rela_size = dyn_entry.d_val as usize,
            DT_RELAENT => rela_ent = dyn_entry.d_val as usize,
            DT_RELACOUNT => rela_count_hint = Some(dyn_entry.d_val as usize),
            DT_JMPREL => {
                jmprel_addr = Some(
                    dyn_entry
                        .d_val
                        .checked_add(load_bias)
                        .ok_or("DT_JMPREL relocated address overflow")?,
                )
            }
            DT_PLTRELSZ => jmprel_size = dyn_entry.d_val as usize,
            DT_PLTREL => pltrel_kind = Some(dyn_entry.d_val),
            DT_SYMTAB => {
                symtab_addr = Some(
                    dyn_entry
                        .d_val
                        .checked_add(load_bias)
                        .ok_or("DT_SYMTAB relocated address overflow")?,
                )
            }
            DT_SYMENT => sym_ent = dyn_entry.d_val as usize,
            DT_STRTAB => {
                let _ = dyn_entry
                    .d_val
                    .checked_add(load_bias)
                    .ok_or("DT_STRTAB relocated address overflow")?;
            }
            DT_RELR => {
                relr_addr = Some(
                    dyn_entry
                        .d_val
                        .checked_add(load_bias)
                        .ok_or("DT_RELR relocated address overflow")?,
                )
            }
            DT_RELRSZ => relr_size = dyn_entry.d_val as usize,
            DT_RELRENT => relr_ent = dyn_entry.d_val as usize,
            _ => {}
        }
    }

    let mut relr_applied = 0usize;
    if let Some(relr_base) = relr_addr {
        relr_applied = apply_relr_relocations(user_as, load_bias, relr_base, relr_size, relr_ent)?;
    } else if relr_size != 0 || relr_ent != 0 {
        return Err("DT_RELR metadata present without DT_RELR base");
    }
    if rela_ent != core::mem::size_of::<Elf64Rela>() {
        return Err("Unsupported DT_RELAENT size");
    }
    if sym_ent != core::mem::size_of::<Elf64Sym>() {
        return Err("Unsupported DT_SYMENT size");
    }
    if pltrel_kind.is_some() && pltrel_kind != Some(DT_RELA as u64) {
        return Err("Only DT_PLTREL=DT_RELA is supported");
    }

    let read_sym_entry = |sym_idx: u32| -> Result<Elf64Sym, &'static str> {
        let symtab = symtab_addr.ok_or("Missing DT_SYMTAB for symbol relocations")?;
        let sym_addr = symtab
            .checked_add((sym_idx as u64) * (sym_ent as u64))
            .ok_or("Symbol table address overflow")?;
        let mut raw = [0u8; core::mem::size_of::<Elf64Sym>()];
        read_user_mapped_bytes(user_as, sym_addr, &mut raw)?;
        Ok(unsafe { core::ptr::read_unaligned(raw.as_ptr() as *const Elf64Sym) })
    };

    let resolve_symbol = |sym_idx: u32| -> Result<u64, &'static str> {
        if sym_idx == 0 {
            return Ok(0);
        }
        let sym = read_sym_entry(sym_idx)?;
        if sym.st_shndx == 0 {
            return Err("Undefined symbol relocation not supported");
        }
        sym.st_value
            .checked_add(load_bias)
            .ok_or("Symbol value relocation overflow")
    };

    let resolve_symbol_raw = |sym_idx: u32| -> Result<u64, &'static str> {
        if sym_idx == 0 {
            return Ok(0);
        }
        let sym = read_sym_entry(sym_idx)?;
        Ok(sym.st_value)
    };

    let resolve_symbol_size = |sym_idx: u32| -> Result<u64, &'static str> {
        if sym_idx == 0 {
            return Ok(0);
        }
        let sym = read_sym_entry(sym_idx)?;
        Ok(sym.st_size)
    };

    let apply_rela_table = |table_base: u64,
                            table_size: usize,
                            count_hint: Option<usize>|
     -> Result<usize, &'static str> {
        if table_size == 0 {
            return Ok(0);
        }
        let mut count = table_size / rela_ent;
        if let Some(hint) = count_hint {
            count = core::cmp::min(count, hint);
        }
        let mut applied = 0usize;
        for i in 0..count {
            let rela_addr_i = table_base
                .checked_add((i * rela_ent) as u64)
                .ok_or("Rela table overflow")?;
            let mut raw = [0u8; core::mem::size_of::<Elf64Rela>()];
            read_user_mapped_bytes(user_as, rela_addr_i, &mut raw)?;
            // SAFETY: raw has exact size of Elf64Rela.
            let rela = unsafe { core::ptr::read_unaligned(raw.as_ptr() as *const Elf64Rela) };

            let r_type = (rela.r_info & 0xffff_ffff) as u32;
            let r_sym = (rela.r_info >> 32) as u32;
            let target = rela
                .r_offset
                .checked_add(load_bias)
                .ok_or("Relocation target overflow")?;
            if target >= USER_ADDR_MAX {
                return Err("Relocation target outside user space");
            }

            let value = match r_type {
                R_X86_64_RELATIVE => {
                    if r_sym != 0 {
                        return Err("R_X86_64_RELATIVE with non-zero symbol");
                    }
                    (load_bias as i128)
                        .checked_add(rela.r_addend as i128)
                        .ok_or("Relocation value overflow")?
                }
                R_X86_64_GLOB_DAT | R_X86_64_JUMP_SLOT | R_X86_64_64 => {
                    let sym_val = resolve_symbol(r_sym)? as i128;
                    sym_val
                        .checked_add(rela.r_addend as i128)
                        .ok_or("Relocation value overflow")?
                }
                R_X86_64_COPY => {
                    let sym_val = resolve_symbol(r_sym)?;
                    if sym_val == 0 {
                        continue;
                    }
                    let sym_sz = resolve_symbol_size(r_sym)?;
                    if sym_sz > 0 && sym_val < USER_ADDR_MAX {
                        let mut tmp = [0u8; 256];
                        let mut off = 0usize;
                        while off < sym_sz as usize {
                            let chunk = core::cmp::min(256, sym_sz as usize - off);
                            read_user_mapped_bytes(
                                user_as,
                                sym_val + off as u64,
                                &mut tmp[..chunk],
                            )?;
                            write_user_mapped_bytes(user_as, target + off as u64, &tmp[..chunk])?;
                            off += chunk;
                        }
                    }
                    applied += 1;
                    continue;
                }
                R_X86_64_TPOFF64 => {
                    let sym_val = if r_sym != 0 {
                        resolve_symbol_raw(r_sym)? as i128
                    } else {
                        0i128
                    };
                    sym_val
                        .checked_add(rela.r_addend as i128)
                        .ok_or("TPOFF64 value overflow")?
                }
                R_X86_64_IRELATIVE => (load_bias as i128)
                    .checked_add(rela.r_addend as i128)
                    .ok_or("IRELATIVE value overflow")?,
                _ => {
                    log::warn!("[elf] Unsupported relocation type {}", r_type);
                    continue;
                }
            };
            if value < 0 || value > u64::MAX as i128 {
                return Err("Relocation value out of range");
            }
            let val_u64 = value as u64;
            // Read back before write for diagnosis
            if applied < 5 {
                let r_addend_copy = rela.r_addend; // copy packed field to local
                let mut before = [0u8; 8];
                let _ = read_user_mapped_bytes(user_as, target, &mut before);
                let before_val = u64::from_le_bytes(before);
                crate::e9_println!(
                    "[reloc] [{i}] r_type={} target={:#x} r_addend={:#x} value={:#x} before={:#x}",
                    r_type,
                    target,
                    r_addend_copy,
                    val_u64,
                    before_val
                );
            }
            write_user_mapped_bytes(user_as, target, &val_u64.to_le_bytes())?;
            // Read back after write for diagnosis
            if applied < 5 {
                let mut after = [0u8; 8];
                let _ = read_user_mapped_bytes(user_as, target, &mut after);
                let after_val = u64::from_le_bytes(after);
                crate::e9_println!(
                    "[reloc] [{i}] after_write={:#x} (expected={:#x})",
                    after_val,
                    val_u64
                );
            }
            // Catch any relocation that writes a kernel-range address into user space.
            if val_u64 >= 0xffff_8000_0000_0000 {
                let r_addend_copy = rela.r_addend;
                crate::e9_println!(
                    "[reloc-KERNEL-ADDR] [{i}] r_type={} target={:#x} r_addend={:#x} val={:#x} bias={:#x}",
                    r_type, target, r_addend_copy, val_u64, load_bias
                );
            }
            applied += 1;
        }
        Ok(applied)
    };

    let mut total_applied = 0usize;
    crate::e9_println!(
        "[reloc] apply_dynamic_relocations: bias={:#x} rela_addr={:?} rela_size={} rela_count={:?}",
        load_bias,
        rela_addr,
        rela_size,
        rela_count_hint
    );
    if let Some(rela_base) = rela_addr {
        total_applied += apply_rela_table(rela_base, rela_size, rela_count_hint)?;
    }
    if let Some(jmprel_base) = jmprel_addr {
        total_applied += apply_rela_table(jmprel_base, jmprel_size, None)?;
    }

    if total_applied > 0 {
        crate::e9_println!(
            "[reloc] applied {} RELA relocations (bias={:#x})",
            total_applied,
            load_bias
        );
    }
    if relr_applied > 0 {
        log::debug!("[elf] Applied {} RELR relocations", relr_applied);
    }
    Ok(())
}

// ---------------------------------------------------------------------------
// Loading
// ---------------------------------------------------------------------------

/// Convert ELF p_flags to VmaFlags.
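///
/// E.g. `PF_R | PF_X` yields a readable, executable, non-writable,
/// user-accessible mapping.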
fn elf_flags_to_vma(p_flags: u32) -> VmaFlags {
    VmaFlags {
        readable: p_flags & PF_R != 0,
        writable: p_flags & PF_W != 0,
        executable: p_flags & PF_X != 0,
        user_accessible: true,
    }
}

/// Load a single PT_LOAD segment into the given address space.
///
/// Allocates physical frames, maps them with appropriate permissions, and
/// copies file data into the mapping. BSS (memsz > filesz) is already
/// zero-filled because `map_region` zeroes newly allocated frames.
fn load_segment(
    user_as: &AddressSpace,
    elf_data: &[u8],
    phdr: &Elf64Phdr,
    load_bias: u64,
) -> Result<(), &'static str> {
    let vaddr = phdr
        .p_vaddr
        .checked_add(load_bias)
        .ok_or("PT_LOAD relocated vaddr overflow")?;
    let memsz = phdr.p_memsz;
    let filesz = phdr.p_filesz;
    let offset = phdr.p_offset;

    // Validate addresses are in user space
    if vaddr >= USER_ADDR_MAX {
        return Err("PT_LOAD vaddr outside user space");
    }
    let end = vaddr
        .checked_add(memsz)
        .ok_or("PT_LOAD vaddr+memsz overflows")?;
    if end > USER_ADDR_MAX {
        return Err("PT_LOAD segment extends past user space");
    }

    // Validate file region
    let file_end = (offset as usize)
        .checked_add(filesz as usize)
        .ok_or("PT_LOAD offset+filesz overflows")?;
    if file_end > elf_data.len() {
        return Err("PT_LOAD file data extends past ELF");
    }

    // Calculate page-aligned mapping
    let page_start = vaddr & !0xFFF;
    let page_end = (end + 0xFFF) & !0xFFF;
    let page_count = ((page_end - page_start) / 4096) as usize;

    // Map writable during copy, then restore final ELF flags.
    let actual_flags = elf_flags_to_vma(phdr.p_flags);
    let load_flags = VmaFlags {
        readable: true,
        writable: true, // Need write access to copy data in
        executable: actual_flags.executable,
        user_accessible: true,
    };

    let vma_type = if actual_flags.executable {
        VmaType::Code
    } else {
        VmaType::Anonymous
    };
    log::debug!(
        "[elf] map PT_LOAD: start={:#x} pages={} filesz={:#x}",
        page_start,
        page_count,
        filesz
    );
    user_as.map_region(
        page_start,
        page_count,
        load_flags,
        vma_type,
        VmaPageSize::Small,
    )?;

    // Copy file data into the mapped pages.
    // We translate each page through the user AS to find its physical frame,
    // then access it via HHDM to write.
    if filesz > 0 {
        let src = &elf_data[offset as usize..file_end];
        let mut copied = 0usize;

        while copied < src.len() {
            let dst_vaddr = vaddr + copied as u64;
            let page_offset = (dst_vaddr & 0xFFF) as usize;
            let chunk = core::cmp::min(src.len() - copied, 4096 - page_offset);

            // Translate user virtual address → physical → HHDM virtual
            let phys = user_as
                .translate(VirtAddr::new(dst_vaddr))
                .ok_or("Failed to translate user page after mapping")?;
            let hhdm_ptr = crate::memory::phys_to_virt(phys.as_u64()) as *mut u8;

            // SAFETY: hhdm_ptr points to a freshly mapped, zeroed frame via HHDM.
            // The source slice is validated above.
            unsafe {
                core::ptr::copy_nonoverlapping(src.as_ptr().add(copied), hhdm_ptr, chunk);
            }

            copied += chunk;
        }
    }

    // Tighten PTE permissions after copy.
    apply_segment_permissions(user_as, page_start, page_count, actual_flags)?;

    log::debug!(
        "  PT_LOAD: {:#x}..{:#x} ({} pages, file {:#x}+{:#x}, flags {:?})",
        page_start,
        page_end,
        page_count,
        offset,
        filesz,
        actual_flags,
    );

    Ok(())
}

// ---------------------------------------------------------------------------
// Task creation with IRETQ trampoline
// ---------------------------------------------------------------------------

/// Trampoline that switches to the user address space and enters Ring 3 via
/// IRETQ.
///
/// Parameters (entry point, stack top, arg0, address space) are read from the
/// *current task* so that each ELF task carries its own copy.  This makes the
/// trampoline safe under SMP: two tasks can run their trampolines concurrently
/// on different CPUs without any shared mutable state.
extern "C" fn elf_ring3_trampoline() -> ! {
    use crate::arch::x86_64::gdt;
    use core::sync::atomic::Ordering;

    crate::e9_println!("[trace][elf] ring3_trampoline before current_task");
    let task = crate::process::scheduler::current_task_clone_spin_debug("ring3_trampoline")
        .expect("elf_ring3_trampoline: no current task");
    crate::e9_println!(
        "[trace][elf] ring3_trampoline enter tid={} name={}",
        task.id.as_u64(),
        task.name
    );
    crate::serial_println!(
        "[trace][elf] ring3_trampoline enter tid={} name={}",
        task.id.as_u64(),
        task.name
    );
    task.set_resume_kind(crate::process::task::ResumeKind::IretFrame);

    let user_rip = task.trampoline_entry.load(Ordering::Acquire);
    let user_rsp = task.trampoline_stack_top.load(Ordering::Acquire);
    let user_arg0 = task.trampoline_arg0.load(Ordering::Acquire);
    crate::e9_println!(
        "[trace][elf] ring3_trampoline args tid={} rip={:#x} rsp={:#x} arg0={:#x}",
        task.id.as_u64(),
        user_rip,
        user_rsp,
        user_arg0
    );
    crate::serial_println!(
        "[trace][elf] ring3_trampoline args tid={} rip={:#x} rsp={:#x}",
        task.id.as_u64(),
        user_rip,
        user_rsp
    );

    // Probe: read GOT entries via HHDM before switching to user AS.
    // This is the last kernel-owned moment before user execution begins.
    // If values here are wrong, the bug is in load/relocation, not in
    // something that happens after this point.
    {
        // SAFETY: Kernel still holds the boot/kernel CR3. HHDM is valid.
        unsafe {
            let as_ref = task.process.address_space_arc();
            let task_name: &str = &task.name;
            for test_off in [0x12920u64, 0x12928u64, 0x12930u64] {
                let vaddr = 0x100000000u64.wrapping_add(test_off);
                if let Some(phys) = as_ref.translate(VirtAddr::new(vaddr)) {
                    let ptr = crate::memory::phys_to_virt(phys.as_u64()) as *const u64;
                    let val = core::ptr::read_unaligned(ptr);
                    crate::e9_println!(
                        "[trampoline-got] tid={} name={} GOT[{:#x}]=phys={:#x} val={:#x}",
                        task.id.as_u64(),
                        task_name,
                        vaddr,
                        phys.as_u64(),
                        val
                    );
                } else {
                    crate::e9_println!(
                        "[trampoline-got] tid={} name={} GOT[{:#x}]=<not mapped>",
                        task.id.as_u64(),
                        task_name,
                        vaddr
                    );
                }
            }
        }
    }

    // Switch to the user address space stored in the task.
    // SAFETY: The address space was set up during task creation and is valid.
    unsafe {
        let as_ref = task.process.address_space_arc();
        as_ref.switch_to();
    }
    crate::e9_println!(
        "[trace][elf] ring3_trampoline switch_to done tid={}",
        task.id.as_u64()
    );
    crate::serial_println!(
        "[trace][elf] ring3_trampoline switch_to done tid={}",
        task.id.as_u64()
    );

    let user_cs = gdt::user_code_selector().0 as u64;
    let user_ss = gdt::user_data_selector().0 as u64;
    let user_rflags: u64 = 0x202; // IF=1, reserved bit 1 = 1
    crate::e9_println!(
        "[trace][elf] ring3_trampoline iret tid={} cs={:#x} ss={:#x} rflags={:#x}",
        task.id.as_u64(),
        user_cs,
        user_ss,
        user_rflags
    );
    crate::serial_println!(
        "[trace][elf] ring3_trampoline iret tid={} rip={:#x} rsp={:#x}",
        task.id.as_u64(),
        user_rip,
        user_rsp
    );

    // ----- Pre-iret LAPIC timer diagnostic -----
    // Verify that the APIC timer is actually running on this CPU before we
    // enter Ring 3 (if it is not, no timer tick = no heartbeat = silent hang).
    unsafe {
        let lvt = crate::arch::x86_64::apic::read_reg(crate::arch::x86_64::apic::REG_LVT_TIMER);
        let init_cnt =
            crate::arch::x86_64::apic::read_reg(crate::arch::x86_64::apic::REG_TIMER_INIT);
        let cur_cnt =
            crate::arch::x86_64::apic::read_reg(crate::arch::x86_64::apic::REG_TIMER_CURRENT);
        let rflags_now: u64;
        core::arch::asm!("pushfq; pop {}", out(reg) rflags_now, options(nostack));
        crate::e9_println!(
            "[trace][elf] pre-iret LAPIC: LVT={:#x} init={} cur={} IF={}",
            lvt,
            init_cnt,
            cur_cnt,
            (rflags_now >> 9) & 1
        );
        if lvt & (1 << 16) != 0 {
            crate::e9_println!(
                "[trace][elf] WARNING: LAPIC timer is MASKED (bit 16 set): no ticks will fire!"
            );
        }
        if init_cnt == 0 {
            crate::e9_println!(
                "[trace][elf] WARNING: LAPIC timer init_count=0: timer not started!"
            );
        }
    }

    crate::arch::x86_64::ring3_diag::validate_ring3_state(
        user_rip,
        user_rsp,
        user_cs as u16,
        user_ss as u16,
    );

    crate::e9_println!(
        "[elf] PRE-IRETQ tid={} rip={:#x} rsp={:#x} rflags={:#x}",
        task.id.as_u64(),
        user_rip,
        user_rsp,
        user_rflags
    );

    // E9 probe from Rust: validate_ring3_state passed, we are entering the asm.
    // If '0' is visible but '1' is not, the compiler inserted code between the
    // two that crashed (unlikely, but this rules that hypothesis out).
    crate::e9_println!(
        "E9[0] pre-asm rip={:#x} rsp={:#x} cs={:#x} ss={:#x}",
        user_rip,
        user_rsp,
        user_cs,
        user_ss,
    );

    // SAFETY: Valid user mappings have been set up. IRETQ switches to Ring 3.
    //
    // Interrupts must be masked in the final kernel instructions before
    // `swapgs ; iretq`. Otherwise a timer IRQ can land after `swapgs` but
    // before `iretq`, with `CS=0x8` and `GS=user`, and the first `gs:[..]`
    // access in the handler faults in the swapgs->iretq window.
    //
    // E9-hack probes ---------------------------------------------------------
    // Each `out 0xe9, al` writes an ASCII character to QEMU's E9 port
    // (visible with `-debugcon stdio` or `-debugcon file:e9.log`).
    // The push/pop rax around each probe protects registers allocated by the
    // compiler for the `in(reg)` constraints; the net effect on RSP is zero.
    //
    //   '1' (0x31): start of the asm block, input registers in place
    //   '2' (0x32): iretq frame fully on stack (5 words)
    //   '3' (0x33): RDI loaded with arg0, just before SWAPGS
    //   '4' (0x34): SWAPGS done; if the CPU crashes on iretq the last char is '4'
    //
    // If output stops at:
    //   '1' → RSP/alignment problem before any pushes
    //   '2' → a push faulted (kernel mapping broken?)
    //   '3' → bug in arg0 value or in RDI
    //   '4' → iretq is indeed triple-faulting (GDT/paging/TSS issue)
    //   nothing → E9 port not enabled in QEMU (add `-debugcon stdio`)
    unsafe {
        core::arch::asm!(
            // Close the IRQ window before touching GS. `iretq` restores IF=1
            // from the user RFLAGS frame, so user mode still starts with
            // interrupts enabled.
            "cli",

            // Probe 1: entry into the asm block --------------------------------
            // The input registers are already allocated by the compiler;
            // push/pop rax leaves them intact.
            "push rax",
            "mov al, 0x31",     // '1'
            "out 0xe9, al",
            "pop rax",

            // Build the iretq frame --------------------------------------------
            // Order required by IRETQ (popped in reverse):
            //   [RSP+32] SS
            //   [RSP+24] user RSP
            //   [RSP+16] RFLAGS
            //   [RSP+8]  CS
            //   [RSP+0]  RIP  ← RSP here after the 5 pushes
            "push {ss}",
            "push {rsp_val}",
            "push {rflags}",
            "push {cs}",
            "push {rip}",

            // Probe 2: iretq frame complete ------------------------------------
            "push rax",
            "mov al, 0x32",     // '2'
            "out 0xe9, al",
            "pop rax",

            // Load arg0 into RDI -----------------------------------------------
            "mov rdi, {arg0}",

            // Probe 3: RDI loaded, just before SWAPGS --------------------------
            "push rax",
            "mov al, 0x33",     // '3'
            "out 0xe9, al",
            "pop rax",

            // SWAPGS: kernel GS.base ↔ user GS.base -----------------------------
            // After this instruction, GS points to the user per-thread block.
            // The push/pop below does not touch GS, so it is safe.
            "swapgs",

            // Probe 4: SWAPGS succeeded, IRETQ imminent -------------------------
            // If the double fault happens on iretq, '4' will be the LAST
            // character visible on the E9 console.
            "push rax",
            "mov al, 0x34",     // '4'
            "out 0xe9, al",
            "pop rax",

            // IRETQ: the point of no return -------------------------------------
            "iretq",

            ss      = in(reg) user_ss,
            rsp_val = in(reg) user_rsp,
            rflags  = in(reg) user_rflags,
            cs      = in(reg) user_cs,
            rip     = in(reg) user_rip,
            arg0    = in(reg) user_arg0,
            options(noreturn),
        );
    }
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/// Load an ELF64 binary and schedule it as a Ring 3 user task.
///
/// # Arguments
/// * `elf_data` - Raw ELF file bytes (must remain valid until load completes).
/// * `name` - Name for the task (debugging purposes).
///
/// # Returns
/// The new task's `TaskId` on success, `Err` with a static error message on
/// failure.
pub fn load_and_run_elf(elf_data: &[u8], name: &'static str) -> Result<TaskId, &'static str> {
    load_and_run_elf_with_caps(elf_data, name, &[])
}

/// Load an ELF64 binary with pre-seeded capabilities and schedule it as a
/// Ring 3 user task.
pub fn load_and_run_elf_with_caps(
    elf_data: &[u8],
    name: &'static str,
    seed_caps: &[Capability],
) -> Result<TaskId, &'static str> {
    crate::e9_println!(
        "[trace][elf] load_and_run_elf enter name={} size={}",
        name,
        elf_data.len()
    );
    let task = load_elf_task_with_caps(elf_data, name, seed_caps)?;
    let task_id = task.id;
    let runtime_entry = task
        .trampoline_entry
        .load(core::sync::atomic::Ordering::Acquire);
    crate::e9_println!(
        "[trace][elf] load_and_run_elf add_task begin tid={} entry={:#x}",
        task_id.as_u64(),
        runtime_entry
    );
    crate::process::add_task(task);
    crate::e9_println!(
        "[trace][elf] load_and_run_elf add_task done tid={}",
        task_id.as_u64()
    );

    log::info!(
        "[elf] Task '{}' created: entry={:#x}, stack_top={:#x}",
        name,
        runtime_entry,
        USER_STACK_TOP,
    );

    Ok(task_id)
}

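// Auxiliary vector tags, matching the standard System V values from <elf.h>.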
const AT_PHDR: u64 = 3;
const AT_PHENT: u64 = 4;
const AT_PHNUM: u64 = 5;
const AT_PAGESZ: u64 = 6;
const AT_BASE: u64 = 7;
const AT_ENTRY: u64 = 9;
const AT_RANDOM: u64 = 25;

/// Push one auxiliary-vector (tag, value) entry onto the user stack.
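///
/// Each entry occupies 16 bytes, value pushed first with the tag below it;
/// after `push_auxv(.., AT_PAGESZ, 4096)` the quadword at `*sp` is the tag
/// `6` and the one above it is `4096`.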
fn push_auxv(user_as: &AddressSpace, sp: &mut u64, tag: u64, val: u64) -> Result<(), &'static str> {
    *sp -= 8;
    write_user_u64(user_as, *sp, val)?;
    *sp -= 8;
    write_user_u64(user_as, *sp, tag)?;
    Ok(())
}

/// Build the initial user stack (argc, argv, envp, auxv) per the System V ABI.
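///
/// Final layout, from high to low addresses (sketch):
///
/// ```text
///   "name\0"         argv[0] string
///   16 random bytes  AT_RANDOM target
///   auxv             AT_PHDR .. AT_NULL (AT_NULL highest)
///   0                envp terminator
///   0                argv terminator
///   argv0_ptr        argv[0]
///   argc = 1         <- returned rsp, 16-byte aligned
/// ```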
fn setup_boot_user_stack(
    user_as: &AddressSpace,
    name: &str,
    phdr_vaddr: u64,
    phent: u16,
    phnum: u16,
    program_entry: u64,
    interp_base: Option<u64>,
) -> Result<u64, &'static str> {
    let mut sp = USER_STACK_TOP;

    let name_nul_len = (name.len() + 1) as u64;
    sp -= name_nul_len;
    let argv0_ptr = sp;
    write_user_mapped_bytes(user_as, sp, name.as_bytes())?;
    write_user_mapped_bytes(user_as, sp + name.len() as u64, &[0])?;

    sp -= 16;
    let random_ptr = sp;
    write_user_mapped_bytes(user_as, sp, &[0x42u8; 16])?;

    sp &= !0xF;

    // AT_NULL
    push_auxv(user_as, &mut sp, 0, 0)?;
    push_auxv(user_as, &mut sp, AT_RANDOM, random_ptr)?;
    push_auxv(user_as, &mut sp, AT_ENTRY, program_entry)?;
    if let Some(base) = interp_base {
        push_auxv(user_as, &mut sp, AT_BASE, base)?;
    }
    push_auxv(user_as, &mut sp, AT_PAGESZ, 4096)?;
    push_auxv(user_as, &mut sp, AT_PHNUM, phnum as u64)?;
    push_auxv(user_as, &mut sp, AT_PHENT, phent as u64)?;
    push_auxv(user_as, &mut sp, AT_PHDR, phdr_vaddr)?;

    // envp NULL terminator
    sp -= 8;
    write_user_u64(user_as, sp, 0)?;
    // argv NULL terminator, then argv[0]
    sp -= 8;
    write_user_u64(user_as, sp, 0)?;
    sp -= 8;
    write_user_u64(user_as, sp, argv0_ptr)?;
    // argc
    sp -= 8;
    write_user_u64(user_as, sp, 1)?;

    // System V ABI: %rsp % 16 == 0 at process entry
    sp &= !0xF;
    Ok(sp)
}

/// Load an ELF image into a new user address space and build its Ring 3 task,
/// without scheduling it.
1458pub fn load_elf_task_with_caps(
1459    elf_data: &[u8],
1460    name: &'static str,
1461    seed_caps: &[Capability],
1462) -> Result<Arc<Task>, &'static str> {
1463    crate::e9_println!(
1464        "[trace][elf] load_elf_task enter name={} size={}",
1465        name,
1466        elf_data.len()
1467    );
1468    log::info!("[elf] Loading ELF '{}'...", name);
1469
1470    // Step 1: Parse and validate ELF header
1471    crate::e9_println!("[trace][elf] load_elf_task parse_header begin");
1472    let header = parse_header(elf_data)?;
1473    crate::e9_println!(
1474        "[trace][elf] load_elf_task parse_header ok type={}",
1475        if header.e_type == ET_DYN {
1476            "ET_DYN"
1477        } else {
1478            "ET_EXEC"
1479        }
1480    );
1481    // Step 2: Create user address space
1482    crate::e9_println!("[trace][elf] load_elf_task user_as begin");
1483    let user_as = Arc::new(AddressSpace::new_user()?);
1484    crate::e9_println!("[trace][elf] load_elf_task user_as done");
1485
1486    let phdrs: Vec<Elf64Phdr> = program_headers(elf_data, &header).collect();
1487    let interp_path = parse_interp_path(elf_data, &phdrs)?;
1488    let (load_bias, entry) = compute_load_bias_and_entry(&user_as, &header, &phdrs)?;
1489    let phdr_vaddr = find_relocated_phdr_vaddr(&header, &phdrs, load_bias)?;

    let phnum = header.e_phnum;
    crate::e9_println!(
        "[trace][elf] load_elf_task layout entry={:#x} bias={:#x} phdrs={}",
        entry,
        load_bias,
        phnum
    );
    log::info!(
        "[elf] ELF '{}': type={}, entry={:#x}, bias={:#x}, {} program headers",
        name,
        if header.e_type == ET_DYN {
            "ET_DYN"
        } else {
            "ET_EXEC"
        },
        entry,
        load_bias,
        phnum,
    );

    // Step 3: Load all PT_LOAD segments
    let mut load_count = 0u32;
    for phdr in phdrs.iter() {
        if phdr.p_type == PT_LOAD && phdr.p_memsz != 0 {
            load_segment(&user_as, elf_data, phdr, load_bias)?;
            load_count += 1;
        }
    }
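    // With PT_INTERP present, relocation is deferred to the dynamic linker in
    // user space; the kernel only relocates statically linked images itself.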
    if interp_path.is_none() {
        apply_dynamic_relocations(&user_as, &phdrs, header.e_type, load_bias)?;
    }

    // Diagnostic: read back a few GOT entries after relocation to verify they
    // were applied. The probed offsets are hardcoded for one specific binary
    // and are only meaningful while debugging that image.
    for test_offset in [0x12920u64, 0x12928u64, 0x12930u64] {
        let vaddr = load_bias.wrapping_add(test_offset);
        if vaddr < USER_ADDR_MAX {
            if let Some(phys) = user_as.translate(VirtAddr::new(vaddr)) {
                let ptr = crate::memory::phys_to_virt(phys.as_u64()) as *const u64;
                // SAFETY: HHDM access to a mapped user page, owned exclusively during ELF loading.
                let val = unsafe { core::ptr::read_unaligned(ptr) };
                crate::e9_println!(
                    "[reloc-check] GOT[{:#x}]=phys={:#x} val={:#x} ({})",
                    vaddr,
                    phys.as_u64(),
                    val,
                    name
                );
            } else {
                crate::e9_println!("[reloc-check] GOT[{:#x}] = <not mapped> ({})", vaddr, name);
            }
        }
    }

    crate::e9_println!(
        "[trace][elf] load_elf_task segments_done count={} has_interp={}",
        load_count,
        interp_path.is_some()
    );
    log::info!("[elf] Loaded {} PT_LOAD segment(s)", load_count);

    let mut runtime_entry = entry;
    let mut interp_base: Option<u64> = None;
    if let Some(path) = interp_path {
        let interp_data = read_elf_from_vfs(path)?;
        let interp_header = parse_header(&interp_data)?;
        let interp_phdrs: Vec<Elf64Phdr> = program_headers(&interp_data, &interp_header).collect();
        if parse_interp_path(&interp_data, &interp_phdrs)?.is_some() {
            return Err("Nested PT_INTERP is not supported");
        }
        let (interp_bias, interp_entry) =
            compute_load_bias_and_entry(&user_as, &interp_header, &interp_phdrs)?;
        let (interp_min_vaddr, _) = compute_load_bounds(&interp_phdrs)?;
        let mut interp_load_count = 0u32;
        for phdr in interp_phdrs.iter() {
            if phdr.p_type == PT_LOAD && phdr.p_memsz != 0 {
                load_segment(&user_as, &interp_data, phdr, interp_bias)?;
                interp_load_count += 1;
            }
        }
        apply_dynamic_relocations(&user_as, &interp_phdrs, interp_header.e_type, interp_bias)?;
        // Control first enters the interpreter; AT_BASE reports its load base
        // (lowest mapped vaddr plus bias) so it can relocate itself.
        runtime_entry = interp_entry;
        interp_base = Some(interp_min_vaddr.saturating_add(interp_bias));
        log::info!(
            "[elf] PT_INTERP '{}' loaded: {} PT_LOAD, entry={:#x}",
            path,
            interp_load_count,
            runtime_entry
        );
    }

    // TLS setup (Variant II: data at negative offsets from FS:0)
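    // Layout, as a sketch (addresses ascending):
    //   [tls_base .. tls_base + memsz)        TLS data + zero-filled tail
    //   [tp = tls_base + aligned_memsz]       TCB word; *tp == tp (FS:0)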
    let mut user_fs_base_val = 0u64;
    if let Some(tls) = phdrs.iter().find(|p| p.p_type == PT_TLS) {
        let tls_memsz = tls.p_memsz;
        let tls_filesz = tls.p_filesz;
        let tls_align = core::cmp::max(tls.p_align, 8).next_power_of_two();
        let aligned_memsz = (tls_memsz + tls_align - 1) & !(tls_align - 1);
        let total_size = aligned_memsz + 8;
        let n_tls_pages = ((total_size + 4095) / 4096) as usize;
        let tls_flags = VmaFlags {
            readable: true,
            writable: true,
            executable: false,
            user_accessible: true,
        };
        let tls_base = user_as
            .find_free_vma_range(0x7FFF_E000_0000, n_tls_pages, VmaPageSize::Small)
            .ok_or("No space for TLS block")?;
        user_as.map_region(
            tls_base,
            n_tls_pages,
            tls_flags,
            VmaType::Anonymous,
            VmaPageSize::Small,
        )?;
        if tls_filesz > 0 {
            let src_off = tls.p_offset as usize;
            let src_end = src_off
                .checked_add(tls_filesz as usize)
                .ok_or("PT_TLS initializer overflows")?;
            if src_end > elf_data.len() {
                return Err("PT_TLS initializer out of file bounds");
            }
            write_user_mapped_bytes(&user_as, tls_base, &elf_data[src_off..src_end])?;
        }
        // FS:0 must hold its own address (the TCB self-pointer); the thread
        // pointer sits just past the aligned TLS block.
        let tp = tls_base + aligned_memsz;
        write_user_u64(&user_as, tp, tp)?;
        user_fs_base_val = tp;
    }

    // Step 4: Map user stack
    let stack_flags = VmaFlags {
        readable: true,
        writable: true,
        executable: false,
        user_accessible: true,
    };
    user_as.map_region(
        USER_STACK_BASE,
        USER_STACK_PAGES,
        stack_flags,
        VmaType::Stack,
        VmaPageSize::Small,
    )?;
    log::debug!(
        "[elf] User stack: {:#x}..{:#x} ({} pages)",
        USER_STACK_BASE,
        USER_STACK_TOP,
        USER_STACK_PAGES,
    );

    let boot_sp = setup_boot_user_stack(
        &user_as,
        name,
        phdr_vaddr,
        header.e_phentsize,
        header.e_phnum,
        entry,
        interp_base,
    )?;

    // Step 5: Create the kernel task. Trampoline parameters are stored inside
    // the task itself so that concurrent SMP execution of multiple trampolines is safe.
    crate::e9_println!(
        "[trace][elf] load_elf_task kstack_begin size={}",
        Task::DEFAULT_STACK_SIZE
    );
    let kernel_stack = KernelStack::allocate(Task::DEFAULT_STACK_SIZE)?;
    crate::e9_println!(
        "[trace][elf] load_elf_task kstack_done virt={:#x} top={:#x}",
        kernel_stack.virt_base.as_u64(),
        kernel_stack.virt_base.as_u64() + kernel_stack.size as u64
    );
    let context = CpuContext::new(elf_ring3_trampoline as *const () as u64, &kernel_stack);
    let (pid, tid, tgid) = Task::allocate_process_ids();
    let fpu_state = crate::process::task::ExtendedState::new();
    let xcr0_mask = fpu_state.xcr0_mask;

    let task = Arc::new(Task {
        id: TaskId::new(),
        pid,
        tid,
        tgid,
        // A fresh process leads its own process group and session.
        pgid: core::sync::atomic::AtomicU32::new(pid),
        sid: core::sync::atomic::AtomicU32::new(pid),
        uid: core::sync::atomic::AtomicU32::new(0),
        euid: core::sync::atomic::AtomicU32::new(0),
        gid: core::sync::atomic::AtomicU32::new(0),
        egid: core::sync::atomic::AtomicU32::new(0),
        state: core::sync::atomic::AtomicU8::new(TaskState::Ready as u8),
        priority: TaskPriority::Normal,
        context: SyncUnsafeCell::new(context),
        resume_kind: SyncUnsafeCell::new(ResumeKind::RetFrame),
        interrupt_rsp: core::sync::atomic::AtomicU64::new(0),
        kernel_stack,
        user_stack: None,
        name,
        process: Arc::new(crate::process::process::Process::new(pid, user_as)),
        pending_signals: super::signal::SignalSet::new(),
        blocked_signals: super::signal::SignalSet::new(),
        irq_signal_delivery_blocked: core::sync::atomic::AtomicBool::new(false),
        signal_stack: SyncUnsafeCell::new(None),
        itimers: super::timer::ITimers::new(),
        wake_pending: core::sync::atomic::AtomicBool::new(false),
        wake_deadline_ns: core::sync::atomic::AtomicU64::new(0),
        trampoline_entry: core::sync::atomic::AtomicU64::new(runtime_entry),
        trampoline_stack_top: core::sync::atomic::AtomicU64::new(boot_sp),
        trampoline_arg0: core::sync::atomic::AtomicU64::new(0),
        ticks: core::sync::atomic::AtomicU64::new(0),
        sched_policy: crate::process::task::SyncUnsafeCell::new(Task::default_sched_policy(
            TaskPriority::Normal,
        )),
        // usize::MAX marks "no home CPU assigned yet".
        home_cpu: core::sync::atomic::AtomicUsize::new(usize::MAX),
        vruntime: core::sync::atomic::AtomicU64::new(0),
        fair_rq_generation: core::sync::atomic::AtomicU64::new(0),
        fair_on_rq: core::sync::atomic::AtomicBool::new(false),
        clear_child_tid: core::sync::atomic::AtomicU64::new(0),
        user_fs_base: core::sync::atomic::AtomicU64::new(user_fs_base_val),
        fpu_state: crate::process::task::SyncUnsafeCell::new(fpu_state),
        xcr0_mask: core::sync::atomic::AtomicU64::new(xcr0_mask),
        rt_link: intrusive_collections::LinkedListLink::new(),
    });

    crate::e9_println!(
        "[trace][elf] load_elf_task task_built tid={} pid={} entry={:#x} sp={:#x}",
        task.id.as_u64(),
        task.pid,
        runtime_entry,
        boot_sp
    );
    // Seed capabilities into the new task (before scheduling).
    let mut bootstrap_handle: Option<u64> = None;
    if !seed_caps.is_empty() {
        // SAFETY: task is not yet scheduled, so we have exclusive access to
        // the capability table.
        let caps = unsafe { &mut *task.process.capabilities.get() };
        for cap in seed_caps {
            let id = caps.insert(cap.clone());
            if bootstrap_handle.is_none()
                && cap.resource_type == crate::capability::ResourceType::Volume
            {
                bootstrap_handle = Some(id.as_u64());
            }
        }
    }

    // Setup stdin/stdout/stderr (fd 0/1/2) pointing to /dev/console
    // SAFETY: task is not yet scheduled, exclusive access to fd_table
    {
        let fd_table = unsafe { &mut *task.process.fd_table.get() };
        crate::vfs::console_scheme::setup_stdio(fd_table);
    }

    if let Some(h) = bootstrap_handle {
        // Program entry will see this in its first argument register (RDI).
        task.trampoline_arg0
            .store(h, core::sync::atomic::Ordering::Release);
    }

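    // Seed an interrupt frame so the first resume can IRETQ straight into
    // Ring 3. rcx/r11 double as the SYSRET-style RIP/RFLAGS image, and
    // 0x202 is RFLAGS with IF plus the always-set reserved bit 1.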
    task.seed_interrupt_frame(crate::syscall::SyscallFrame {
        r15: 0,
        r14: 0,
        r13: 0,
        r12: 0,
        rbp: 0,
        rbx: 0,
        r11: 0x202,
        r10: 0,
        r9: 0,
        r8: 0,
        rsi: 0,
        rdi: task
            .trampoline_arg0
            .load(core::sync::atomic::Ordering::Acquire),
        rdx: 0,
        rcx: runtime_entry,
        rax: 0,
        iret_rip: runtime_entry,
        iret_cs: crate::arch::x86_64::gdt::user_code_selector().0 as u64,
        iret_rflags: 0x202,
        iret_rsp: boot_sp,
        iret_ss: crate::arch::x86_64::gdt::user_data_selector().0 as u64,
    });

    // Bootstrapping: grant Silo Admin capability to the initial userspace task.
    if name == "init"
        || name == "silo-admin"
        || name.starts_with("strate-admin:")
        || name.contains("/strate-admin-")
    {
        let _ = crate::silo::grant_silo_admin_to_task(&task);
    }

    {
        let arc_data_ptr = alloc::sync::Arc::as_ptr(&task) as usize;
        let fpu_ptr = task.fpu_state.get() as usize;
        if let Some(cur) = crate::process::scheduler::current_task_clone() {
            let cur_data_ptr = alloc::sync::Arc::as_ptr(&cur) as usize;
            let cur_strong = alloc::sync::Arc::strong_count(&cur);
            log::info!(
                "[elf] Task '{}' prepared: entry={:#x}, stack_top={:#x} \
                 new_arc={:#x} new_fpu={:#x} cur_arc={:#x} cur_strong={}",
                name,
                runtime_entry,
                boot_sp,
                arc_data_ptr,
                fpu_ptr,
                cur_data_ptr,
                cur_strong,
            );
        } else {
            log::info!(
                "[elf] Task '{}' prepared: entry={:#x}, stack_top={:#x} \
                 new_arc={:#x} new_fpu={:#x} (no current task)",
                name,
                runtime_entry,
                boot_sp,
                arc_data_ptr,
                fpu_ptr,
            );
        }
    }

    Ok(task)
}

/// Loads an ELF binary (and its PT_INTERP interpreter, if any) into the
/// provided address space and returns a `LoadedElfInfo` describing the entry
/// points, program-header location, and PT_TLS image.
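///
/// A minimal sketch of a call site (error handling elided):
///
/// ```ignore
/// let user_as = AddressSpace::new_user()?;
/// let info = load_elf_image(&elf_bytes, &user_as)?;
/// // Jump to info.runtime_entry: the interpreter when PT_INTERP is present,
/// // otherwise the program's own entry point.
/// ```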
pub fn load_elf_image(
    elf_data: &[u8],
    user_as: &AddressSpace,
) -> Result<LoadedElfInfo, &'static str> {
    let header = parse_header(elf_data)?;
    let phdrs: Vec<Elf64Phdr> = program_headers(elf_data, &header).collect();
    let interp_path = parse_interp_path(elf_data, &phdrs)?;
    let (load_bias, entry) = compute_load_bias_and_entry(user_as, &header, &phdrs)?;
    let phdr_vaddr = find_relocated_phdr_vaddr(&header, &phdrs, load_bias)?;

    for phdr in phdrs.iter() {
        if phdr.p_type == PT_LOAD && phdr.p_memsz != 0 {
            load_segment(user_as, elf_data, phdr, load_bias)?;
        }
    }
    if interp_path.is_none() {
        apply_dynamic_relocations(user_as, &phdrs, header.e_type, load_bias)?;
    }

    let (tls_vaddr, tls_filesz, tls_memsz, tls_align) =
        if let Some(tls) = phdrs.iter().find(|ph| ph.p_type == PT_TLS) {
            let align = core::cmp::max(tls.p_align, 1).next_power_of_two();
            (
                tls.p_vaddr.saturating_add(load_bias),
                tls.p_filesz,
                tls.p_memsz,
                align,
            )
        } else {
            (0, 0, 0, 1)
        };

    let mut runtime_entry = entry;
    let mut interp_base = None;
    if let Some(path) = interp_path {
        let interp_data = read_elf_from_vfs(path)?;
        let interp_header = parse_header(&interp_data)?;
        let interp_phdrs: Vec<Elf64Phdr> = program_headers(&interp_data, &interp_header).collect();
        if parse_interp_path(&interp_data, &interp_phdrs)?.is_some() {
            return Err("Nested PT_INTERP is not supported");
        }
        let (interp_bias, interp_entry) =
            compute_load_bias_and_entry(user_as, &interp_header, &interp_phdrs)?;
        let (interp_min_vaddr, _) = compute_load_bounds(&interp_phdrs)?;
        for phdr in interp_phdrs.iter() {
            if phdr.p_type == PT_LOAD && phdr.p_memsz != 0 {
                load_segment(user_as, &interp_data, phdr, interp_bias)?;
            }
        }
        apply_dynamic_relocations(user_as, &interp_phdrs, interp_header.e_type, interp_bias)?;
        runtime_entry = interp_entry;
        interp_base = Some(interp_min_vaddr.saturating_add(interp_bias));
    }

    Ok(LoadedElfInfo {
        runtime_entry,
        program_entry: entry,
        phdr_vaddr,
        phent: header.e_phentsize,
        phnum: header.e_phnum,
        interp_base,
        tls_vaddr,
        tls_filesz,
        tls_memsz,
        tls_align,
    })
}

/// Public wrapper around `read_user_mapped_bytes` for callers outside this
/// module.
pub fn read_user_mapped_bytes_pub(
    user_as: &AddressSpace,
    vaddr: u64,
    out: &mut [u8],
) -> Result<(), &'static str> {
    read_user_mapped_bytes(user_as, vaddr, out)
}

/// Public wrapper around `write_user_mapped_bytes` for callers outside this
/// module.
pub fn write_user_mapped_bytes_pub(
    user_as: &AddressSpace,
    vaddr: u64,
    src: &[u8],
) -> Result<(), &'static str> {
    write_user_mapped_bytes(user_as, vaddr, src)
}

/// Public wrapper around `write_user_u64` for callers outside this module.
pub fn write_user_u64_pub(
    user_as: &AddressSpace,
    vaddr: u64,
    value: u64,
) -> Result<(), &'static str> {
    write_user_u64(user_as, vaddr, value)
}