use alloc::{sync::Arc, vec::Vec};
use x86_64::{
    structures::paging::{Mapper, Page, Size4KiB},
    VirtAddr,
};

use crate::{
    capability::Capability,
    memory::address_space::{AddressSpace, VmaFlags, VmaPageSize, VmaType},
    process::{
        task::{CpuContext, KernelStack, ResumeKind, SyncUnsafeCell, Task},
        TaskId, TaskPriority, TaskState,
    },
};

const ELF_MAGIC: [u8; 4] = [0x7F, b'E', b'L', b'F'];
const ELFCLASS64: u8 = 2;
const ELFDATA2LSB: u8 = 1;
const ET_EXEC: u16 = 2;
const ET_DYN: u16 = 3;
const EV_CURRENT: u32 = 1;
const EM_X86_64: u16 = 62;
const PT_LOAD: u32 = 1;
const PT_DYNAMIC: u32 = 2;
const PT_INTERP: u32 = 3;
const PT_TLS: u32 = 7;
const PF_X: u32 = 1;
const PF_W: u32 = 2;
const PF_R: u32 = 4;
const DT_NULL: i64 = 0;
const DT_RELA: i64 = 7;
const DT_RELASZ: i64 = 8;
const DT_RELAENT: i64 = 9;
const DT_STRTAB: i64 = 5;
const DT_SYMTAB: i64 = 6;
const DT_SYMENT: i64 = 11;
const DT_JMPREL: i64 = 23;
const DT_PLTRELSZ: i64 = 2;
const DT_PLTREL: i64 = 20;
const DT_RELACOUNT: i64 = 0x6fff_fff9;
const DT_RELR: i64 = 36;
const DT_RELRSZ: i64 = 35;
const DT_RELRENT: i64 = 37;
const R_X86_64_RELATIVE: u32 = 8;
const R_X86_64_64: u32 = 1;
const R_X86_64_COPY: u32 = 5;
const R_X86_64_GLOB_DAT: u32 = 6;
const R_X86_64_JUMP_SLOT: u32 = 7;
const R_X86_64_TPOFF64: u32 = 18;
const R_X86_64_IRELATIVE: u32 = 37;

pub const USER_ADDR_MAX: u64 = 0x0000_8000_0000_0000;
const PIE_BASE_ADDR: u64 = 0x0000_0001_0000_0000;

pub const USER_STACK_BASE: u64 = 0x0000_7FFF_F000_0000;
pub const USER_STACK_PAGES: usize = 16;
pub const USER_STACK_TOP: u64 = USER_STACK_BASE + (USER_STACK_PAGES as u64) * 4096;

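/// Summary of a fully loaded ELF image: the entry to start executing at (the
/// interpreter's entry when PT_INTERP is present), the program's own entry,
/// the relocated program-header address for the auxiliary vector, and the
/// PT_TLS template parameters.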
#[derive(Debug, Clone, Copy)]
pub struct LoadedElfInfo {
    pub runtime_entry: u64,
    pub program_entry: u64,
    pub phdr_vaddr: u64,
    pub phent: u16,
    pub phnum: u16,
    pub interp_base: Option<u64>,
    pub tls_vaddr: u64,
    pub tls_filesz: u64,
    pub tls_memsz: u64,
    pub tls_align: u64,
}

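// On-disk ELF64 structures. `#[repr(C, packed)]` matches the file layout
// byte-for-byte; all reads go through `read_unaligned`.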
#[repr(C, packed)]
#[derive(Debug, Clone, Copy)]
struct Elf64Header {
    e_ident: [u8; 16],
    e_type: u16,
    e_machine: u16,
    e_version: u32,
    e_entry: u64,
    e_phoff: u64,
    e_shoff: u64,
    e_flags: u32,
    e_ehsize: u16,
    e_phentsize: u16,
    e_phnum: u16,
    e_shentsize: u16,
    e_shnum: u16,
    e_shstrndx: u16,
}

#[repr(C, packed)]
#[derive(Debug, Clone, Copy)]
struct Elf64Phdr {
    p_type: u32,
    p_flags: u32,
    p_offset: u64,
    p_vaddr: u64,
    p_paddr: u64,
    p_filesz: u64,
    p_memsz: u64,
    p_align: u64,
}

#[repr(C, packed)]
#[derive(Debug, Clone, Copy)]
struct Elf64Dyn {
    d_tag: i64,
    d_val: u64,
}

#[repr(C, packed)]
#[derive(Debug, Clone, Copy)]
struct Elf64Rela {
    r_offset: u64,
    r_info: u64,
    r_addend: i64,
}

#[repr(C, packed)]
#[derive(Debug, Clone, Copy)]
struct Elf64Sym {
    st_name: u32,
    st_info: u8,
    st_other: u8,
    st_shndx: u16,
    st_value: u64,
    st_size: u64,
}

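/// Validates the ELF header: magic, ELF64, little-endian, x86_64, ET_EXEC or
/// ET_DYN, a sane entry point, and a program-header table that lies entirely
/// within the file.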
fn parse_header(data: &[u8]) -> Result<Elf64Header, &'static str> {
    if data.len() < core::mem::size_of::<Elf64Header>() {
        return Err("ELF data too small for header");
    }

    let header: Elf64Header =
        unsafe { core::ptr::read_unaligned(data.as_ptr() as *const Elf64Header) };

    if header.e_ident[0..4] != ELF_MAGIC {
        return Err("Bad ELF magic");
    }

    if header.e_ident[4] != ELFCLASS64 {
        return Err("Not ELF64");
    }

    if header.e_ident[5] != ELFDATA2LSB {
        return Err("Not little-endian ELF");
    }

    if header.e_machine != EM_X86_64 {
        return Err("Not x86_64 ELF");
    }

    if header.e_type != ET_EXEC && header.e_type != ET_DYN {
        return Err("Unsupported ELF type (expected ET_EXEC or ET_DYN)");
    }

    if header.e_version != EV_CURRENT {
        return Err("Unsupported ELF version");
    }

    if header.e_entry >= USER_ADDR_MAX {
        return Err("Entry point outside user address range");
    }
    if header.e_phentsize as usize != core::mem::size_of::<Elf64Phdr>() {
        return Err("Unexpected phentsize");
    }

    let ph_end = (header.e_phoff as usize)
        .checked_add((header.e_phnum as usize) * (header.e_phentsize as usize))
        .ok_or("Program header table overflows")?;
    if ph_end > data.len() {
        return Err("Program headers extend past file");
    }

    Ok(header)
}

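/// Iterates over the program-header table. Bounds were already validated by
/// `parse_header`, so each entry is read with an unchecked unaligned read.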
fn program_headers<'a>(
    data: &'a [u8],
    header: &Elf64Header,
) -> impl Iterator<Item = Elf64Phdr> + 'a {
    let phoff = header.e_phoff as usize;
    let phsize = header.e_phentsize as usize;
    let phnum = header.e_phnum as usize;

    (0..phnum).map(move |i| {
        let offset = phoff + i * phsize;
        unsafe { core::ptr::read_unaligned(data.as_ptr().add(offset) as *const Elf64Phdr) }
    })
}

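/// Returns the PT_INTERP path (the dynamic linker) if the image requests one,
/// verifying that the path is in-bounds, NUL-terminated, UTF-8, and non-empty.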
fn parse_interp_path<'a>(
    elf_data: &'a [u8],
    phdrs: &[Elf64Phdr],
) -> Result<Option<&'a str>, &'static str> {
    let Some(interp) = phdrs.iter().find(|ph| ph.p_type == PT_INTERP) else {
        return Ok(None);
    };
    if interp.p_filesz == 0 {
        return Err("PT_INTERP has empty path");
    }
    let start = interp.p_offset as usize;
    let end = start
        .checked_add(interp.p_filesz as usize)
        .ok_or("PT_INTERP range overflow")?;
    if end > elf_data.len() {
        return Err("PT_INTERP extends past file");
    }
    let raw = &elf_data[start..end];
    let nul = raw
        .iter()
        .position(|&b| b == 0)
        .ok_or("PT_INTERP path is not NUL terminated")?;
    let s = core::str::from_utf8(&raw[..nul]).map_err(|_| "PT_INTERP path is not UTF-8")?;
    if s.is_empty() {
        return Err("PT_INTERP path is empty");
    }
    Ok(Some(s))
}

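/// Computes the runtime virtual address of the program-header table (for
/// AT_PHDR) by locating the PT_LOAD segment whose file range covers e_phoff
/// and applying the load bias.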
fn find_relocated_phdr_vaddr(
    header: &Elf64Header,
    phdrs: &[Elf64Phdr],
    load_bias: u64,
) -> Result<u64, &'static str> {
    let phoff = header.e_phoff;
    for ph in phdrs {
        if ph.p_type != PT_LOAD || ph.p_filesz == 0 {
            continue;
        }
        let file_start = ph.p_offset;
        let file_end = ph
            .p_offset
            .checked_add(ph.p_filesz)
            .ok_or("PHDR location overflow")?;
        if phoff >= file_start && phoff < file_end {
            let delta = phoff - file_start;
            let vaddr = ph
                .p_vaddr
                .checked_add(delta)
                .and_then(|v| v.checked_add(load_bias))
                .ok_or("Relocated PHDR address overflow")?;
            if vaddr >= USER_ADDR_MAX {
                return Err("Relocated PHDR outside user address space");
            }
            return Ok(vaddr);
        }
    }
    Err("Program headers are not covered by a PT_LOAD segment")
}

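/// Reads an entire ELF file (currently only PT_INTERP interpreters) from the
/// VFS into memory, capped at 64 MiB to bound kernel allocations.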
fn read_elf_from_vfs(path: &str) -> Result<Vec<u8>, &'static str> {
    const MAX_ELF_SIZE: usize = 64 * 1024 * 1024;
    let fd =
        crate::vfs::open(path, crate::vfs::OpenFlags::READ).map_err(|_| "PT_INTERP open failed")?;
    let mut out = Vec::new();
    let mut buf = [0u8; 4096];
    loop {
        let n = match crate::vfs::read(fd, &mut buf) {
            Ok(n) => n,
            Err(_) => {
                let _ = crate::vfs::close(fd);
                return Err("PT_INTERP read failed");
            }
        };
        if n == 0 {
            break;
        }
        if out.len().saturating_add(n) > MAX_ELF_SIZE {
            let _ = crate::vfs::close(fd);
            return Err("PT_INTERP file too large");
        }
        out.extend_from_slice(&buf[..n]);
    }
    let _ = crate::vfs::close(fd);
    if out.is_empty() {
        return Err("PT_INTERP file is empty");
    }
    Ok(out)
}

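/// Computes the page-aligned [min, max) virtual-address span covered by all
/// PT_LOAD segments, validating per-segment sizes and the vaddr/offset
/// page-alignment congruence along the way.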
fn compute_load_bounds(phdrs: &[Elf64Phdr]) -> Result<(u64, u64), &'static str> {
    let mut min_vaddr = u64::MAX;
    let mut max_vaddr = 0u64;
    let mut saw_load = false;

    for phdr in phdrs {
        if phdr.p_type != PT_LOAD {
            continue;
        }
        if phdr.p_memsz == 0 {
            continue;
        }
        saw_load = true;

        if phdr.p_memsz < phdr.p_filesz {
            return Err("PT_LOAD memsz < filesz");
        }

        if ((phdr.p_vaddr ^ phdr.p_offset) & 0xFFF) != 0 {
            return Err("PT_LOAD alignment mismatch (vaddr/offset)");
        }

        let seg_end = phdr
            .p_vaddr
            .checked_add(phdr.p_memsz)
            .ok_or("PT_LOAD vaddr+memsz overflow")?;
        if seg_end > USER_ADDR_MAX {
            return Err("PT_LOAD exceeds user address space");
        }

        let seg_start_page = phdr.p_vaddr & !0xFFF;
        let seg_end_page = (seg_end + 0xFFF) & !0xFFF;
        min_vaddr = min_vaddr.min(seg_start_page);
        max_vaddr = max_vaddr.max(seg_end_page);
    }

    if !saw_load {
        return Err("ELF has no PT_LOAD segments");
    }
    Ok((min_vaddr, max_vaddr))
}

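/// Picks the load bias and relocated entry point. ET_EXEC images load at
/// their linked addresses (bias 0); ET_DYN images are placed at the first
/// free VMA range at or above PIE_BASE_ADDR, falling back to a lower base.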
fn compute_load_bias_and_entry(
    user_as: &AddressSpace,
    header: &Elf64Header,
    phdrs: &[Elf64Phdr],
) -> Result<(u64, u64), &'static str> {
    let (min_vaddr, max_vaddr) = compute_load_bounds(phdrs)?;
    let span = max_vaddr
        .checked_sub(min_vaddr)
        .ok_or("Invalid PT_LOAD bounds")?;

    let load_bias = if header.e_type == ET_EXEC {
        0
    } else {
        let n_pages = (span as usize).div_ceil(4096);
        let load_base = user_as
            .find_free_vma_range(PIE_BASE_ADDR, n_pages, VmaPageSize::Small)
            .or_else(|| {
                user_as.find_free_vma_range(0x0000_0000_1000_0000, n_pages, VmaPageSize::Small)
            })
            .ok_or("No virtual range for ET_DYN image")?;
        load_base
            .checked_sub(min_vaddr)
            .ok_or("ET_DYN load bias underflow")?
    };

    let relocated_end = max_vaddr
        .checked_add(load_bias)
        .ok_or("Relocated PT_LOAD range overflow")?;
    if relocated_end > USER_ADDR_MAX {
        return Err("Relocated PT_LOAD range exceeds user space");
    }

    let entry_raw = if header.e_type == ET_EXEC && header.e_entry == 0 {
        let fallback = phdrs
            .iter()
            .find(|ph| ph.p_type == PT_LOAD && ph.p_memsz != 0 && (ph.p_flags & PF_X) != 0)
            .map(|ph| ph.p_vaddr)
            .ok_or("ET_EXEC has null entry and no executable PT_LOAD")?;
        log::warn!(
            "[elf] ET_EXEC has null entry, using fallback executable segment vaddr={:#x}",
            fallback
        );
        fallback
    } else {
        header.e_entry
    };

    let relocated_entry = entry_raw
        .checked_add(load_bias)
        .ok_or("Relocated entry overflow")?;
    if relocated_entry == 0 || relocated_entry >= USER_ADDR_MAX {
        return Err("Relocated entry outside user space");
    }

    Ok((load_bias, relocated_entry))
}

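/// Tightens page-table flags for a mapped segment to its final ELF
/// permissions (segments are initially mapped writable so file data can be
/// copied in), flushing the local TLB if the target address space is active.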
fn apply_segment_permissions(
    user_as: &AddressSpace,
    page_start: u64,
    page_count: usize,
    flags: VmaFlags,
) -> Result<(), &'static str> {
    use x86_64::registers::control::Cr3;

    let pte_flags = flags.to_page_flags();
    let mut mapper = unsafe { user_as.mapper() };
    for i in 0..page_count {
        let vaddr = page_start
            .checked_add((i as u64) * 4096)
            .ok_or("Permission update address overflow")?;
        let page = Page::<Size4KiB>::from_start_address(VirtAddr::new(vaddr))
            .map_err(|_| "Invalid page while updating segment flags")?;
        // Discard the MapperFlush; the range is flushed explicitly below.
        let _ = unsafe {
            mapper
                .update_flags(page, pte_flags)
                .map_err(|_| "Failed to update segment page flags")?
        };
    }

    let (current_cr3, _) = Cr3::read();
    if current_cr3.start_address() == user_as.cr3() {
        let end = page_start + (page_count as u64) * 4096;
        crate::arch::x86_64::tlb::local_range(VirtAddr::new(page_start), VirtAddr::new(end));
    }

    Ok(())
}

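/// Copies bytes out of an already-mapped user range by translating one page
/// at a time and reading through the physical direct map, so the target
/// address space does not need to be active. `write_user_mapped_bytes` below
/// is the mirror-image writer.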
fn read_user_mapped_bytes(
    user_as: &AddressSpace,
    mut vaddr: u64,
    out: &mut [u8],
) -> Result<(), &'static str> {
    let end = vaddr
        .checked_add(out.len() as u64)
        .ok_or("Read range overflow")?;
    if end > USER_ADDR_MAX {
        return Err("Read range outside user space");
    }
    let mut copied = 0usize;
    while copied < out.len() {
        let page_off = (vaddr & 0xFFF) as usize;
        let chunk = core::cmp::min(out.len() - copied, 4096 - page_off);
        let phys = user_as
            .translate(VirtAddr::new(vaddr))
            .ok_or("Failed to translate mapped user bytes")?;
        let src = crate::memory::phys_to_virt(phys.as_u64()) as *const u8;
        unsafe { core::ptr::copy_nonoverlapping(src, out.as_mut_ptr().add(copied), chunk) };
        copied += chunk;
        vaddr = vaddr
            .checked_add(chunk as u64)
            .ok_or("Virtual address overflow while reading mapped bytes")?;
    }
    Ok(())
}

fn write_user_mapped_bytes(
    user_as: &AddressSpace,
    mut vaddr: u64,
    src: &[u8],
) -> Result<(), &'static str> {
    let end = vaddr
        .checked_add(src.len() as u64)
        .ok_or("Write range overflow")?;
    if end > USER_ADDR_MAX {
        return Err("Write range outside user space");
    }
    let mut written = 0usize;
    while written < src.len() {
        let page_off = (vaddr & 0xFFF) as usize;
        let chunk = core::cmp::min(src.len() - written, 4096 - page_off);
        let phys = user_as
            .translate(VirtAddr::new(vaddr))
            .ok_or("Failed to translate relocation target")?;
        let dst = crate::memory::phys_to_virt(phys.as_u64()) as *mut u8;
        unsafe { core::ptr::copy_nonoverlapping(src.as_ptr().add(written), dst, chunk) };
        written += chunk;
        vaddr = vaddr
            .checked_add(chunk as u64)
            .ok_or("Virtual address overflow while writing mapped bytes")?;
    }
    Ok(())
}

fn read_user_u64(user_as: &AddressSpace, vaddr: u64) -> Result<u64, &'static str> {
    let mut raw = [0u8; 8];
    read_user_mapped_bytes(user_as, vaddr, &mut raw)?;
    Ok(u64::from_le_bytes(raw))
}

fn write_user_u64(user_as: &AddressSpace, vaddr: u64, value: u64) -> Result<(), &'static str> {
    write_user_mapped_bytes(user_as, vaddr, &value.to_le_bytes())
}

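/// Applies DT_RELR (compressed relative) relocations. Even entries name an
/// absolute slot to bias and reset the cursor; odd entries are 63-bit bitmaps
/// over the slots that follow. Every touched word is rewritten as
/// value + load_bias. Returns the number of slots relocated.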
fn apply_relr_relocations(
    user_as: &AddressSpace,
    load_bias: u64,
    relr_base: u64,
    relr_size: usize,
    relr_ent: usize,
) -> Result<usize, &'static str> {
    if relr_size == 0 {
        return Ok(0);
    }
    if relr_ent != core::mem::size_of::<u64>() {
        return Err("Unsupported DT_RELRENT size");
    }
    if relr_size % relr_ent != 0 {
        return Err("DT_RELR table size is not aligned");
    }

    let count = relr_size / relr_ent;
    let mut applied = 0usize;
    let mut where_addr = 0u64;

    for i in 0..count {
        let entry_addr = relr_base
            .checked_add((i * relr_ent) as u64)
            .ok_or("DT_RELR walk overflow")?;
        let entry = read_user_u64(user_as, entry_addr)?;

        if (entry & 1) == 0 {
            where_addr = load_bias
                .checked_add(entry)
                .ok_or("DT_RELR absolute relocation overflow")?;
            if where_addr >= USER_ADDR_MAX {
                return Err("DT_RELR target outside user space");
            }
            let cur = read_user_u64(user_as, where_addr)?;
            write_user_u64(
                user_as,
                where_addr,
                cur.checked_add(load_bias)
                    .ok_or("DT_RELR relocated value overflow")?,
            )?;
            where_addr = where_addr
                .checked_add(8)
                .ok_or("DT_RELR where pointer overflow")?;
            applied += 1;
        } else {
            let mut bitmap = entry >> 1;
            for bit in 0..63u64 {
                if (bitmap & 1) != 0 {
                    let slot = where_addr
                        .checked_add(bit * 8)
                        .ok_or("DT_RELR bitmap target overflow")?;
                    if slot >= USER_ADDR_MAX {
                        return Err("DT_RELR bitmap target outside user space");
                    }
                    let cur = read_user_u64(user_as, slot)?;
                    write_user_u64(
                        user_as,
                        slot,
                        cur.checked_add(load_bias)
                            .ok_or("DT_RELR bitmap relocated value overflow")?,
                    )?;
                    applied += 1;
                }
                bitmap >>= 1;
                if bitmap == 0 {
                    break;
                }
            }
            where_addr = where_addr
                .checked_add(63 * 8)
                .ok_or("DT_RELR where advance overflow")?;
        }
    }
    Ok(applied)
}

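/// Parses PT_DYNAMIC and applies in-place relocations for ET_DYN images
/// (ET_EXEC returns immediately): RELR first, then the RELA table and the
/// PLT (DT_JMPREL) table. Symbols must be defined locally; undefined-symbol
/// relocations are rejected. DT_RELACOUNT, when present, caps the RELA count.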
fn apply_dynamic_relocations(
    user_as: &AddressSpace,
    phdrs: &[Elf64Phdr],
    elf_type: u16,
    load_bias: u64,
) -> Result<(), &'static str> {
    if elf_type != ET_DYN {
        return Ok(());
    }

    let dynamic = phdrs.iter().find(|ph| ph.p_type == PT_DYNAMIC);
    let Some(dynamic_ph) = dynamic else {
        return Ok(());
    };
    if dynamic_ph.p_filesz == 0 {
        return Ok(());
    }

    let dyn_addr = dynamic_ph
        .p_vaddr
        .checked_add(load_bias)
        .ok_or("PT_DYNAMIC relocated address overflow")?;
    let dyn_count = (dynamic_ph.p_filesz as usize) / core::mem::size_of::<Elf64Dyn>();

    let mut rela_addr: Option<u64> = None;
    let mut rela_size: usize = 0;
    let mut rela_ent: usize = core::mem::size_of::<Elf64Rela>();
    let mut jmprel_addr: Option<u64> = None;
    let mut jmprel_size: usize = 0;
    let mut pltrel_kind: Option<u64> = None;
    let mut symtab_addr: Option<u64> = None;
    let mut sym_ent: usize = core::mem::size_of::<Elf64Sym>();
    let _strtab_addr: Option<u64> = None;
    let mut rela_count_hint: Option<usize> = None;
    let mut relr_addr: Option<u64> = None;
    let mut relr_size: usize = 0;
    let mut relr_ent: usize = 0;

    for i in 0..dyn_count {
        let entry_addr = dyn_addr
            .checked_add((i * core::mem::size_of::<Elf64Dyn>()) as u64)
            .ok_or("PT_DYNAMIC walk overflow")?;
        let mut raw = [0u8; core::mem::size_of::<Elf64Dyn>()];
        read_user_mapped_bytes(user_as, entry_addr, &mut raw)?;
        let dyn_entry = unsafe { core::ptr::read_unaligned(raw.as_ptr() as *const Elf64Dyn) };

        match dyn_entry.d_tag {
            DT_NULL => break,
            DT_RELA => {
                rela_addr = Some(
                    dyn_entry
                        .d_val
                        .checked_add(load_bias)
                        .ok_or("DT_RELA relocated address overflow")?,
                )
            }
            DT_RELASZ => rela_size = dyn_entry.d_val as usize,
            DT_RELAENT => rela_ent = dyn_entry.d_val as usize,
            DT_RELACOUNT => rela_count_hint = Some(dyn_entry.d_val as usize),
            DT_JMPREL => {
                jmprel_addr = Some(
                    dyn_entry
                        .d_val
                        .checked_add(load_bias)
                        .ok_or("DT_JMPREL relocated address overflow")?,
                )
            }
            DT_PLTRELSZ => jmprel_size = dyn_entry.d_val as usize,
            DT_PLTREL => pltrel_kind = Some(dyn_entry.d_val),
            DT_SYMTAB => {
                symtab_addr = Some(
                    dyn_entry
                        .d_val
                        .checked_add(load_bias)
                        .ok_or("DT_SYMTAB relocated address overflow")?,
                )
            }
            DT_SYMENT => sym_ent = dyn_entry.d_val as usize,
            DT_STRTAB => {
                let _ = dyn_entry
                    .d_val
                    .checked_add(load_bias)
                    .ok_or("DT_STRTAB relocated address overflow")?;
            }
            DT_RELR => {
                relr_addr = Some(
                    dyn_entry
                        .d_val
                        .checked_add(load_bias)
                        .ok_or("DT_RELR relocated address overflow")?,
                )
            }
            DT_RELRSZ => relr_size = dyn_entry.d_val as usize,
            DT_RELRENT => relr_ent = dyn_entry.d_val as usize,
            _ => {}
        }
    }

    let mut relr_applied = 0usize;
    if let Some(relr_base) = relr_addr {
        relr_applied = apply_relr_relocations(user_as, load_bias, relr_base, relr_size, relr_ent)?;
    } else if relr_size != 0 || relr_ent != 0 {
        return Err("DT_RELR metadata present without DT_RELR base");
    }
    if rela_ent != core::mem::size_of::<Elf64Rela>() {
        return Err("Unsupported DT_RELAENT size");
    }
    if sym_ent != core::mem::size_of::<Elf64Sym>() {
        return Err("Unsupported DT_SYMENT size");
    }
    if pltrel_kind.is_some() && pltrel_kind != Some(DT_RELA as u64) {
        return Err("Only DT_PLTREL=DT_RELA is supported");
    }

    let read_sym_entry = |sym_idx: u32| -> Result<Elf64Sym, &'static str> {
        let symtab = symtab_addr.ok_or("Missing DT_SYMTAB for symbol relocations")?;
        let sym_addr = symtab
            .checked_add((sym_idx as u64) * (sym_ent as u64))
            .ok_or("Symbol table address overflow")?;
        let mut raw = [0u8; core::mem::size_of::<Elf64Sym>()];
        read_user_mapped_bytes(user_as, sym_addr, &mut raw)?;
        Ok(unsafe { core::ptr::read_unaligned(raw.as_ptr() as *const Elf64Sym) })
    };

    let resolve_symbol = |sym_idx: u32| -> Result<u64, &'static str> {
        if sym_idx == 0 {
            return Ok(0);
        }
        let sym = read_sym_entry(sym_idx)?;
        if sym.st_shndx == 0 {
            return Err("Undefined symbol relocation not supported");
        }
        sym.st_value
            .checked_add(load_bias)
            .ok_or("Symbol value relocation overflow")
    };

    let resolve_symbol_raw = |sym_idx: u32| -> Result<u64, &'static str> {
        if sym_idx == 0 {
            return Ok(0);
        }
        let sym = read_sym_entry(sym_idx)?;
        Ok(sym.st_value)
    };

    let resolve_symbol_size = |sym_idx: u32| -> Result<u64, &'static str> {
        if sym_idx == 0 {
            return Ok(0);
        }
        let sym = read_sym_entry(sym_idx)?;
        Ok(sym.st_size)
    };

    let apply_rela_table = |table_base: u64,
                            table_size: usize,
                            count_hint: Option<usize>|
     -> Result<usize, &'static str> {
        if table_size == 0 {
            return Ok(0);
        }
        let mut count = table_size / rela_ent;
        if let Some(hint) = count_hint {
            count = core::cmp::min(count, hint);
        }
        let mut applied = 0usize;
        for i in 0..count {
            let rela_addr_i = table_base
                .checked_add((i * rela_ent) as u64)
                .ok_or("Rela table overflow")?;
            let mut raw = [0u8; core::mem::size_of::<Elf64Rela>()];
            read_user_mapped_bytes(user_as, rela_addr_i, &mut raw)?;
            let rela = unsafe { core::ptr::read_unaligned(raw.as_ptr() as *const Elf64Rela) };

            let r_type = (rela.r_info & 0xffff_ffff) as u32;
            let r_sym = (rela.r_info >> 32) as u32;
            let target = rela
                .r_offset
                .checked_add(load_bias)
                .ok_or("Relocation target overflow")?;
            if target >= USER_ADDR_MAX {
                return Err("Relocation target outside user space");
            }

            let value = match r_type {
                R_X86_64_RELATIVE => {
                    if r_sym != 0 {
                        return Err("R_X86_64_RELATIVE with non-zero symbol");
                    }
                    (load_bias as i128)
                        .checked_add(rela.r_addend as i128)
                        .ok_or("Relocation value overflow")?
                }
                R_X86_64_GLOB_DAT | R_X86_64_JUMP_SLOT | R_X86_64_64 => {
                    let sym_val = resolve_symbol(r_sym)? as i128;
                    sym_val
                        .checked_add(rela.r_addend as i128)
                        .ok_or("Relocation value overflow")?
                }
                R_X86_64_COPY => {
                    let sym_val = resolve_symbol(r_sym)?;
                    if sym_val == 0 {
                        continue;
                    }
                    let sym_sz = resolve_symbol_size(r_sym)?;
                    if sym_sz > 0 && sym_val < USER_ADDR_MAX {
                        let mut tmp = [0u8; 256];
                        let mut off = 0usize;
                        while off < sym_sz as usize {
                            let chunk = core::cmp::min(256, sym_sz as usize - off);
                            read_user_mapped_bytes(
                                user_as,
                                sym_val + off as u64,
                                &mut tmp[..chunk],
                            )?;
                            write_user_mapped_bytes(user_as, target + off as u64, &tmp[..chunk])?;
                            off += chunk;
                        }
                    }
                    applied += 1;
                    continue;
                }
                R_X86_64_TPOFF64 => {
                    let sym_val = if r_sym != 0 {
                        resolve_symbol_raw(r_sym)? as i128
                    } else {
                        0i128
                    };
                    sym_val
                        .checked_add(rela.r_addend as i128)
                        .ok_or("TPOFF64 value overflow")?
                }
                R_X86_64_IRELATIVE => (load_bias as i128)
                    .checked_add(rela.r_addend as i128)
                    .ok_or("IRELATIVE value overflow")?,
                _ => {
                    log::warn!("[elf] Unsupported relocation type {}", r_type);
                    continue;
                }
            };
            if value < 0 || value > u64::MAX as i128 {
                return Err("Relocation value out of range");
            }
            let val_u64 = value as u64;
            if applied < 5 {
                let r_addend_copy = rela.r_addend;
                let mut before = [0u8; 8];
                let _ = read_user_mapped_bytes(user_as, target, &mut before);
                let before_val = u64::from_le_bytes(before);
                crate::e9_println!(
                    "[reloc] [{i}] r_type={} target={:#x} r_addend={:#x} value={:#x} before={:#x}",
                    r_type,
                    target,
                    r_addend_copy,
                    val_u64,
                    before_val
                );
            }
            write_user_mapped_bytes(user_as, target, &val_u64.to_le_bytes())?;
            if applied < 5 {
                let mut after = [0u8; 8];
                let _ = read_user_mapped_bytes(user_as, target, &mut after);
                let after_val = u64::from_le_bytes(after);
                crate::e9_println!(
                    "[reloc] [{i}] after_write={:#x} (expected={:#x})",
                    after_val,
                    val_u64
                );
            }
            if val_u64 >= 0xffff_8000_0000_0000 {
                let r_addend_copy = rela.r_addend;
                crate::e9_println!(
                    "[reloc-KERNEL-ADDR] [{i}] r_type={} target={:#x} r_addend={:#x} val={:#x} bias={:#x}",
                    r_type, target, r_addend_copy, val_u64, load_bias
                );
            }
            applied += 1;
        }
        Ok(applied)
    };

    let mut total_applied = 0usize;
    crate::e9_println!(
        "[reloc] apply_dynamic_relocations: bias={:#x} rela_addr={:?} rela_size={} rela_count={:?}",
        load_bias,
        rela_addr,
        rela_size,
        rela_count_hint
    );
    if let Some(rela_base) = rela_addr {
        total_applied += apply_rela_table(rela_base, rela_size, rela_count_hint)?;
    }
    if let Some(jmprel_base) = jmprel_addr {
        total_applied += apply_rela_table(jmprel_base, jmprel_size, None)?;
    }

    if total_applied > 0 {
        crate::e9_println!(
            "[reloc] applied {} RELA relocations (bias={:#x})",
            total_applied,
            load_bias
        );
    }
    if relr_applied > 0 {
        log::debug!("[elf] Applied {} RELR relocations", relr_applied);
    }
    Ok(())
}

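/// Translates ELF p_flags (PF_R/PF_W/PF_X) into user-accessible VMA flags.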
fn elf_flags_to_vma(p_flags: u32) -> VmaFlags {
    VmaFlags {
        readable: p_flags & PF_R != 0,
        writable: p_flags & PF_W != 0,
        executable: p_flags & PF_X != 0,
        user_accessible: true,
    }
}

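/// Maps one PT_LOAD segment into the target address space and copies its file
/// bytes in. Pages are first mapped writable so the copy can go through the
/// physical direct map; the final permissions are applied afterwards. BSS
/// (memsz > filesz) is covered by the zero-filled anonymous mapping.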
fn load_segment(
    user_as: &AddressSpace,
    elf_data: &[u8],
    phdr: &Elf64Phdr,
    load_bias: u64,
) -> Result<(), &'static str> {
    let vaddr = phdr
        .p_vaddr
        .checked_add(load_bias)
        .ok_or("PT_LOAD relocated vaddr overflow")?;
    let memsz = phdr.p_memsz;
    let filesz = phdr.p_filesz;
    let offset = phdr.p_offset;

    if vaddr >= USER_ADDR_MAX {
        return Err("PT_LOAD vaddr outside user space");
    }
    let end = vaddr
        .checked_add(memsz)
        .ok_or("PT_LOAD vaddr+memsz overflows")?;
    if end > USER_ADDR_MAX {
        return Err("PT_LOAD segment extends past user space");
    }

    let file_end = (offset as usize)
        .checked_add(filesz as usize)
        .ok_or("PT_LOAD offset+filesz overflows")?;
    if file_end > elf_data.len() {
        return Err("PT_LOAD file data extends past ELF");
    }

    let page_start = vaddr & !0xFFF;
    let page_end = (end + 0xFFF) & !0xFFF;
    let page_count = ((page_end - page_start) / 4096) as usize;

    let actual_flags = elf_flags_to_vma(phdr.p_flags);
    let load_flags = VmaFlags {
        readable: true,
        writable: true,
        executable: actual_flags.executable,
        user_accessible: true,
    };

    let vma_type = if actual_flags.executable {
        VmaType::Code
    } else {
        VmaType::Anonymous
    };
    log::debug!(
        "[elf] map PT_LOAD: start={:#x} pages={} filesz={:#x}",
        page_start,
        page_count,
        filesz
    );
    user_as.map_region(
        page_start,
        page_count,
        load_flags,
        vma_type,
        VmaPageSize::Small,
    )?;

    if filesz > 0 {
        let src = &elf_data[offset as usize..file_end];
        let mut copied = 0usize;

        while copied < src.len() {
            let dst_vaddr = vaddr + copied as u64;
            let page_offset = (dst_vaddr & 0xFFF) as usize;
            let chunk = core::cmp::min(src.len() - copied, 4096 - page_offset);

            let phys = user_as
                .translate(VirtAddr::new(dst_vaddr))
                .ok_or("Failed to translate user page after mapping")?;
            let hhdm_ptr = crate::memory::phys_to_virt(phys.as_u64()) as *mut u8;

            unsafe {
                core::ptr::copy_nonoverlapping(src.as_ptr().add(copied), hhdm_ptr, chunk);
            }

            copied += chunk;
        }
    }

    apply_segment_permissions(user_as, page_start, page_count, actual_flags)?;

    log::debug!(
        " PT_LOAD: {:#x}..{:#x} ({} pages, file {:#x}+{:#x}, flags {:?})",
        page_start,
        page_end,
        page_count,
        offset,
        filesz,
        actual_flags,
    );

    Ok(())
}

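/// First-run kernel-side trampoline for a freshly created user task. Runs on
/// the task's kernel stack, switches to the task's address space, performs a
/// battery of diagnostic checks, then builds an IRET frame by hand and drops
/// to ring 3 at the entry/stack recorded in the task's trampoline fields.
/// The inline `out 0xe9` markers trace progress through the final asm block.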
extern "C" fn elf_ring3_trampoline() -> ! {
    use crate::arch::x86_64::gdt;
    use core::sync::atomic::Ordering;

    crate::e9_println!("[trace][elf] ring3_trampoline before current_task");
    let task = crate::process::scheduler::current_task_clone_spin_debug("ring3_trampoline")
        .expect("elf_ring3_trampoline: no current task");
    crate::e9_println!(
        "[trace][elf] ring3_trampoline enter tid={} name={}",
        task.id.as_u64(),
        task.name
    );
    crate::serial_println!(
        "[trace][elf] ring3_trampoline enter tid={} name={}",
        task.id.as_u64(),
        task.name
    );
    task.set_resume_kind(crate::process::task::ResumeKind::IretFrame);

    let user_rip = task.trampoline_entry.load(Ordering::Acquire);
    let user_rsp = task.trampoline_stack_top.load(Ordering::Acquire);
    let user_arg0 = task.trampoline_arg0.load(Ordering::Acquire);
    crate::e9_println!(
        "[trace][elf] ring3_trampoline args tid={} rip={:#x} rsp={:#x} arg0={:#x}",
        task.id.as_u64(),
        user_rip,
        user_rsp,
        user_arg0
    );
    crate::serial_println!(
        "[trace][elf] ring3_trampoline args tid={} rip={:#x} rsp={:#x}",
        task.id.as_u64(),
        user_rip,
        user_rsp
    );

    {
        unsafe {
            let as_ref = task.process.address_space_arc();
            let task_name: &str = &task.name;
            for test_off in [0x12920u64, 0x12928u64, 0x12930u64] {
                let vaddr = 0x100000000u64.wrapping_add(test_off);
                if let Some(phys) = as_ref.translate(VirtAddr::new(vaddr)) {
                    let ptr = crate::memory::phys_to_virt(phys.as_u64()) as *const u64;
                    let val = core::ptr::read_unaligned(ptr);
                    crate::e9_println!(
                        "[trampoline-got] tid={} name={} GOT[{:#x}]=phys={:#x} val={:#x}",
                        task.id.as_u64(),
                        task_name,
                        vaddr,
                        phys.as_u64(),
                        val
                    );
                } else {
                    crate::e9_println!(
                        "[trampoline-got] tid={} name={} GOT[{:#x}]=<not mapped>",
                        task.id.as_u64(),
                        task_name,
                        vaddr
                    );
                }
            }
        }
    }

    unsafe {
        let as_ref = task.process.address_space_arc();
        as_ref.switch_to();
    }
    crate::e9_println!(
        "[trace][elf] ring3_trampoline switch_to done tid={}",
        task.id.as_u64()
    );
    crate::serial_println!(
        "[trace][elf] ring3_trampoline switch_to done tid={}",
        task.id.as_u64()
    );

    let user_cs = gdt::user_code_selector().0 as u64;
    let user_ss = gdt::user_data_selector().0 as u64;
    let user_rflags: u64 = 0x202;
    crate::e9_println!(
        "[trace][elf] ring3_trampoline iret tid={} cs={:#x} ss={:#x} rflags={:#x}",
        task.id.as_u64(),
        user_cs,
        user_ss,
        user_rflags
    );
    crate::serial_println!(
        "[trace][elf] ring3_trampoline iret tid={} rip={:#x} rsp={:#x}",
        task.id.as_u64(),
        user_rip,
        user_rsp
    );

    unsafe {
        let lvt = crate::arch::x86_64::apic::read_reg(crate::arch::x86_64::apic::REG_LVT_TIMER);
        let init_cnt =
            crate::arch::x86_64::apic::read_reg(crate::arch::x86_64::apic::REG_TIMER_INIT);
        let cur_cnt =
            crate::arch::x86_64::apic::read_reg(crate::arch::x86_64::apic::REG_TIMER_CURRENT);
        let rflags_now: u64;
        // `pushfq` touches the stack, so `options(nostack)` must not be used here.
        core::arch::asm!("pushfq; pop {}", out(reg) rflags_now);
        crate::e9_println!(
            "[trace][elf] pre-iret LAPIC: LVT={:#x} init={} cur={} IF={}",
            lvt,
            init_cnt,
            cur_cnt,
            (rflags_now >> 9) & 1
        );
        if lvt & (1 << 16) != 0 {
            crate::e9_println!(
                "[trace][elf] WARNING: LAPIC timer is MASKED (bit 16 set): no ticks will fire!"
            );
        }
        if init_cnt == 0 {
            crate::e9_println!(
                "[trace][elf] WARNING: LAPIC timer init_count=0: timer not started!"
            );
        }
    }

    crate::arch::x86_64::ring3_diag::validate_ring3_state(
        user_rip,
        user_rsp,
        user_cs as u16,
        user_ss as u16,
    );

    crate::e9_println!(
        "[elf] PRE-IRETQ tid={} rip={:#x} rsp={:#x} rflags={:#x}",
        task.id.as_u64(),
        user_rip,
        user_rsp,
        user_rflags
    );

    crate::e9_println!(
        "E9[0] pre-asm rip={:#x} rsp={:#x} cs={:#x} ss={:#x}",
        user_rip,
        user_rsp,
        user_cs,
        user_ss,
    );

    unsafe {
        core::arch::asm!(
            // Interrupts stay off until `iretq` loads the user RFLAGS.
            "cli",
            // Marker '1' on the 0xe9 debug port: entered the asm block.
            "push rax",
            "mov al, 0x31", "out 0xe9, al",
            "pop rax",
            // Hand-built IRET frame: SS, RSP, RFLAGS, CS, RIP.
            "push {ss}",
            "push {rsp_val}",
            "push {rflags}",
            "push {cs}",
            "push {rip}",
            // Marker '2': IRET frame pushed.
            "push rax",
            "mov al, 0x32", "out 0xe9, al",
            "pop rax",
            // First user argument (bootstrap capability handle) in rdi.
            "mov rdi, {arg0}",
            // Marker '3': argument register set.
            "push rax",
            "mov al, 0x33", "out 0xe9, al",
            "pop rax",
            // Switch GS base to the user value before entering ring 3.
            "swapgs",
            // Marker '4': about to iretq.
            "push rax",
            "mov al, 0x34", "out 0xe9, al",
            "pop rax",
            "iretq",
            ss = in(reg) user_ss,
            rsp_val = in(reg) user_rsp,
            rflags = in(reg) user_rflags,
            cs = in(reg) user_cs,
            rip = in(reg) user_rip,
            arg0 = in(reg) user_arg0,
            options(noreturn),
        );
    }
}

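/// Loads an ELF image, builds a task for it, and enqueues it on the
/// scheduler; a convenience wrapper over `load_and_run_elf_with_caps` with no
/// seed capabilities. A hypothetical caller (path and bytes illustrative):
///
/// ```ignore
/// let bytes = read_elf_from_vfs("/bin/init")?;
/// let tid = load_and_run_elf(&bytes, "init")?;
/// ```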
pub fn load_and_run_elf(elf_data: &[u8], name: &'static str) -> Result<TaskId, &'static str> {
    load_and_run_elf_with_caps(elf_data, name, &[])
}

pub fn load_and_run_elf_with_caps(
    elf_data: &[u8],
    name: &'static str,
    seed_caps: &[Capability],
) -> Result<TaskId, &'static str> {
    crate::e9_println!(
        "[trace][elf] load_and_run_elf enter name={} size={}",
        name,
        elf_data.len()
    );
    let task = load_elf_task_with_caps(elf_data, name, seed_caps)?;
    let task_id = task.id;
    let runtime_entry = task
        .trampoline_entry
        .load(core::sync::atomic::Ordering::Acquire);
    crate::e9_println!(
        "[trace][elf] load_and_run_elf add_task begin tid={} entry={:#x}",
        task_id.as_u64(),
        runtime_entry
    );
    crate::process::add_task(task);
    crate::e9_println!(
        "[trace][elf] load_and_run_elf add_task done tid={}",
        task_id.as_u64()
    );

    log::info!(
        "[elf] Task '{}' created: entry={:#x}, stack_top={:#x}",
        name,
        runtime_entry,
        USER_STACK_TOP,
    );

    Ok(task_id)
}

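// Auxiliary-vector tags passed to the new process (a subset of the System V
// ABI's AT_* constants).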
const AT_PHDR: u64 = 3;
const AT_PHENT: u64 = 4;
const AT_PHNUM: u64 = 5;
const AT_PAGESZ: u64 = 6;
const AT_BASE: u64 = 7;
const AT_ENTRY: u64 = 9;
const AT_RANDOM: u64 = 25;

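/// Pushes one auxv entry onto the user stack. The value is pushed first so
/// that, reading upward in memory, the pair appears as (tag, value).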
fn push_auxv(user_as: &AddressSpace, sp: &mut u64, tag: u64, val: u64) -> Result<(), &'static str> {
    *sp -= 8;
    write_user_u64(user_as, *sp, val)?;
    *sp -= 8;
    write_user_u64(user_as, *sp, tag)?;
    Ok(())
}

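/// Builds the initial System V style user stack: the argv[0] string and 16
/// random bytes at the top, then the auxiliary vector (terminated by AT_NULL,
/// which is pushed first), a NULL envp, and finally argv and argc, leaving
/// the returned stack pointer 16-byte aligned with argc on top.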
fn setup_boot_user_stack(
    user_as: &AddressSpace,
    name: &str,
    phdr_vaddr: u64,
    phent: u16,
    phnum: u16,
    program_entry: u64,
    interp_base: Option<u64>,
) -> Result<u64, &'static str> {
    let mut sp = USER_STACK_TOP;

    let name_nul_len = (name.len() + 1) as u64;
    sp -= name_nul_len;
    let argv0_ptr = sp;
    write_user_mapped_bytes(user_as, sp, name.as_bytes())?;
    write_user_mapped_bytes(user_as, sp + name.len() as u64, &[0])?;

    sp -= 16;
    let random_ptr = sp;
    write_user_mapped_bytes(user_as, sp, &[0x42u8; 16])?;

    sp &= !0xF;

    // AT_NULL terminator first (it ends up at the highest auxv address).
    push_auxv(user_as, &mut sp, 0, 0)?;
    push_auxv(user_as, &mut sp, AT_RANDOM, random_ptr)?;
    push_auxv(user_as, &mut sp, AT_ENTRY, program_entry)?;
    if let Some(base) = interp_base {
        push_auxv(user_as, &mut sp, AT_BASE, base)?;
    }
    push_auxv(user_as, &mut sp, AT_PAGESZ, 4096)?;
    push_auxv(user_as, &mut sp, AT_PHNUM, phnum as u64)?;
    push_auxv(user_as, &mut sp, AT_PHENT, phent as u64)?;
    push_auxv(user_as, &mut sp, AT_PHDR, phdr_vaddr)?;

    // envp: NULL terminator only.
    sp -= 8;
    write_user_u64(user_as, sp, 0)?;
    // argv: NULL terminator.
    sp -= 8;
    write_user_u64(user_as, sp, 0)?;
    // argv[0].
    sp -= 8;
    write_user_u64(user_as, sp, argv0_ptr)?;
    // argc = 1.
    sp -= 8;
    write_user_u64(user_as, sp, 1)?;

    sp &= !0xF;
    Ok(sp)
}

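/// Loads an ELF into a fresh address space and builds a ready-to-run `Task`
/// around it: segments, relocations (or the PT_INTERP interpreter), a TLS
/// block, the boot user stack, seed capabilities, stdio, and a seeded
/// interrupt frame targeting the runtime entry point. The task is returned
/// unscheduled; the caller decides when to enqueue it.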
pub fn load_elf_task_with_caps(
    elf_data: &[u8],
    name: &'static str,
    seed_caps: &[Capability],
) -> Result<Arc<Task>, &'static str> {
    crate::e9_println!(
        "[trace][elf] load_elf_task enter name={} size={}",
        name,
        elf_data.len()
    );
    log::info!("[elf] Loading ELF '{}'...", name);

    crate::e9_println!("[trace][elf] load_elf_task parse_header begin");
    let header = parse_header(elf_data)?;
    crate::e9_println!(
        "[trace][elf] load_elf_task parse_header ok type={}",
        if header.e_type == ET_DYN {
            "ET_DYN"
        } else {
            "ET_EXEC"
        }
    );
    crate::e9_println!("[trace][elf] load_elf_task user_as begin");
    let user_as = Arc::new(AddressSpace::new_user()?);
    crate::e9_println!("[trace][elf] load_elf_task user_as done");

    let phdrs: Vec<Elf64Phdr> = program_headers(elf_data, &header).collect();
    let interp_path = parse_interp_path(elf_data, &phdrs)?;
    let (load_bias, entry) = compute_load_bias_and_entry(&user_as, &header, &phdrs)?;
    let phdr_vaddr = find_relocated_phdr_vaddr(&header, &phdrs, load_bias)?;

    let phnum = header.e_phnum;
    crate::e9_println!(
        "[trace][elf] load_elf_task layout entry={:#x} bias={:#x} phdrs={}",
        entry,
        load_bias,
        phnum
    );
    log::info!(
        "[elf] ELF '{}': type={}, entry={:#x}, bias={:#x}, {} program headers",
        name,
        if header.e_type == ET_DYN {
            "ET_DYN"
        } else {
            "ET_EXEC"
        },
        entry,
        load_bias,
        phnum,
    );

    let mut load_count = 0u32;
    for phdr in phdrs.iter() {
        if phdr.p_type == PT_LOAD && phdr.p_memsz != 0 {
            load_segment(&user_as, elf_data, phdr, load_bias)?;
            load_count += 1;
        }
    }
    if interp_path.is_none() {
        apply_dynamic_relocations(&user_as, &phdrs, header.e_type, load_bias)?;
    }

    for test_offset in [0x12920u64, 0x12928u64, 0x12930u64] {
        let vaddr = load_bias.wrapping_add(test_offset);
        if vaddr < USER_ADDR_MAX {
            if let Some(phys) = user_as.translate(VirtAddr::new(vaddr)) {
                let ptr = crate::memory::phys_to_virt(phys.as_u64()) as *const u64;
                let val = unsafe { core::ptr::read_unaligned(ptr) };
                crate::e9_println!(
                    "[reloc-check] GOT[{:#x}]=phys={:#x} val={:#x} ({})",
                    vaddr,
                    phys.as_u64(),
                    val,
                    name
                );
            } else {
                crate::e9_println!("[reloc-check] GOT[{:#x}] = <not mapped> ({})", vaddr, name);
            }
        }
    }

    crate::e9_println!(
        "[trace][elf] load_elf_task segments_done count={} has_interp={}",
        load_count,
        interp_path.is_some()
    );
    log::info!("[elf] Loaded {} PT_LOAD segment(s)", load_count);

    let mut runtime_entry = entry;
    let mut interp_base: Option<u64> = None;
    if let Some(path) = interp_path {
        let interp_data = read_elf_from_vfs(path)?;
        let interp_header = parse_header(&interp_data)?;
        let interp_phdrs: Vec<Elf64Phdr> = program_headers(&interp_data, &interp_header).collect();
        if parse_interp_path(&interp_data, &interp_phdrs)?.is_some() {
            return Err("Nested PT_INTERP is not supported");
        }
        let (interp_bias, interp_entry) =
            compute_load_bias_and_entry(&user_as, &interp_header, &interp_phdrs)?;
        let (interp_min_vaddr, _) = compute_load_bounds(&interp_phdrs)?;
        let mut interp_load_count = 0u32;
        for phdr in interp_phdrs.iter() {
            if phdr.p_type == PT_LOAD && phdr.p_memsz != 0 {
                load_segment(&user_as, &interp_data, phdr, interp_bias)?;
                interp_load_count += 1;
            }
        }
        apply_dynamic_relocations(&user_as, &interp_phdrs, interp_header.e_type, interp_bias)?;
        runtime_entry = interp_entry;
        interp_base = Some(interp_min_vaddr.saturating_add(interp_bias));
        log::info!(
            "[elf] PT_INTERP '{}' loaded: {} PT_LOAD, entry={:#x}",
            path,
            interp_load_count,
            runtime_entry
        );
    }

    let mut user_fs_base_val = 0u64;
    if let Some(tls) = phdrs.iter().find(|p| p.p_type == PT_TLS) {
        let tls_memsz = tls.p_memsz;
        let tls_filesz = tls.p_filesz;
        let tls_align = core::cmp::max(tls.p_align, 8).next_power_of_two();
        let aligned_memsz = (tls_memsz + tls_align - 1) & !(tls_align - 1);
        // The TLS block is followed by the 8-byte self-pointer slot.
        let total_size = aligned_memsz + 8;
        let n_tls_pages = ((total_size + 4095) / 4096) as usize;
        let tls_flags = VmaFlags {
            readable: true,
            writable: true,
            executable: false,
            user_accessible: true,
        };
        let tls_base = user_as
            .find_free_vma_range(0x7FFF_E000_0000, n_tls_pages, VmaPageSize::Small)
            .ok_or("No space for TLS block")?;
        user_as.map_region(
            tls_base,
            n_tls_pages,
            tls_flags,
            VmaType::Anonymous,
            VmaPageSize::Small,
        )?;
        if tls_filesz > 0 {
            let src_off = tls.p_offset as usize;
            let src_end = src_off + tls_filesz as usize;
            if src_end <= elf_data.len() {
                write_user_mapped_bytes(&user_as, tls_base, &elf_data[src_off..src_end])?;
            }
        }
        // The thread pointer sits just past the TLS data and points at itself.
        let tp = tls_base + aligned_memsz;
        write_user_u64(&user_as, tp, tp)?;
        user_fs_base_val = tp;
    }

    let stack_flags = VmaFlags {
        readable: true,
        writable: true,
        executable: false,
        user_accessible: true,
    };
    user_as.map_region(
        USER_STACK_BASE,
        USER_STACK_PAGES,
        stack_flags,
        VmaType::Stack,
        VmaPageSize::Small,
    )?;
    log::debug!(
        "[elf] User stack: {:#x}..{:#x} ({} pages)",
        USER_STACK_BASE,
        USER_STACK_TOP,
        USER_STACK_PAGES,
    );

    let boot_sp = setup_boot_user_stack(
        &user_as,
        name,
        phdr_vaddr,
        header.e_phentsize,
        header.e_phnum,
        entry,
        interp_base,
    )?;

    crate::e9_println!(
        "[trace][elf] load_elf_task kstack_begin size={}",
        Task::DEFAULT_STACK_SIZE
    );
    let kernel_stack = KernelStack::allocate(Task::DEFAULT_STACK_SIZE)?;
    crate::e9_println!(
        "[trace][elf] load_elf_task kstack_done virt={:#x} top={:#x}",
        kernel_stack.virt_base.as_u64(),
        kernel_stack.virt_base.as_u64() + kernel_stack.size as u64
    );
    let context = CpuContext::new(elf_ring3_trampoline as *const () as u64, &kernel_stack);
    let (pid, tid, tgid) = Task::allocate_process_ids();
    let fpu_state = crate::process::task::ExtendedState::new();
    let xcr0_mask = fpu_state.xcr0_mask;

    let task = Arc::new(Task {
        id: TaskId::new(),
        pid,
        tid,
        tgid,
        pgid: core::sync::atomic::AtomicU32::new(pid),
        sid: core::sync::atomic::AtomicU32::new(pid),
        uid: core::sync::atomic::AtomicU32::new(0),
        euid: core::sync::atomic::AtomicU32::new(0),
        gid: core::sync::atomic::AtomicU32::new(0),
        egid: core::sync::atomic::AtomicU32::new(0),
        state: core::sync::atomic::AtomicU8::new(TaskState::Ready as u8),
        priority: TaskPriority::Normal,
        context: SyncUnsafeCell::new(context),
        resume_kind: SyncUnsafeCell::new(ResumeKind::RetFrame),
        interrupt_rsp: core::sync::atomic::AtomicU64::new(0),
        kernel_stack,
        user_stack: None,
        name,
        process: Arc::new(crate::process::process::Process::new(pid, user_as)),
        pending_signals: super::signal::SignalSet::new(),
        blocked_signals: super::signal::SignalSet::new(),
        irq_signal_delivery_blocked: core::sync::atomic::AtomicBool::new(false),
        signal_stack: SyncUnsafeCell::new(None),
        itimers: super::timer::ITimers::new(),
        wake_pending: core::sync::atomic::AtomicBool::new(false),
        wake_deadline_ns: core::sync::atomic::AtomicU64::new(0),
        trampoline_entry: core::sync::atomic::AtomicU64::new(runtime_entry),
        trampoline_stack_top: core::sync::atomic::AtomicU64::new(boot_sp),
        trampoline_arg0: core::sync::atomic::AtomicU64::new(0),
        ticks: core::sync::atomic::AtomicU64::new(0),
        sched_policy: crate::process::task::SyncUnsafeCell::new(Task::default_sched_policy(
            TaskPriority::Normal,
        )),
        home_cpu: core::sync::atomic::AtomicUsize::new(usize::MAX),
        vruntime: core::sync::atomic::AtomicU64::new(0),
        fair_rq_generation: core::sync::atomic::AtomicU64::new(0),
        fair_on_rq: core::sync::atomic::AtomicBool::new(false),
        clear_child_tid: core::sync::atomic::AtomicU64::new(0),
        user_fs_base: core::sync::atomic::AtomicU64::new(user_fs_base_val),
        fpu_state: crate::process::task::SyncUnsafeCell::new(fpu_state),
        xcr0_mask: core::sync::atomic::AtomicU64::new(xcr0_mask),
        rt_link: intrusive_collections::LinkedListLink::new(),
    });

    crate::e9_println!(
        "[trace][elf] load_elf_task task_built tid={} pid={} entry={:#x} sp={:#x}",
        task.id.as_u64(),
        task.pid,
        runtime_entry,
        boot_sp
    );
    // Seed the new process's capability table; the first Volume capability
    // becomes the bootstrap handle handed to the task in rdi.
    let mut bootstrap_handle: Option<u64> = None;
    if !seed_caps.is_empty() {
        let caps = unsafe { &mut *task.process.capabilities.get() };
        for cap in seed_caps {
            let id = caps.insert(cap.clone());
            if bootstrap_handle.is_none()
                && cap.resource_type == crate::capability::ResourceType::Volume
            {
                bootstrap_handle = Some(id.as_u64());
            }
        }
    }

    {
        let fd_table = unsafe { &mut *task.process.fd_table.get() };
        crate::vfs::console_scheme::setup_stdio(fd_table);
    }

    if let Some(h) = bootstrap_handle {
        task.trampoline_arg0
            .store(h, core::sync::atomic::Ordering::Release);
    }

    task.seed_interrupt_frame(crate::syscall::SyscallFrame {
        r15: 0,
        r14: 0,
        r13: 0,
        r12: 0,
        rbp: 0,
        rbx: 0,
        r11: 0x202,
        r10: 0,
        r9: 0,
        r8: 0,
        rsi: 0,
        rdi: task
            .trampoline_arg0
            .load(core::sync::atomic::Ordering::Acquire),
        rdx: 0,
        rcx: runtime_entry,
        rax: 0,
        iret_rip: runtime_entry,
        iret_cs: crate::arch::x86_64::gdt::user_code_selector().0 as u64,
        iret_rflags: 0x202,
        iret_rsp: boot_sp,
        iret_ss: crate::arch::x86_64::gdt::user_data_selector().0 as u64,
    });

    if name == "init"
        || name == "silo-admin"
        || name.starts_with("strate-admin:")
        || name.contains("/strate-admin-")
    {
        let _ = crate::silo::grant_silo_admin_to_task(&task);
    }

    {
        let arc_data_ptr = alloc::sync::Arc::as_ptr(&task) as usize;
        let fpu_ptr = task.fpu_state.get() as usize;
        if let Some(cur) = crate::process::scheduler::current_task_clone() {
            let cur_data_ptr = alloc::sync::Arc::as_ptr(&cur) as usize;
            let cur_strong = alloc::sync::Arc::strong_count(&cur);
            log::info!(
                "[elf] Task '{}' prepared: entry={:#x}, stack_top={:#x} \
                 new_arc={:#x} new_fpu={:#x} cur_arc={:#x} cur_strong={}",
                name,
                runtime_entry,
                boot_sp,
                arc_data_ptr,
                fpu_ptr,
                cur_data_ptr,
                cur_strong,
            );
        } else {
            log::info!(
                "[elf] Task '{}' prepared: entry={:#x}, stack_top={:#x} \
                 new_arc={:#x} new_fpu={:#x} (no current task)",
                name,
                runtime_entry,
                boot_sp,
                arc_data_ptr,
                fpu_ptr,
            );
        }
    }

    Ok(task)
}

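/// Loads an ELF (and its PT_INTERP interpreter, if any) into an existing
/// address space without creating a task. Used by exec-style paths; the
/// caller sets up the stack, auxv, and TLS from the returned `LoadedElfInfo`.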
pub fn load_elf_image(
    elf_data: &[u8],
    user_as: &AddressSpace,
) -> Result<LoadedElfInfo, &'static str> {
    let header = parse_header(elf_data)?;
    let phdrs: Vec<Elf64Phdr> = program_headers(elf_data, &header).collect();
    let interp_path = parse_interp_path(elf_data, &phdrs)?;
    let (load_bias, entry) = compute_load_bias_and_entry(user_as, &header, &phdrs)?;
    let phdr_vaddr = find_relocated_phdr_vaddr(&header, &phdrs, load_bias)?;

    for phdr in phdrs.iter() {
        if phdr.p_type == PT_LOAD && phdr.p_memsz != 0 {
            load_segment(user_as, elf_data, phdr, load_bias)?;
        }
    }
    if interp_path.is_none() {
        apply_dynamic_relocations(user_as, &phdrs, header.e_type, load_bias)?;
    }

    let (tls_vaddr, tls_filesz, tls_memsz, tls_align) =
        if let Some(tls) = phdrs.iter().find(|ph| ph.p_type == PT_TLS) {
            let align = core::cmp::max(tls.p_align, 1).next_power_of_two();
            (
                tls.p_vaddr.saturating_add(load_bias),
                tls.p_filesz,
                tls.p_memsz,
                align,
            )
        } else {
            (0, 0, 0, 1)
        };

    let mut runtime_entry = entry;
    let mut interp_base = None;
    if let Some(path) = interp_path {
        let interp_data = read_elf_from_vfs(path)?;
        let interp_header = parse_header(&interp_data)?;
        let interp_phdrs: Vec<Elf64Phdr> = program_headers(&interp_data, &interp_header).collect();
        if parse_interp_path(&interp_data, &interp_phdrs)?.is_some() {
            return Err("Nested PT_INTERP is not supported");
        }
        let (interp_bias, interp_entry) =
            compute_load_bias_and_entry(user_as, &interp_header, &interp_phdrs)?;
        let (interp_min_vaddr, _) = compute_load_bounds(&interp_phdrs)?;
        for phdr in interp_phdrs.iter() {
            if phdr.p_type == PT_LOAD && phdr.p_memsz != 0 {
                load_segment(user_as, &interp_data, phdr, interp_bias)?;
            }
        }
        apply_dynamic_relocations(user_as, &interp_phdrs, interp_header.e_type, interp_bias)?;
        runtime_entry = interp_entry;
        interp_base = Some(interp_min_vaddr.saturating_add(interp_bias));
    }

    Ok(LoadedElfInfo {
        runtime_entry,
        program_entry: entry,
        phdr_vaddr,
        phent: header.e_phentsize,
        phnum: header.e_phnum,
        interp_base,
        tls_vaddr,
        tls_filesz,
        tls_memsz,
        tls_align,
    })
}

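// Public re-exports of the user-memory helpers for callers outside this
// module (e.g. exec and syscall paths).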
pub fn read_user_mapped_bytes_pub(
    user_as: &AddressSpace,
    vaddr: u64,
    out: &mut [u8],
) -> Result<(), &'static str> {
    read_user_mapped_bytes(user_as, vaddr, out)
}

pub fn write_user_mapped_bytes_pub(
    user_as: &AddressSpace,
    vaddr: u64,
    src: &[u8],
) -> Result<(), &'static str> {
    write_user_mapped_bytes(user_as, vaddr, src)
}

pub fn write_user_u64_pub(
    user_as: &AddressSpace,
    vaddr: u64,
    value: u64,
) -> Result<(), &'static str> {
    write_user_u64(user_as, vaddr, value)
}