Skip to main content

strat9_kernel/silo/
mod.rs

1//! Silo manager (kernel-side, minimal mechanisms only)
2//!
3//! This module provides the core kernel structures and syscalls
4//! to create and manage silos. Policy lives in userspace (silo admin).
5
6use crate::{
7    capability::{get_capability_manager, CapId, CapPermissions, ResourceType},
8    hardware::storage::{ahci, virtio_block},
9    ipc::port::{self, PortId},
10    memory::{UserSliceRead, UserSliceWrite},
11    process::{current_task_clone, task::Task, TaskId},
12    sync::SpinLock,
13    syscall::error::SyscallError,
14};
15use alloc::{
16    boxed::Box,
17    collections::{BTreeMap, VecDeque},
18    string::{String, ToString},
19    sync::Arc,
20    vec::Vec,
21};
22use core::sync::atomic::{AtomicU64, Ordering};
23
24// ============================================================================
25// Public ABI structs (repr(C) for syscall boundary)
26// ============================================================================
27
/// Privilege tier of a silo.
///
/// `SiloId::new` derives the tier from the numeric silo id: ids 1..=9 are
/// `Critical`, ids 10..=999 are `System`, and every other id is `User`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[repr(u8)]
pub enum SiloTier {
    /// Tier given to silo ids 1..=9.
    Critical = 0,
    /// Tier given to silo ids 10..=999.
    System = 1,
    /// Tier given to all remaining silo ids (this includes id 0).
    User = 2,
}
35
/// Identifier of a silo: the numeric id plus the tier derived from it.
///
/// `repr(C)` because the struct is part of the public ABI section of this
/// module.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct SiloId {
    /// Numeric silo id; also the key used by the silo manager's map.
    pub sid: u32,
    /// Tier computed from `sid` by `SiloId::new`.
    pub tier: SiloTier,
}
42
43impl SiloId {
44    /// Creates a new instance.
45    pub const fn new(sid: u32) -> Self {
46        let tier = match sid {
47            1..=9 => SiloTier::Critical,
48            10..=999 => SiloTier::System,
49            _ => SiloTier::User,
50        };
51        Self { sid, tier }
52    }
53
54    /// Returns this as u64.
55    pub fn as_u64(&self) -> u64 {
56        self.sid as u64
57    }
58}
59
60use bitflags::bitflags;
61
bitflags! {
    /// Control permission bits: the most significant octal digit of an
    /// `OctalMode` (bits 6..=8 of the raw mode value).
    #[repr(transparent)]
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub struct ControlMode: u8 {
        /// Octal 4.
        const LIST  = 0b100;
        /// Octal 2.
        const STOP  = 0b010;
        /// Octal 1.
        const SPAWN = 0b001;
    }
}
71
bitflags! {
    /// Hardware permission bits: the middle octal digit of an `OctalMode`
    /// (bits 3..=5 of the raw mode value).
    #[repr(transparent)]
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub struct HardwareMode: u8 {
        /// Octal 4.
        const INTERRUPT = 0b100;
        /// Octal 2.
        const IO        = 0b010;
        /// Octal 1.
        const DMA       = 0b001;
    }
}
81
bitflags! {
    /// Registry permission bits: the least significant octal digit of an
    /// `OctalMode` (bits 0..=2 of the raw mode value).
    #[repr(transparent)]
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub struct RegistryMode: u8 {
        /// Octal 4.
        const LOOKUP = 0b100;
        /// Octal 2.
        const BIND   = 0b010;
        /// Octal 1.
        const PROXY  = 0b001;
    }
}
91
/// Decoded three-digit octal permission mode of a silo.
///
/// The raw `u16` form packs the digits as `control << 6 | hardware << 3 |
/// registry` (see `OctalMode::from_octal`).
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct OctalMode {
    /// Control permissions (most significant digit).
    pub control: ControlMode,
    /// Hardware permissions (middle digit).
    pub hardware: HardwareMode,
    /// Registry permissions (least significant digit).
    pub registry: RegistryMode,
}
99
100impl OctalMode {
101    /// Builds this from octal.
102    pub const fn from_octal(val: u16) -> Self {
103        Self {
104            control: ControlMode::from_bits_truncate(((val >> 6) & 0o7) as u8),
105            hardware: HardwareMode::from_bits_truncate(((val >> 3) & 0o7) as u8),
106            registry: RegistryMode::from_bits_truncate((val & 0o7) as u8),
107        }
108    }
109
110    /// Returns whether subset of.
111    pub const fn is_subset_of(&self, other: &OctalMode) -> bool {
112        (self.control.bits() & !other.control.bits() == 0)
113            && (self.hardware.bits() & !other.hardware.bits() == 0)
114            && (self.registry.bits() & !other.registry.bits() == 0)
115    }
116
117    /// Performs the pledge operation.
118    pub fn pledge(&mut self, new_mode: OctalMode) -> Result<(), SyscallError> {
119        if !new_mode.is_subset_of(self) {
120            return Err(SyscallError::PermissionDenied); // Escalation attempt
121        }
122        *self = new_mode;
123        Ok(())
124    }
125}
126
127/// Performs the sys silo pledge operation.
128pub fn sys_silo_pledge(mode_val: u64) -> Result<u64, SyscallError> {
129    let new_mode = OctalMode::from_octal(mode_val as u16);
130    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
131
132    let mut mgr = SILO_MANAGER.lock();
133    if let Some(silo_id) = mgr.silo_for_task(task.id) {
134        if let Ok(silo) = mgr.get_mut(silo_id) {
135            silo.mode.pledge(new_mode)?;
136
137            mgr.push_event(SiloEvent {
138                silo_id: silo_id as u64,
139                kind: SiloEventKind::Started, // Re-using Started as "Updated" for now
140                data0: mode_val,
141                data1: 0,
142                tick: crate::process::scheduler::ticks(),
143            });
144            return Ok(0);
145        }
146    }
147    Err(SyscallError::BadHandle)
148}
149
150/// Performs the sys silo unveil operation.
151pub fn sys_silo_unveil(
152    path_ptr: u64,
153    path_len: u64,
154    rights_bits: u64,
155) -> Result<u64, SyscallError> {
156    const MAX_UNVEIL_PATH: usize = 1024;
157    const MAX_UNVEIL_RULES: usize = 128;
158
159    if path_ptr == 0 {
160        return Err(SyscallError::Fault);
161    }
162    let len = usize::try_from(path_len).map_err(|_| SyscallError::InvalidArgument)?;
163    if len == 0 || len > MAX_UNVEIL_PATH {
164        return Err(SyscallError::InvalidArgument);
165    }
166    let user = UserSliceRead::new(path_ptr, len)?;
167    let raw = user.read_to_vec();
168    let path = core::str::from_utf8(&raw).map_err(|_| SyscallError::InvalidArgument)?;
169    if path.is_empty() || !path.starts_with('/') || path.as_bytes().iter().any(|b| *b == 0) {
170        return Err(SyscallError::InvalidArgument);
171    }
172    let path = normalize_unveil_path(path)?;
173    let rights = UnveilRights::from_bits(rights_bits)?;
174
175    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
176    let mut mgr = SILO_MANAGER.lock();
177    let silo_id = mgr.silo_for_task(task.id).ok_or(SyscallError::BadHandle)?;
178    let silo = mgr.get_mut(silo_id)?;
179
180    if let Some(rule) = silo.unveil_rules.iter_mut().find(|r| r.path == path) {
181        rule.rights = rule.rights.intersect(rights);
182        return Ok(0);
183    }
184    if silo.unveil_rules.len() >= MAX_UNVEIL_RULES {
185        return Err(SyscallError::QueueFull);
186    }
187    silo.unveil_rules.push(UnveilRule { path, rights });
188    Ok(0)
189}
190
191/// Performs the sys silo enter sandbox operation.
192pub fn sys_silo_enter_sandbox() -> Result<u64, SyscallError> {
193    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
194    let mut mgr = SILO_MANAGER.lock();
195    let silo_id = mgr.silo_for_task(task.id).ok_or(SyscallError::BadHandle)?;
196    let silo = mgr.get_mut(silo_id)?;
197    if silo.sandboxed {
198        return Ok(0);
199    }
200    silo.sandboxed = true;
201    silo.mode.registry = RegistryMode::empty();
202    silo.config.mode =
203        ((silo.mode.control.bits() as u16) << 6) | ((silo.mode.hardware.bits() as u16) << 3);
204    Ok(0)
205}
206
207/// Performs the enforce silo may grant operation.
208pub fn enforce_silo_may_grant() -> Result<(), SyscallError> {
209    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
210    if is_admin_task(&task) {
211        return Ok(());
212    }
213    let mgr = SILO_MANAGER.lock();
214    let Some(silo_id) = mgr.silo_for_task(task.id) else {
215        return Ok(());
216    };
217    let silo = mgr.get(silo_id)?;
218    if silo.sandboxed {
219        return Err(SyscallError::PermissionDenied);
220    }
221    Ok(())
222}
223
224/// Performs the normalize unveil path operation.
225fn normalize_unveil_path(path: &str) -> Result<String, SyscallError> {
226    if !path.starts_with('/') {
227        return Err(SyscallError::InvalidArgument);
228    }
229    let mut out = String::new();
230    let mut prev_slash = false;
231    for ch in path.chars() {
232        if ch == '/' {
233            if !prev_slash {
234                out.push('/');
235            }
236            prev_slash = true;
237            continue;
238        }
239        if ch == '\0' {
240            return Err(SyscallError::InvalidArgument);
241        }
242        prev_slash = false;
243        out.push(ch);
244    }
245    while out.len() > 1 && out.ends_with('/') {
246        out.pop();
247    }
248    if out.is_empty() {
249        out.push('/');
250    }
251    Ok(out)
252}
253
/// Returns whether `path` is covered by the unveil rule path `rule`.
///
/// The root rule `/` covers everything. Any other rule covers the path
/// that equals it and every path below it, i.e. where `rule` is followed
/// by a `/` in `path`. A mere textual prefix (`/foo` vs `/foobar`) does not
/// match.
fn path_rule_matches(rule: &str, path: &str) -> bool {
    if rule == "/" {
        return true;
    }
    match path.strip_prefix(rule) {
        // Either an exact match, or the remainder starts a new component.
        Some(rest) => rest.is_empty() || rest.starts_with('/'),
        None => false,
    }
}
269
270/// Performs the enforce path for current task operation.
271pub fn enforce_path_for_current_task(
272    path: &str,
273    want_read: bool,
274    want_write: bool,
275    want_execute: bool,
276) -> Result<(), SyscallError> {
277    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
278    if is_admin_task(&task) {
279        return Ok(());
280    }
281    let path = normalize_unveil_path(path)?;
282    let mgr = SILO_MANAGER.lock();
283    let Some(silo_id) = mgr.silo_for_task(task.id) else {
284        return Ok(());
285    };
286    let silo = mgr.get(silo_id)?;
287    if silo.sandboxed {
288        return Err(SyscallError::PermissionDenied);
289    }
290    if silo.unveil_rules.is_empty() {
291        return Ok(());
292    }
293    for rule in &silo.unveil_rules {
294        if !path_rule_matches(&rule.path, &path) {
295            continue;
296        }
297        if (!want_read || rule.rights.read)
298            && (!want_write || rule.rights.write)
299            && (!want_execute || rule.rights.execute)
300        {
301            return Ok(());
302        }
303    }
304    Err(SyscallError::PermissionDenied)
305}
306
/// Access rights attached to an unveil rule.
///
/// Decoded from a syscall bit mask by `UnveilRights::from_bits`
/// (bit 0 = read, bit 1 = write, bit 2 = execute).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(C)]
struct UnveilRights {
    /// Read access is granted.
    read: bool,
    /// Write access is granted.
    write: bool,
    /// Execute access is granted.
    execute: bool,
}
314
315impl UnveilRights {
316    /// Builds this from bits.
317    fn from_bits(bits: u64) -> Result<Self, SyscallError> {
318        if bits & !0x7 != 0 {
319            return Err(SyscallError::InvalidArgument);
320        }
321        Ok(Self {
322            read: (bits & 0x1) != 0,
323            write: (bits & 0x2) != 0,
324            execute: (bits & 0x4) != 0,
325        })
326    }
327
328    /// Performs the intersect operation.
329    fn intersect(self, other: Self) -> Self {
330        Self {
331            read: self.read && other.read,
332            write: self.write && other.write,
333            execute: self.execute && other.execute,
334        }
335    }
336}
337
/// One unveil entry of a silo: a path and the rights granted on it.
#[derive(Debug, Clone)]
struct UnveilRule {
    /// Path as produced by `normalize_unveil_path`.
    path: String,
    /// Rights granted for `path` and everything below it.
    rights: UnveilRights,
}
343
/// Family a silo belongs to.
///
/// The numeric values are the ones accepted in `SiloConfig::family` and
/// decoded by `decode_family`.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StrateFamily {
    SYS = 0,
    DRV = 1,
    FS = 2,
    NET = 3,
    WASM = 4,
    /// Used by `SiloConfig::default()`.
    USR = 5,
}
354
/// State of a silo, stored in `Silo::state`.
///
/// `repr(u32)` with explicit discriminants so the numeric values are fixed.
#[repr(u32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SiloState {
    /// Initial state assigned by `SiloManager::create_silo`.
    Created = 0,
    Loading = 1,
    Ready = 2,
    Running = 3,
    Paused = 4,
    Stopping = 5,
    Stopped = 6,
    Crashed = 7,
    Zombie = 8,
    Destroyed = 9,
}
369
// Bits of `SiloConfig::flags`.

/// Bit 0 of `SiloConfig::flags`.
pub const SILO_FLAG_ADMIN: u64 = 1 << 0;
/// Bit 1. When set, `SiloConfig::validate` requires non-zero
/// `graphics_max_sessions` and `graphics_session_ttl_sec`; when clear, both
/// must be zero.
pub const SILO_FLAG_GRAPHICS: u64 = 1 << 1;
/// Bit 2. `SiloConfig::validate` rejects it unless `SILO_FLAG_GRAPHICS` is
/// also set.
pub const SILO_FLAG_WEBRTC_NATIVE: u64 = 1 << 2;
/// Bit 3 of `SiloConfig::flags`.
pub const SILO_FLAG_GRAPHICS_READ_ONLY: u64 = 1 << 3;
/// Bit 4 of `SiloConfig::flags`.
pub const SILO_FLAG_WEBRTC_TURN_FORCE: u64 = 1 << 4;
375
/// Silo creation parameters, copied byte-for-byte from userspace by
/// `read_user_config`.
///
/// `repr(C)`: field order and types are ABI. Every field is a plain integer.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct SiloConfig {
    /// Lower memory bound. Must not exceed `mem_max` unless `mem_max` is 0.
    pub mem_min: u64,
    /// Upper memory bound; 0 disables the `mem_min <= mem_max` check
    /// (presumably "no limit" — confirm with the accounting code).
    pub mem_max: u64,
    pub cpu_shares: u32,
    /// CPU quota; a non-zero value requires a non-zero `cpu_period_us`.
    pub cpu_quota_us: u64,
    pub cpu_period_us: u64,
    pub cpu_affinity_mask: u64,
    pub max_tasks: u32,
    pub io_bw_read: u64,
    pub io_bw_write: u64,
    /// User address of the capability list; must be non-zero when
    /// `caps_len > 0`.
    pub caps_ptr: u64,
    /// Number of entries at `caps_ptr`; at most `MAX_SILO_CAPS`.
    pub caps_len: u64,
    /// Combination of the `SILO_FLAG_*` bits.
    pub flags: u64,
    /// Requested silo id; the tier is derived from it by `SiloId::new`.
    pub sid: u32,
    /// Raw octal permission mode, decoded by `OctalMode::from_octal`.
    pub mode: u16,
    /// Raw `StrateFamily` value, decoded by `decode_family`.
    pub family: u8,
    /// CPU features that this silo requires (bitflags from `CpuFeatures`).
    pub cpu_features_required: u64,
    /// CPU features that this silo is allowed to use.
    pub cpu_features_allowed: u64,
    /// Effective XCR0 mask (computed from allowed features & host capabilities).
    pub xcr0_mask: u64,
    /// Maximum concurrent graphics sessions for this silo (0 = disabled).
    pub graphics_max_sessions: u16,
    /// Graphics session time-to-live in seconds.
    pub graphics_session_ttl_sec: u32,
    /// Reserved for ABI expansion.
    pub graphics_reserved: u16,
}
407
408impl Default for SiloConfig {
409    fn default() -> Self {
410        SiloConfig {
411            mem_min: 0,
412            mem_max: 0,
413            cpu_shares: 0,
414            cpu_quota_us: 0,
415            cpu_period_us: 0,
416            cpu_affinity_mask: 0,
417            max_tasks: 0,
418            io_bw_read: 0,
419            io_bw_write: 0,
420            caps_ptr: 0,
421            caps_len: 0,
422            flags: 0,
423            sid: 42,
424            mode: 0,
425            family: StrateFamily::USR as u8,
426            cpu_features_required: 0,
427            cpu_features_allowed: u64::MAX,
428            xcr0_mask: 0,
429            graphics_max_sessions: 0,
430            graphics_session_ttl_sec: 0,
431            graphics_reserved: 0,
432        }
433    }
434}
435
436impl SiloConfig {
437    /// Performs the validate operation.
438    fn validate(&self) -> Result<(), SyscallError> {
439        if self.mem_min > self.mem_max && self.mem_max != 0 {
440            return Err(SyscallError::InvalidArgument);
441        }
442        if self.cpu_quota_us > 0 && self.cpu_period_us == 0 {
443            return Err(SyscallError::InvalidArgument);
444        }
445        if self.caps_len > MAX_SILO_CAPS as u64 {
446            return Err(SyscallError::InvalidArgument);
447        }
448        if self.caps_len > 0 && self.caps_ptr == 0 {
449            return Err(SyscallError::InvalidArgument);
450        }
451        if self.flags & SILO_FLAG_WEBRTC_NATIVE != 0 && self.flags & SILO_FLAG_GRAPHICS == 0 {
452            return Err(SyscallError::InvalidArgument);
453        }
454        if self.flags & SILO_FLAG_GRAPHICS == 0 {
455            if self.graphics_max_sessions != 0 || self.graphics_session_ttl_sec != 0 {
456                return Err(SyscallError::InvalidArgument);
457            }
458        } else {
459            if self.graphics_max_sessions == 0 {
460                return Err(SyscallError::InvalidArgument);
461            }
462            if self.graphics_session_ttl_sec == 0 {
463                return Err(SyscallError::InvalidArgument);
464            }
465        }
466        Ok(())
467    }
468}
469
/// On-disk header of a CMOD module image, read directly from the start of
/// the module blob by `parse_module_header`.
///
/// `repr(C, packed)`: there is no padding, so multi-byte fields may be
/// misaligned. Copy fields out by value; do not take references to them.
#[repr(C, packed)]
#[derive(Clone, Copy)]
pub struct Strat9ModuleHeader {
    pub magic: [u8; 4], // "CMOD"
    /// Header version; `parse_module_header` accepts 1 and 2.
    pub version: u16,
    pub cpu_arch: u8, // 0 = x86_64
    /// `MODULE_FLAG_*` bits.
    pub flags: u32,
    /// Byte offset of the code segment within the blob.
    pub code_offset: u64,
    /// Size of the code segment in bytes.
    pub code_size: u64,
    /// Byte offset of the data segment within the blob.
    pub data_offset: u64,
    /// Size of the data segment in bytes.
    pub data_size: u64,
    pub bss_size: u64,
    /// Entry point; validated to be below `code_size` when code is present.
    pub entry_point: u64,
    pub export_table_offset: u64,
    pub import_table_offset: u64,
    pub relocation_table_offset: u64,
    /// Must be non-zero when `MODULE_FLAG_SIGNED` is set.
    pub key_id: [u8; 8],
    /// Must be non-zero when `MODULE_FLAG_SIGNED` is set.
    pub signature: [u8; 64],
    /// CPU features required by this module (CpuFeatures bitflags). Header v2+.
    pub cpu_features_required: u64,
    pub reserved: [u8; 48],
}
492
493impl core::fmt::Debug for Strat9ModuleHeader {
494    /// Performs the fmt operation.
495    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
496        // SAFETY: read fields via read_unaligned to avoid UB on packed struct.
497        let version = unsafe { core::ptr::addr_of!(self.version).read_unaligned() };
498        let flags = unsafe { core::ptr::addr_of!(self.flags).read_unaligned() };
499        let entry = unsafe { core::ptr::addr_of!(self.entry_point).read_unaligned() };
500        let code_sz = unsafe { core::ptr::addr_of!(self.code_size).read_unaligned() };
501        let data_sz = unsafe { core::ptr::addr_of!(self.data_size).read_unaligned() };
502        f.debug_struct("Strat9ModuleHeader")
503            .field("magic", &self.magic)
504            .field("version", &version)
505            .field("cpu_arch", &self.cpu_arch)
506            .field("flags", &flags)
507            .field("entry_point", &entry)
508            .field("code_size", &code_sz)
509            .field("data_size", &data_sz)
510            .finish_non_exhaustive()
511    }
512}
513
/// Summary of a loaded module (`repr(C)`, part of the public ABI section).
///
/// Several fields share names with `Strat9ModuleHeader` fields; presumably
/// they are copied from it for CMOD modules — confirm against the code that
/// fills this struct.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct ModuleInfo {
    /// Module identifier.
    pub id: u64,
    pub format: u32, // 0 = raw/ELF, 1 = CMOD
    pub flags: u32,
    pub version: u16,
    pub cpu_arch: u8,
    pub reserved: u8,
    pub code_size: u64,
    pub data_size: u64,
    pub bss_size: u64,
    pub entry_point: u64,
    pub total_size: u64,
}
529
/// Kind of a `SiloEvent`.
#[repr(u32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SiloEventKind {
    /// Also emitted by `sys_silo_pledge` as a stand-in for "mode updated".
    Started = 1,
    Stopped = 2,
    Killed = 3,
    /// `SiloEvent::data0` carries a value built by `pack_fault`.
    Crashed = 4,
    Paused = 5,
    Resumed = 6,
}
540
/// Fault reason stored in the low bits of a packed fault value
/// (see `pack_fault`).
#[repr(u64)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SiloFaultReason {
    PageFault = 1,
    GeneralProtection = 2,
    InvalidOpcode = 3,
}
548
/// Event record queued by `SiloManager::push_event` (`repr(C)`).
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct SiloEvent {
    /// Numeric id of the silo the event refers to.
    pub silo_id: u64,
    /// What happened.
    pub kind: SiloEventKind,
    /// Kind-specific payload; for `Crashed` see the encoding documented at
    /// `FAULT_SUBCODE_SHIFT`.
    pub data0: u64,
    /// Second kind-specific payload word.
    pub data1: u64,
    /// Scheduler tick count when the event was recorded.
    pub tick: u64,
}
558
// data0 encoding for Crashed:
// - bits 0..15: fault reason (SiloFaultReason)
// - bits 16..31: fault subcode (arch-specific)
// - bits 32..63: reserved
/// Bit position of the fault subcode inside a packed fault value.
pub const FAULT_SUBCODE_SHIFT: u64 = 16;
564
565/// Performs the pack fault operation.
566pub fn pack_fault(reason: SiloFaultReason, subcode: u64) -> u64 {
567    (reason as u64) | (subcode << FAULT_SUBCODE_SHIFT)
568}
569
570// ============================================================================
571// Internal kernel structs
572// ============================================================================
573
/// Kernel-side state of one silo, owned by `SiloManager`.
#[derive(Debug)]
struct Silo {
    /// Id and tier.
    id: SiloId,
    /// Display name; `create_silo` sets it to `"silo-<sid>"`.
    name: String,
    /// Optional label; `None` right after creation.
    strate_label: Option<String>,
    /// Current state; starts as `SiloState::Created`.
    state: SiloState,
    /// Copy of the creation config. `config.mode` is rewritten when the
    /// silo enters the sandbox.
    config: SiloConfig,
    /// Live permission mode, decoded from `config.mode` at creation.
    mode: OctalMode,
    /// Family decoded from `config.family`.
    family: StrateFamily,
    /// Current memory usage accounted to this silo (bytes).
    /// This tracks user-space virtual regions reserved/mapped via AddressSpace APIs.
    mem_usage_bytes: u64,
    /// Low 32 bits of `config.flags` (set via `config.flags as u32`).
    flags: u32,
    /// Module associated with the silo, if any; `None` after creation.
    module_id: Option<u64>,
    /// Tasks belonging to the silo; also scanned by `silo_for_task` as a
    /// fallback when the task map has no entry.
    tasks: Vec<TaskId>,
    granted_caps: Vec<u64>,
    granted_resources: Vec<GrantedResource>,
    /// Unveil rules; an empty list means path access is not restricted.
    unveil_rules: Vec<UnveilRule>,
    /// Set once by `sys_silo_enter_sandbox`; never cleared in this view.
    sandboxed: bool,
    event_seq: u64,
    /// Ring buffer capturing debug output for `silo attach`.
    output_buf: Option<Box<SiloOutputBuf>>,
}
597
/// Size in bytes of each silo's output ring buffer (`SiloOutputBuf`).
const SILO_OUTPUT_CAPACITY: usize = 4096;
599
/// Fixed-size byte ring buffer that keeps the most recent
/// `SILO_OUTPUT_CAPACITY` bytes pushed into it.
struct SiloOutputBuf {
    /// Backing storage.
    buf: [u8; SILO_OUTPUT_CAPACITY],
    /// Index of the oldest stored byte.
    head: usize,
    /// Number of valid bytes, at most `SILO_OUTPUT_CAPACITY`.
    count: usize,
}
605
606impl core::fmt::Debug for SiloOutputBuf {
607    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
608        f.debug_struct("SiloOutputBuf")
609            .field("count", &self.count)
610            .finish()
611    }
612}
613
614impl SiloOutputBuf {
615    const fn new() -> Self {
616        Self {
617            buf: [0; SILO_OUTPUT_CAPACITY],
618            head: 0,
619            count: 0,
620        }
621    }
622
623    fn push(&mut self, data: &[u8]) {
624        for &b in data {
625            let tail = (self.head + self.count) % SILO_OUTPUT_CAPACITY;
626            self.buf[tail] = b;
627            if self.count < SILO_OUTPUT_CAPACITY {
628                self.count += 1;
629            } else {
630                self.head = (self.head + 1) % SILO_OUTPUT_CAPACITY;
631            }
632        }
633    }
634
635    fn drain(&mut self) -> Vec<u8> {
636        let mut out = Vec::with_capacity(self.count);
637        for i in 0..self.count {
638            out.push(self.buf[(self.head + i) % SILO_OUTPUT_CAPACITY]);
639        }
640        self.head = 0;
641        self.count = 0;
642        out
643    }
644}
645
/// Owned summary of a silo's state.
///
/// Field names mirror `Silo` / `SiloConfig`; the code that fills this struct
/// is outside this view.
#[derive(Debug, Clone)]
pub struct SiloSnapshot {
    /// Numeric silo id.
    pub id: u32,
    /// Tier of the silo.
    pub tier: SiloTier,
    pub name: String,
    pub strate_label: Option<String>,
    pub state: SiloState,
    /// Number of tasks in the silo.
    pub task_count: usize,
    pub mem_usage_bytes: u64,
    pub mem_min_bytes: u64,
    pub mem_max_bytes: u64,
    /// Raw octal permission mode.
    pub mode: u16,
    pub graphics_flags: u64,
    pub graphics_max_sessions: u16,
    pub graphics_session_ttl_sec: u32,
}
662
/// Extended silo summary: the basic snapshot plus configuration details.
#[derive(Debug, Clone)]
pub struct SiloDetailSnapshot {
    /// The basic snapshot.
    pub base: SiloSnapshot,
    pub family: StrateFamily,
    pub sandboxed: bool,
    pub cpu_shares: u32,
    pub cpu_affinity_mask: u64,
    pub max_tasks: u32,
    /// Task ids as raw `u64` values.
    pub task_ids: Vec<u64>,
    /// One `(path, rights)` pair per unveil rule; the `u8` is presumably the
    /// read/write/execute bit mask — confirm against the producer.
    pub unveil_rules: Vec<(String, u8)>,
    pub granted_caps_count: usize,
    pub cpu_features_required: u64,
    pub cpu_features_allowed: u64,
    pub xcr0_mask: u64,
    pub graphics_flags: u64,
    pub graphics_max_sessions: u16,
    pub graphics_session_ttl_sec: u32,
}
681
/// Copy of a `SiloEvent` with the same fields, without the `repr(C)` layout.
#[derive(Debug, Clone)]
pub struct SiloEventSnapshot {
    pub silo_id: u64,
    pub kind: SiloEventKind,
    pub data0: u64,
    pub data1: u64,
    pub tick: u64,
}
690
/// Registry of all silos plus the event queue; lives in the `SILO_MANAGER`
/// static behind a spin lock.
struct SiloManager {
    /// Silos keyed by numeric id.
    silos: BTreeMap<u32, Box<Silo>>,
    /// Recent events, oldest first; bounded by `push_event`.
    events: VecDeque<SiloEvent>,
    /// Task id to silo id index (see `silo_for_task` for the fallback path).
    task_to_silo: BTreeMap<TaskId, u32>,
}
696
697impl SiloManager {
698    /// Creates a new instance.
699    const fn new() -> Self {
700        SiloManager {
701            silos: BTreeMap::new(),
702            events: VecDeque::new(),
703            task_to_silo: BTreeMap::new(),
704        }
705    }
706
707    /// Creates silo.
708    fn create_silo(&mut self, config: &SiloConfig) -> Result<SiloId, SyscallError> {
709        let id = SiloId::new(config.sid);
710        if self.silos.contains_key(&id.sid) {
711            return Err(SyscallError::AlreadyExists);
712        }
713
714        kernel_check_spawn_invariants(&id, &OctalMode::from_octal(config.mode))?;
715
716        let mut name = String::from("silo-");
717        name.push_str(&id.sid.to_string());
718
719        let family = decode_family(config.family)?;
720
721        let silo = Silo {
722            id,
723            name,
724            strate_label: None,
725            state: SiloState::Created,
726            config: *config,
727            mode: OctalMode::from_octal(config.mode),
728            family,
729            mem_usage_bytes: 0,
730            flags: config.flags as u32,
731            module_id: None,
732            tasks: Vec::new(),
733            granted_caps: Vec::new(),
734            granted_resources: Vec::new(),
735            unveil_rules: Vec::new(),
736            sandboxed: false,
737            event_seq: 0,
738            output_buf: None,
739        };
740
741        self.silos.insert(id.sid, Box::new(silo));
742        Ok(id)
743    }
744
745    /// Returns mut.
746    fn get_mut(&mut self, id: u32) -> Result<&mut Silo, SyscallError> {
747        self.silos
748            .get_mut(&id)
749            .map(Box::as_mut)
750            .ok_or(SyscallError::BadHandle)
751    }
752
753    /// Performs the get operation.
754    fn get(&self, id: u32) -> Result<&Silo, SyscallError> {
755        self.silos
756            .get(&id)
757            .map(Box::as_ref)
758            .ok_or(SyscallError::BadHandle)
759    }
760
761    /// Performs the push event operation.
762    fn push_event(&mut self, ev: SiloEvent) {
763        const MAX_EVENTS: usize = 256;
764        if self.events.len() >= MAX_EVENTS {
765            self.events.pop_front();
766        }
767        self.events.push_back(ev);
768    }
769
770    /// Maps task.
771    fn map_task(&mut self, task_id: TaskId, silo_id: u32) {
772        crate::serial_println!(
773            "[trace][silo] map_task enter tid={} sid={} len={}",
774            task_id.as_u64(),
775            silo_id,
776            self.task_to_silo.len()
777        );
778        let existed = self.task_to_silo.contains_key(&task_id);
779        crate::serial_println!(
780            "[trace][silo] map_task before insert tid={} sid={} existed={}",
781            task_id.as_u64(),
782            silo_id,
783            existed
784        );
785        self.task_to_silo.insert(task_id, silo_id);
786        crate::serial_println!(
787            "[trace][silo] map_task after insert tid={} sid={} len={}",
788            task_id.as_u64(),
789            silo_id,
790            self.task_to_silo.len()
791        );
792    }
793
794    /// Unmaps task.
795    fn unmap_task(&mut self, task_id: TaskId) {
796        self.task_to_silo.remove(&task_id);
797    }
798
799    /// Performs the silo for task operation.
800    fn silo_for_task(&self, task_id: TaskId) -> Option<u32> {
801        if let Some(silo_id) = self.task_to_silo.get(&task_id).copied() {
802            return Some(silo_id);
803        }
804
805        // Critical boot fallback: boot-time registration avoids BTreeMap inserts
806        // while holding SILO_MANAGER to eliminate allocator re-entrancy risk on
807        // the fragile early-init path.
808        self.silos
809            .iter()
810            .find_map(|(sid, silo)| silo.tasks.iter().any(|tid| *tid == task_id).then_some(*sid))
811    }
812}
813
814/// Performs the kernel check spawn invariants operation.
815pub fn kernel_check_spawn_invariants(id: &SiloId, mode: &OctalMode) -> Result<(), SyscallError> {
816    if id.tier == SiloTier::User && !mode.hardware.is_empty() {
817        return Err(SyscallError::PermissionDenied);
818    }
819    if id.tier == SiloTier::User && !mode.control.is_empty() {
820        return Err(SyscallError::PermissionDenied);
821    }
822    Ok(())
823}
824
825/// Performs the decode family operation.
826fn decode_family(raw: u8) -> Result<StrateFamily, SyscallError> {
827    match raw {
828        0 => Ok(StrateFamily::SYS),
829        1 => Ok(StrateFamily::DRV),
830        2 => Ok(StrateFamily::FS),
831        3 => Ok(StrateFamily::NET),
832        4 => Ok(StrateFamily::WASM),
833        5 => Ok(StrateFamily::USR),
834        _ => Err(SyscallError::InvalidArgument),
835    }
836}
837
/// The single global silo registry; every access goes through this lock.
static SILO_MANAGER: SpinLock<SiloManager> = SpinLock::new(SiloManager::new());
/// Flag related to boot-time registration (not used in this part of the
/// file; see the fallback comment in `SiloManager::silo_for_task`).
static BOOT_REG_IN_PROGRESS: core::sync::atomic::AtomicBool =
    core::sync::atomic::AtomicBool::new(false);

/// Resource index associated with silo administration (used outside this
/// part of the file).
const SILO_ADMIN_RESOURCE: usize = 0;
/// Maximum number of capability entries a `SiloConfig` may reference.
const MAX_SILO_CAPS: usize = 64;
/// Upper bound on a module blob assembled from an IPC stream.
const MAX_MODULE_BLOB_LEN: usize = 16 * 1024 * 1024; // 16 MiB (UserSlice limit)
/// IPC message type carrying one length-prefixed chunk of module data.
const IPC_STREAM_DATA: u32 = 0xFFFF_FFFE;
/// IPC message type marking the end of a module stream.
const IPC_STREAM_EOF: u32 = 0xFFFF_FFFF;
/// Module header flag: the module claims to be signed, so `key_id` and
/// `signature` must be non-zero.
const MODULE_FLAG_SIGNED: u32 = 1 << 0;
/// Module header flag: kernel module.
const MODULE_FLAG_KERNEL: u32 = 1 << 1;
849
850/// Reads user config.
851fn read_user_config(ptr: u64) -> Result<SiloConfig, SyscallError> {
852    if ptr == 0 {
853        return Err(SyscallError::Fault);
854    }
855    const SIZE: usize = core::mem::size_of::<SiloConfig>();
856    let user = UserSliceRead::new(ptr, SIZE)?;
857    let mut buf = [0u8; SIZE];
858    user.copy_to(&mut buf);
859    // SAFETY: We copied the exact bytes for SiloConfig from userspace.
860    let config = unsafe { core::ptr::read_unaligned(buf.as_ptr() as *const SiloConfig) };
861    Ok(config)
862}
863
864/// Reads caps list.
865fn read_caps_list(ptr: u64, len: u64) -> Result<Vec<u64>, SyscallError> {
866    if len == 0 {
867        return Ok(Vec::new());
868    }
869    if len > MAX_SILO_CAPS as u64 {
870        return Err(SyscallError::InvalidArgument);
871    }
872    let byte_len = len as usize * core::mem::size_of::<u64>();
873    let user = UserSliceRead::new(ptr, byte_len)?;
874    let bytes = user.read_to_vec();
875    let mut out = Vec::with_capacity(len as usize);
876    for chunk in bytes.chunks_exact(8) {
877        let mut arr = [0u8; 8];
878        arr.copy_from_slice(chunk);
879        out.push(u64::from_le_bytes(arr));
880    }
881    Ok(out)
882}
883
884/// Reads module stream from port.
885fn read_module_stream_from_port(
886    port: &alloc::sync::Arc<port::Port>,
887) -> Result<Vec<u8>, SyscallError> {
888    let mut out = Vec::new();
889    loop {
890        let msg = port.recv().map_err(|_| SyscallError::BadHandle)?;
891
892        if msg.msg_type == IPC_STREAM_EOF {
893            break;
894        }
895        if msg.msg_type != IPC_STREAM_DATA {
896            return Err(SyscallError::InvalidArgument);
897        }
898        if msg.flags != 0 {
899            return Err(SyscallError::InvalidArgument);
900        }
901
902        let chunk_len = u16::from_le_bytes([msg.payload[0], msg.payload[1]]) as usize;
903        if chunk_len == 0 {
904            break;
905        }
906        if chunk_len > msg.payload.len() - 2 {
907            return Err(SyscallError::InvalidArgument);
908        }
909        if out.len().saturating_add(chunk_len) > MAX_MODULE_BLOB_LEN {
910            return Err(SyscallError::InvalidArgument);
911        }
912
913        out.extend_from_slice(&msg.payload[2..2 + chunk_len]);
914    }
915    Ok(out)
916}
917
/// Parses and validates the CMOD header at the start of a module blob.
///
/// Returns `Ok(None)` when `data` does not start with the `CMOD` magic
/// (the blob is then not a CMOD image), and `Ok(Some(header))` when the
/// header is present and passes every check below.
///
/// # Errors
/// - `InvalidArgument` if the blob is shorter than the header, the version
///   is not 1 or 2, the architecture is not 0, a required CPU feature is
///   missing on the host, a segment or table offset lies outside the blob,
///   the entry point is outside the code segment, a segment is not
///   page-aligned, or code and data overlap.
/// - `PermissionDenied` if the module is flagged as signed but its key id
///   or signature is all zeroes.
///
/// The checks run in the order listed in the body; the first failing check
/// decides the error.
fn parse_module_header(data: &[u8]) -> Result<Option<Strat9ModuleHeader>, SyscallError> {
    const MAGIC: [u8; 4] = *b"CMOD";
    let header_size = core::mem::size_of::<Strat9ModuleHeader>();

    // Anything without the magic is treated as "not a CMOD", not as an error.
    if data.len() < MAGIC.len() {
        return Ok(None);
    }
    if data[0..4] != MAGIC {
        return Ok(None);
    }
    // From here on the blob claims to be a CMOD, so problems are errors.
    if data.len() < header_size {
        return Err(SyscallError::InvalidArgument);
    }

    // SAFETY: We checked length, and we read unaligned from a byte slice.
    let header = unsafe { core::ptr::read_unaligned(data.as_ptr() as *const Strat9ModuleHeader) };

    if header.version != 1 && header.version != 2 {
        return Err(SyscallError::InvalidArgument);
    }
    if header.cpu_arch != 0 {
        return Err(SyscallError::InvalidArgument);
    }

    // The CPU feature requirement only exists in v2+ headers; v1 is treated
    // as "no requirement".
    let version = unsafe { core::ptr::addr_of!(header.version).read_unaligned() };
    let req = if version >= 2 {
        unsafe { core::ptr::addr_of!(header.cpu_features_required).read_unaligned() }
    } else {
        0
    };
    if req != 0 {
        let host = crate::arch::x86_64::cpuid::host();
        let required = crate::arch::x86_64::cpuid::CpuFeatures::from_bits_truncate(req);
        if !host.features.contains(required) {
            log::warn!(
                "[cmod] module requires CPU features {:#x} but host has {:#x}",
                req,
                host.features.bits()
            );
            return Err(SyscallError::InvalidArgument);
        }
    }

    // Both segments must lie entirely inside the blob; checked_add guards
    // against offset + size wrapping around.
    let data_len = data.len() as u64;
    let code_end = header
        .code_offset
        .checked_add(header.code_size)
        .ok_or(SyscallError::InvalidArgument)?;
    let data_end = header
        .data_offset
        .checked_add(header.data_size)
        .ok_or(SyscallError::InvalidArgument)?;
    if code_end > data_len || data_end > data_len {
        return Err(SyscallError::InvalidArgument);
    }
    // The entry point is compared against the code size, i.e. it is treated
    // as an offset into the code segment. Skipped when there is no code.
    if header.entry_point >= header.code_size && header.code_size != 0 {
        return Err(SyscallError::InvalidArgument);
    }
    // Table offsets are only bounds-checked here (an offset equal to the
    // blob length is accepted).
    if header.export_table_offset > data_len
        || header.import_table_offset > data_len
        || header.relocation_table_offset > data_len
    {
        return Err(SyscallError::InvalidArgument);
    }

    // Segmentation rules: code/data must not overlap and must be page-aligned.
    const PAGE_SIZE: u64 = 4096;
    if header.code_size > 0 {
        if header.code_offset % PAGE_SIZE != 0 || header.code_size % PAGE_SIZE != 0 {
            return Err(SyscallError::InvalidArgument);
        }
    }
    if header.data_size > 0 {
        if header.data_offset % PAGE_SIZE != 0 || header.data_size % PAGE_SIZE != 0 {
            return Err(SyscallError::InvalidArgument);
        }
    }
    // Standard half-open interval intersection test.
    // NOTE(review): an empty segment whose offset falls strictly inside the
    // other segment is also rejected by this test — confirm that is wanted.
    let code_range = header.code_offset..code_end;
    let data_range = header.data_offset..data_end;
    if code_range.start < data_range.end && data_range.start < code_range.end {
        return Err(SyscallError::InvalidArgument);
    }

    // Flags/signature checks (verification is TODO).
    // Only the presence of a key id and signature is checked; the signature
    // itself is not verified here.
    if header.flags & MODULE_FLAG_SIGNED != 0 {
        let sig_nonzero = header.signature.iter().any(|b| *b != 0);
        let key_nonzero = header.key_id.iter().any(|b| *b != 0);
        if !sig_nonzero || !key_nonzero {
            return Err(SyscallError::PermissionDenied);
        }
    }
    if header.flags & MODULE_FLAG_KERNEL != 0 {
        // Kernel modules are allowed only when loaded by admin (already enforced).
    }

    Ok(Some(header))
}
1016
1017/// Reads u32 le.
1018fn read_u32_le(data: &[u8], offset: usize) -> Result<u32, SyscallError> {
1019    if offset + 4 > data.len() {
1020        return Err(SyscallError::InvalidArgument);
1021    }
1022    let mut buf = [0u8; 4];
1023    buf.copy_from_slice(&data[offset..offset + 4]);
1024    Ok(u32::from_le_bytes(buf))
1025}
1026
1027/// Reads u64 le.
1028fn read_u64_le(data: &[u8], offset: usize) -> Result<u64, SyscallError> {
1029    if offset + 8 > data.len() {
1030        return Err(SyscallError::InvalidArgument);
1031    }
1032    let mut buf = [0u8; 8];
1033    buf.copy_from_slice(&data[offset..offset + 8]);
1034    Ok(u64::from_le_bytes(buf))
1035}
1036
1037/// Performs the resolve export offset operation.
1038fn resolve_export_offset(module: &ModuleImage, ordinal: u64) -> Result<u64, SyscallError> {
1039    let header = module.header.ok_or(SyscallError::InvalidArgument)?;
1040    if header.export_table_offset == 0 {
1041        return Err(SyscallError::NotFound);
1042    }
1043    let table_off = header.export_table_offset as usize;
1044    let count = read_u32_le(&module.data, table_off)? as u64;
1045    if ordinal >= count {
1046        return Err(SyscallError::InvalidArgument);
1047    }
1048    // Layout: u32 count + u32 reserved, then u64 entries.
1049    let entries_off = table_off + 8;
1050    let entry_off = entries_off + (ordinal as usize * 8);
1051    let rva = read_u64_le(&module.data, entry_off)?;
1052    Ok(rva)
1053}
1054
1055/// Performs the require silo admin operation.
1056pub fn require_silo_admin() -> Result<(), SyscallError> {
1057    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
1058    // SAFETY: Current task owns its capability table during syscall execution.
1059    let caps = unsafe { &*task.process.capabilities.get() };
1060    let required = CapPermissions {
1061        read: false,
1062        write: false,
1063        execute: false,
1064        grant: true,
1065        revoke: false,
1066    };
1067
1068    if caps.has_resource_with_permissions(ResourceType::Silo, SILO_ADMIN_RESOURCE, required) {
1069        Ok(())
1070    } else {
1071        Err(SyscallError::PermissionDenied)
1072    }
1073}
1074
1075/// Performs the resolve silo handle operation.
1076fn resolve_silo_handle(handle: u64, required: CapPermissions) -> Result<u32, SyscallError> {
1077    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
1078    let caps = unsafe { &*task.process.capabilities.get() };
1079    let cap_id = CapId::from_raw(handle);
1080    let cap = caps.get(cap_id).ok_or(SyscallError::BadHandle)?;
1081
1082    // Ensure this is a Silo capability and permissions are sufficient.
1083    if cap.resource_type != ResourceType::Silo {
1084        return Err(SyscallError::BadHandle);
1085    }
1086
1087    if (!required.read || cap.permissions.read)
1088        && (!required.write || cap.permissions.write)
1089        && (!required.execute || cap.permissions.execute)
1090        && (!required.grant || cap.permissions.grant)
1091        && (!required.revoke || cap.permissions.revoke)
1092    {
1093        Ok(cap.resource as u32)
1094    } else {
1095        Err(SyscallError::PermissionDenied)
1096    }
1097}
1098
1099// ============================================================================
1100// Module registry (temporary blob store for .cmod/ELF)
1101// ============================================================================
1102
/// A module blob stored in the [`ModuleRegistry`], plus its parsed header.
#[derive(Debug)]
struct ModuleImage {
    /// Identifier under which the registry stores this image.
    id: u64,
    /// The module bytes exactly as handed to `ModuleRegistry::register`.
    data: Arc<[u8]>,
    /// Header produced by `parse_module_header` for `data`, or `None` when the
    /// parser returned no header.
    // NOTE(review): presumably `None` means a non-.cmod blob such as a plain ELF - confirm.
    header: Option<Strat9ModuleHeader>,
}
1109
1110struct ModuleRegistry {
1111    modules: BTreeMap<u64, ModuleImage>,
1112}
1113
1114impl ModuleRegistry {
1115    /// Creates a new instance.
1116    const fn new() -> Self {
1117        ModuleRegistry {
1118            modules: BTreeMap::new(),
1119        }
1120    }
1121
1122    /// Performs the register operation.
1123    fn register(&mut self, data: Vec<u8>) -> Result<u64, SyscallError> {
1124        let header = parse_module_header(&data)?;
1125        static NEXT_MOD: AtomicU64 = AtomicU64::new(1);
1126        let id = NEXT_MOD.fetch_add(1, Ordering::Relaxed);
1127        self.modules.insert(
1128            id,
1129            ModuleImage {
1130                id,
1131                data: Arc::from(data.into_boxed_slice()),
1132                header,
1133            },
1134        );
1135        Ok(id)
1136    }
1137
1138    /// Performs the get operation.
1139    fn get(&self, id: u64) -> Option<&ModuleImage> {
1140        self.modules.get(&id)
1141    }
1142
1143    /// Performs the remove operation.
1144    fn remove(&mut self, id: u64) -> Option<ModuleImage> {
1145        self.modules.remove(&id)
1146    }
1147}
1148
/// Global registry of loaded module blobs, guarded by a spin lock.
static MODULE_REGISTRY: SpinLock<ModuleRegistry> = SpinLock::new(ModuleRegistry::new());
1150
1151/// Performs the charge task silo memory operation.
1152fn charge_task_silo_memory(task_id: TaskId, bytes: u64) -> Result<(), SyscallError> {
1153    if bytes == 0 {
1154        return Ok(());
1155    }
1156    let mut mgr = SILO_MANAGER.lock();
1157    let Some(silo_id) = mgr.silo_for_task(task_id) else {
1158        return Ok(());
1159    };
1160    let silo = mgr.get_mut(silo_id)?;
1161    let next = silo
1162        .mem_usage_bytes
1163        .checked_add(bytes)
1164        .ok_or(SyscallError::OutOfMemory)?;
1165    if silo.config.mem_max != 0 && next > silo.config.mem_max {
1166        return Err(SyscallError::OutOfMemory);
1167    }
1168    silo.mem_usage_bytes = next;
1169    Ok(())
1170}
1171
1172/// Performs the release task silo memory operation.
1173fn release_task_silo_memory(task_id: TaskId, bytes: u64) {
1174    if bytes == 0 {
1175        return;
1176    }
1177    let mut mgr = SILO_MANAGER.lock();
1178    let Some(silo_id) = mgr.silo_for_task(task_id) else {
1179        return;
1180    };
1181    if let Ok(silo) = mgr.get_mut(silo_id) {
1182        silo.mem_usage_bytes = silo.mem_usage_bytes.saturating_sub(bytes);
1183    }
1184}
1185
1186/// Charge memory usage against the current task's silo quota (if any).
1187///
1188/// Returns `OutOfMemory` when charging would exceed `SiloConfig.mem_max`.
1189/// Tasks that are not part of a silo are ignored.
1190pub fn charge_current_task_memory(bytes: u64) -> Result<(), SyscallError> {
1191    let Some(task) = crate::process::scheduler::current_task_clone_try() else {
1192        // Boot-time/kernel contexts may have no current task.
1193        // Also avoid deadlock when scheduler lock is already held in cleanup paths.
1194        return Ok(());
1195    };
1196    charge_task_silo_memory(task.id, bytes)
1197}
1198
1199/// Release memory usage from the current task's silo quota (if any).
1200///
1201/// Tasks that are not part of a silo are ignored.
1202pub fn release_current_task_memory(bytes: u64) {
1203    if let Some(task) = crate::process::scheduler::current_task_clone_try() {
1204        release_task_silo_memory(task.id, bytes);
1205    }
1206}
1207
/// Pulls the strate label out of a `/srv/strate-fs-<type>/<label>` path.
///
/// After the `/srv/strate-fs-` prefix the path must contain exactly two
/// non-empty `/`-separated segments; empty segments (doubled or trailing
/// slashes) are ignored. The second segment is returned as the label.
/// Any other shape yields `None`.
fn extract_strate_label(path: &str) -> Option<String> {
    let tail = path.strip_prefix("/srv/strate-fs-")?;
    let mut segments = tail.split('/').filter(|segment| !segment.is_empty());

    // Exactly (type, label) and nothing after it.
    match (segments.next(), segments.next(), segments.next()) {
        (Some(_strate_type), Some(label), None) => Some(String::from(label)),
        _ => None,
    }
}
1220
/// Turns an arbitrary string into a label made only of permitted characters.
///
/// At most the first 31 bytes of `raw` are considered. ASCII letters, digits,
/// `-`, `_` and `.` are kept; every other byte (including each byte of a
/// multi-byte UTF-8 character) becomes `_`. An empty result is replaced by
/// `"default"`.
fn sanitize_label(raw: &str) -> String {
    let cleaned: String = raw
        .bytes()
        .take(31)
        .map(|byte| {
            let permitted =
                byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.');
            if permitted {
                byte as char
            } else {
                '_'
            }
        })
        .collect();

    if cleaned.is_empty() {
        return String::from("default");
    }
    cleaned
}
1234
/// Reports whether `raw` is acceptable as a label without any rewriting.
///
/// A valid label is 1 to 31 bytes long and consists solely of ASCII letters,
/// digits, `-`, `_` and `.`.
fn is_valid_label(raw: &str) -> bool {
    let permitted =
        |byte: u8| byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.');
    (1..=31).contains(&raw.len()) && raw.bytes().all(permitted)
}
1243
1244/// Sets current silo label from path.
1245pub fn set_current_silo_label_from_path(path: &str) -> Result<(), SyscallError> {
1246    let Some(label) = extract_strate_label(path) else {
1247        return Ok(());
1248    };
1249    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
1250    let mut mgr = SILO_MANAGER.lock();
1251    let Some(silo_id) = mgr.silo_for_task(task.id) else {
1252        return Ok(());
1253    };
1254    let silo = mgr.get_mut(silo_id)?;
1255    // Do not overwrite a label that was already set (e.g. by kernel_spawn_strate).
1256    // The spawner's requested label takes precedence over the default path-derived one.
1257    if silo.strate_label.is_none() {
1258        silo.strate_label = Some(label);
1259    }
1260    Ok(())
1261}
1262
1263/// Performs the current task silo label operation.
1264pub fn current_task_silo_label() -> Option<String> {
1265    let task = current_task_clone()?;
1266    let mgr = SILO_MANAGER.lock();
1267    let silo_id = mgr.silo_for_task(task.id)?;
1268    let silo = mgr.get(silo_id).ok()?;
1269    silo.strate_label.clone()
1270}
1271
1272/// Performs the list silos snapshot operation.
1273pub fn list_silos_snapshot() -> Vec<SiloSnapshot> {
1274    let mgr = SILO_MANAGER.lock();
1275    mgr.silos
1276        .values()
1277        .map(|s| SiloSnapshot {
1278            id: s.id.sid,
1279            tier: s.id.tier,
1280            name: s.name.clone(),
1281            strate_label: s.strate_label.clone(),
1282            state: s.state,
1283            task_count: s.tasks.len(),
1284            mem_usage_bytes: s.mem_usage_bytes,
1285            mem_min_bytes: s.config.mem_min,
1286            mem_max_bytes: s.config.mem_max,
1287            mode: s.config.mode,
1288            graphics_flags: s.config.flags
1289                & (SILO_FLAG_GRAPHICS
1290                    | SILO_FLAG_WEBRTC_NATIVE
1291                    | SILO_FLAG_GRAPHICS_READ_ONLY
1292                    | SILO_FLAG_WEBRTC_TURN_FORCE),
1293            graphics_max_sessions: s.config.graphics_max_sessions,
1294            graphics_session_ttl_sec: s.config.graphics_session_ttl_sec,
1295        })
1296        .collect()
1297}
1298
1299/// Return silo identity + memory accounting for a task, if the task belongs to a silo.
1300///
1301/// Tuple layout:
1302/// - silo id (u32)
1303/// - optional label
1304/// - current usage bytes
1305/// - configured minimum bytes
1306/// - configured maximum bytes (0 = unlimited)
1307pub fn silo_info_for_task(task_id: TaskId) -> Option<(u32, Option<String>, u64, u64, u64)> {
1308    let mgr = SILO_MANAGER.lock();
1309    let silo_id = mgr.silo_for_task(task_id)?;
1310    let silo = mgr.get(silo_id).ok()?;
1311    Some((
1312        silo.id.sid,
1313        silo.strate_label.clone(),
1314        silo.mem_usage_bytes,
1315        silo.config.mem_min,
1316        silo.config.mem_max,
1317    ))
1318}
1319
1320/// Performs the resolve volume resource from dev path operation.
1321fn resolve_volume_resource_from_dev_path(dev_path: &str) -> Result<usize, SyscallError> {
1322    match dev_path {
1323        "/dev/sda" => ahci::get_device()
1324            .map(|d| d as *const _ as usize)
1325            .ok_or(SyscallError::NotFound),
1326        "/dev/vda" => virtio_block::get_device()
1327            .map(|d| d as *const _ as usize)
1328            .ok_or(SyscallError::NotFound),
1329        _ => Err(SyscallError::NotFound),
1330    }
1331}
1332
1333/// Compute the effective XCR0 mask for a silo from its allowed CPU features.
1334fn compute_silo_xcr0(config: &SiloConfig) -> u64 {
1335    use crate::arch::x86_64::cpuid::{xcr0_for_features, CpuFeatures};
1336    let allowed = CpuFeatures::from_bits_truncate(config.cpu_features_allowed);
1337    xcr0_for_features(allowed)
1338}
1339
1340/// Performs the kernel spawn strate operation.
1341pub fn kernel_spawn_strate(
1342    elf_data: &[u8],
1343    label: Option<&str>,
1344    dev_path: Option<&str>,
1345) -> Result<u32, SyscallError> {
1346    let module_id = {
1347        let mut registry = MODULE_REGISTRY.lock();
1348        registry.register(elf_data.to_vec())?
1349    };
1350
1351    let silo_id = {
1352        let mut mgr = SILO_MANAGER.lock();
1353        // For kernel_spawn_strate (manual command), we auto-assign SID > 1000.
1354        // In a production system, this would follow the "42" rule from Init.
1355        let mut sid = 1000u32;
1356        while mgr.silos.contains_key(&sid) {
1357            sid = sid.checked_add(1).ok_or(SyscallError::OutOfMemory)?;
1358        }
1359
1360        let id = SiloId::new(sid);
1361        let requested_label = label
1362            .map(sanitize_label)
1363            .unwrap_or_else(|| alloc::format!("inst-{}", id.sid));
1364
1365        if mgr
1366            .silos
1367            .values()
1368            .any(|s| s.strate_label.as_deref() == Some(requested_label.as_str()))
1369        {
1370            return Err(SyscallError::AlreadyExists);
1371        }
1372
1373        let mut cfg = SiloConfig {
1374            sid: id.sid,
1375            mode: 0o000,
1376            family: StrateFamily::USR as u8,
1377            ..SiloConfig::default()
1378        };
1379        cfg.xcr0_mask = compute_silo_xcr0(&cfg);
1380
1381        let silo = Silo {
1382            id,
1383            name: alloc::format!("silo-{}", id.sid),
1384            strate_label: Some(requested_label),
1385            state: SiloState::Ready,
1386            config: cfg,
1387            mode: OctalMode::from_octal(0),
1388            family: StrateFamily::USR,
1389            mem_usage_bytes: 0,
1390            flags: 0,
1391            module_id: Some(module_id),
1392            tasks: Vec::new(),
1393            granted_caps: Vec::new(),
1394            granted_resources: Vec::new(),
1395            unveil_rules: Vec::new(),
1396            sandboxed: false,
1397            event_seq: 0,
1398            output_buf: None,
1399        };
1400
1401        mgr.silos.insert(id.sid, Box::new(silo));
1402        id.sid
1403    };
1404
1405    let module_data = {
1406        let registry = MODULE_REGISTRY.lock();
1407        let module = registry.get(module_id).ok_or(SyscallError::BadHandle)?;
1408        module.data.clone()
1409    };
1410
1411    let mut seed_caps = Vec::new();
1412    if let Some(path) = dev_path {
1413        let resource = resolve_volume_resource_from_dev_path(path)?;
1414        let cap = get_capability_manager().create_capability(
1415            ResourceType::Volume,
1416            resource,
1417            CapPermissions {
1418                read: true,
1419                write: true,
1420                execute: false,
1421                grant: true,
1422                revoke: true,
1423            },
1424        );
1425        seed_caps.push(cap);
1426    }
1427
1428    let display = {
1429        let mgr = SILO_MANAGER.lock();
1430        let silo = mgr.get(silo_id)?;
1431        silo.strate_label
1432            .clone()
1433            .unwrap_or_else(|| alloc::format!("silo-{}", silo.id.sid))
1434    };
1435    let task_name: &'static str =
1436        Box::leak(alloc::format!("silo-{}/strate-admin-{}", silo_id, display).into_boxed_str());
1437    let task = crate::process::elf::load_elf_task_with_caps(&module_data, task_name, &seed_caps)
1438        .map_err(|_| SyscallError::InvalidArgument)?;
1439    let task_id = task.id;
1440
1441    let mut mgr = SILO_MANAGER.lock();
1442    {
1443        let silo = mgr.get_mut(silo_id)?;
1444        silo.tasks.push(task_id);
1445        silo.state = SiloState::Running;
1446        let fpu_xcr0 = unsafe { (*task.fpu_state.get()).xcr0_mask };
1447        let effective_xcr0 = (silo.config.xcr0_mask & fpu_xcr0).max(0x3);
1448        task.xcr0_mask
1449            .store(effective_xcr0, core::sync::atomic::Ordering::Relaxed);
1450    }
1451    mgr.map_task(task_id, silo_id);
1452    mgr.push_event(SiloEvent {
1453        silo_id: silo_id.into(),
1454        kind: SiloEventKind::Started,
1455        data0: 0,
1456        data1: 0,
1457        tick: crate::process::scheduler::ticks(),
1458    });
1459    drop(mgr);
1460    crate::process::add_task(task);
1461    Ok(silo_id)
1462}
1463
1464/// Performs the resolve selector to silo id operation.
1465fn resolve_selector_to_silo_id(selector: &str, mgr: &SiloManager) -> Result<u32, SyscallError> {
1466    if let Ok(id) = selector.parse::<u32>() {
1467        if mgr.silos.contains_key(&id) {
1468            return Ok(id);
1469        }
1470        return Err(SyscallError::NotFound);
1471    }
1472    let mut found: Option<u32> = None;
1473    for s in mgr.silos.values() {
1474        if s.strate_label.as_deref() == Some(selector) {
1475            if found.is_some() {
1476                return Err(SyscallError::InvalidArgument);
1477            }
1478            found = Some(s.id.sid);
1479        }
1480    }
1481    found.ok_or(SyscallError::NotFound)
1482}
1483
1484/// Performs the kernel stop silo operation.
1485pub fn kernel_stop_silo(selector: &str, force_kill: bool) -> Result<u32, SyscallError> {
1486    let (silo_id, tasks) = {
1487        let mut mgr = SILO_MANAGER.lock();
1488        let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
1489        let mut tasks = Vec::new();
1490        {
1491            let silo = mgr.get_mut(silo_id)?;
1492            match silo.state {
1493                SiloState::Running | SiloState::Paused => {
1494                    tasks = silo.tasks.clone();
1495                    silo.tasks.clear();
1496                    silo.state = if force_kill {
1497                        SiloState::Stopped
1498                    } else {
1499                        SiloState::Stopping
1500                    };
1501                }
1502                SiloState::Stopping => {
1503                    if force_kill {
1504                        silo.state = SiloState::Stopped;
1505                    }
1506                }
1507                SiloState::Stopped | SiloState::Created | SiloState::Ready => {}
1508                _ => return Err(SyscallError::InvalidArgument),
1509            }
1510        }
1511        for tid in &tasks {
1512            mgr.unmap_task(*tid);
1513        }
1514        mgr.push_event(SiloEvent {
1515            silo_id: silo_id as u64,
1516            kind: if force_kill {
1517                SiloEventKind::Killed
1518            } else {
1519                SiloEventKind::Stopped
1520            },
1521            data0: 0,
1522            data1: 0,
1523            tick: crate::process::scheduler::ticks(),
1524        });
1525        (silo_id, tasks)
1526    };
1527    for tid in tasks {
1528        crate::process::kill_task(tid);
1529    }
1530    Ok(silo_id)
1531}
1532
1533/// Performs the kernel start silo operation.
1534pub fn kernel_start_silo(selector: &str) -> Result<u32, SyscallError> {
1535    let silo_id = {
1536        let mgr = SILO_MANAGER.lock();
1537        resolve_selector_to_silo_id(selector, &mgr)?
1538    };
1539    start_silo_by_id(silo_id)?;
1540    Ok(silo_id)
1541}
1542
1543/// Performs the kernel destroy silo operation.
1544pub fn kernel_destroy_silo(selector: &str) -> Result<u32, SyscallError> {
1545    let (silo_id, module_id) = {
1546        let mut mgr = SILO_MANAGER.lock();
1547        let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
1548        let module_id = {
1549            let silo = mgr.get(silo_id)?;
1550            if !silo.tasks.is_empty() {
1551                return Err(SyscallError::InvalidArgument);
1552            }
1553            match silo.state {
1554                SiloState::Stopped | SiloState::Created | SiloState::Ready | SiloState::Crashed => {
1555                }
1556                _ => return Err(SyscallError::InvalidArgument),
1557            }
1558            silo.module_id
1559        };
1560        let _ = mgr.silos.remove(&silo_id);
1561        (silo_id, module_id)
1562    };
1563    if let Some(mid) = module_id {
1564        let mut reg = MODULE_REGISTRY.lock();
1565        let _ = reg.remove(mid);
1566    }
1567    Ok(silo_id)
1568}
1569
1570/// Performs the kernel rename silo label operation.
1571pub fn kernel_rename_silo_label(selector: &str, new_label: &str) -> Result<u32, SyscallError> {
1572    if !is_valid_label(new_label) {
1573        return Err(SyscallError::InvalidArgument);
1574    }
1575    let mut mgr = SILO_MANAGER.lock();
1576    let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
1577    if mgr
1578        .silos
1579        .values()
1580        .any(|s| s.id.sid != silo_id && s.strate_label.as_deref() == Some(new_label))
1581    {
1582        return Err(SyscallError::AlreadyExists);
1583    }
1584    let silo = mgr.get_mut(silo_id)?;
1585    match silo.state {
1586        SiloState::Stopped | SiloState::Created | SiloState::Ready | SiloState::Crashed => {
1587            silo.strate_label = Some(String::from(new_label));
1588            Ok(silo_id)
1589        }
1590        _ => Err(SyscallError::InvalidArgument),
1591    }
1592}
1593
1594/// Performs the register boot strate task operation.
1595pub fn register_boot_strate_task(task_id: TaskId, label: &str) -> Result<u32, SyscallError> {
1596    crate::serial_println!(
1597        "[trace][silo] register_boot_strate_task enter tid={} label={}",
1598        task_id.as_u64(),
1599        label
1600    );
1601    BOOT_REG_IN_PROGRESS.store(true, Ordering::Relaxed);
1602    let result = (|| -> Result<u32, SyscallError> {
1603        let sanitized = sanitize_label(label);
1604        let mut mgr = SILO_MANAGER.lock();
1605        crate::serial_println!(
1606            "[trace][silo] register_boot_strate_task lock acquired tid={}",
1607            task_id.as_u64()
1608        );
1609        crate::serial_println!(
1610            "[trace][silo] register_boot_strate_task before sid scan tid={}",
1611            task_id.as_u64()
1612        );
1613        let mut sid = 1u32;
1614        while mgr.silos.contains_key(&sid) {
1615            sid = sid.checked_add(1).ok_or(SyscallError::OutOfMemory)?;
1616        }
1617        crate::serial_println!(
1618            "[trace][silo] register_boot_strate_task sid selected tid={} sid={}",
1619            task_id.as_u64(),
1620            sid
1621        );
1622        crate::serial_println!(
1623            "[trace][silo] register_boot_strate_task before label uniqueness tid={} label={}",
1624            task_id.as_u64(),
1625            sanitized.as_str()
1626        );
1627        if mgr
1628            .silos
1629            .values()
1630            .any(|s| s.strate_label.as_deref() == Some(sanitized.as_str()))
1631        {
1632            return Err(SyscallError::AlreadyExists);
1633        }
1634        drop(mgr);
1635
1636        let id = SiloId::new(sid);
1637        let silo = Silo {
1638            id,
1639            name: alloc::format!("silo-{}", id.sid),
1640            strate_label: Some(sanitized),
1641            state: SiloState::Running,
1642            config: SiloConfig {
1643                sid: id.sid,
1644                mode: 0o777,
1645                family: StrateFamily::SYS as u8,
1646                ..SiloConfig::default()
1647            },
1648            mode: OctalMode::from_octal(0o777),
1649            family: StrateFamily::SYS,
1650            mem_usage_bytes: 0,
1651            flags: 0,
1652            module_id: None,
1653            tasks: alloc::vec![task_id],
1654            granted_caps: Vec::new(),
1655            granted_resources: Vec::new(),
1656            unveil_rules: Vec::new(),
1657            sandboxed: false,
1658            event_seq: 0,
1659            output_buf: None,
1660        };
1661
1662        let mut mgr = SILO_MANAGER.lock();
1663        if mgr.silos.contains_key(&id.sid) {
1664            return Err(SyscallError::Again);
1665        }
1666        if mgr
1667            .silos
1668            .values()
1669            .any(|s| s.strate_label.as_deref() == silo.strate_label.as_deref())
1670        {
1671            return Err(SyscallError::AlreadyExists);
1672        }
1673        crate::serial_println!(
1674            "[trace][silo] register_boot_strate_task before silo insert tid={} sid={}",
1675            task_id.as_u64(),
1676            id.sid
1677        );
1678        mgr.silos.insert(id.sid, Box::new(silo));
1679        drop(mgr);
1680        Ok(id.sid)
1681    })();
1682    BOOT_REG_IN_PROGRESS.store(false, Ordering::Relaxed);
1683    result
1684}
1685
/// Returns true while boot-time silo registration critical path is executing.
///
/// This is a relaxed load of `BOOT_REG_IN_PROGRESS`, the flag that
/// `register_boot_strate_task` sets on entry and clears before returning.
pub fn debug_boot_reg_active() -> bool {
    BOOT_REG_IN_PROGRESS.load(Ordering::Relaxed)
}
1690
1691/// Performs the resolve module handle operation.
1692fn resolve_module_handle(handle: u64, required: CapPermissions) -> Result<u64, SyscallError> {
1693    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
1694    let caps = unsafe { &*task.process.capabilities.get() };
1695    let cap_id = CapId::from_raw(handle);
1696    let cap = caps.get(cap_id).ok_or(SyscallError::BadHandle)?;
1697
1698    if cap.resource_type != ResourceType::Module {
1699        return Err(SyscallError::BadHandle);
1700    }
1701
1702    if (!required.read || cap.permissions.read)
1703        && (!required.write || cap.permissions.write)
1704        && (!required.execute || cap.permissions.execute)
1705        && (!required.grant || cap.permissions.grant)
1706        && (!required.revoke || cap.permissions.revoke)
1707    {
1708        Ok(cap.resource as u64)
1709    } else {
1710        Err(SyscallError::PermissionDenied)
1711    }
1712}
1713
1714/// Grant the Silo Admin capability to a task (bootstrapping).
1715///
1716/// This should be called only for the initial admin task (e.g. "init").
1717pub fn grant_silo_admin_to_task(task: &alloc::sync::Arc<Task>) -> CapId {
1718    let cap = get_capability_manager().create_capability(
1719        ResourceType::Silo,
1720        SILO_ADMIN_RESOURCE,
1721        CapPermissions::all(),
1722    );
1723    // SAFETY: Bootstrapping. Caller must ensure exclusive access.
1724    unsafe { (&mut *task.process.capabilities.get()).insert(cap) }
1725}
1726
1727// ============================================================================
1728// Module syscalls (temporary blob loader)
1729// ============================================================================
1730
1731/// Performs the sys module load operation.
1732pub fn sys_module_load(fd_or_ptr: u64, len: u64) -> Result<u64, SyscallError> {
1733    // Module loading is currently restricted to admin.
1734    require_silo_admin()?;
1735
1736    // Transitional path: if len != 0, treat arg1 as a userspace blob pointer.
1737    if len != 0 {
1738        let len = len as usize;
1739        if len == 0 || len > MAX_MODULE_BLOB_LEN {
1740            return Err(SyscallError::InvalidArgument);
1741        }
1742
1743        let user = UserSliceRead::new(fd_or_ptr, len)?;
1744        let data = user.read_to_vec();
1745        if data.len() >= 4 {
1746            log::debug!(
1747                "module_load: len={} magic={:02x}{:02x}{:02x}{:02x}",
1748                data.len(),
1749                data[0],
1750                data[1],
1751                data[2],
1752                data[3]
1753            );
1754        } else {
1755            log::debug!("module_load: len={} (too small)", data.len());
1756        }
1757
1758        let mut registry = MODULE_REGISTRY.lock();
1759        let id = registry.register(data)?;
1760        drop(registry);
1761
1762        let cap = get_capability_manager().create_capability(
1763            ResourceType::Module,
1764            id as usize,
1765            CapPermissions::all(),
1766        );
1767
1768        let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
1769        let cap_id = unsafe { (&mut *task.process.capabilities.get()).insert(cap) };
1770
1771        return Ok(cap_id.as_u64());
1772    }
1773
1774    // TODO: Load from a file handle (fd) via VFS once the path exists.
1775    // For now, interpret `fd_or_ptr` as either:
1776    // - a File handle (read all), or
1777    // - an IPC port handle that streams the module bytes.
1778    //
1779    // Stream protocol:
1780    // - msg_type = IPC_STREAM_DATA, flags = payload length (0..48)
1781    // - msg_type = IPC_STREAM_EOF (or DATA with flags=0) ends the stream
1782    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
1783    let caps = unsafe { &*task.process.capabilities.get() };
1784    let required = CapPermissions {
1785        read: true,
1786        write: false,
1787        execute: false,
1788        grant: false,
1789        revoke: false,
1790    };
1791    let cap = caps
1792        .get_with_permissions(CapId::from_raw(fd_or_ptr), required)
1793        .ok_or(SyscallError::PermissionDenied)?;
1794    let data = match cap.resource_type {
1795        ResourceType::File => {
1796            let fd = u32::try_from(cap.resource).map_err(|_| SyscallError::BadHandle)?;
1797            crate::vfs::read_all(fd)?
1798        }
1799        ResourceType::IpcPort => {
1800            let port_id = PortId::from_u64(cap.resource as u64);
1801            let port = port::get_port(port_id).ok_or(SyscallError::BadHandle)?;
1802            read_module_stream_from_port(&port)?
1803        }
1804        _ => return Err(SyscallError::BadHandle),
1805    };
1806    if data.len() > MAX_MODULE_BLOB_LEN {
1807        return Err(SyscallError::InvalidArgument);
1808    }
1809
1810    let mut registry = MODULE_REGISTRY.lock();
1811    let id = registry.register(data)?;
1812    drop(registry);
1813
1814    let cap = get_capability_manager().create_capability(
1815        ResourceType::Module,
1816        id as usize,
1817        CapPermissions::all(),
1818    );
1819
1820    let cap_id = unsafe { (&mut *task.process.capabilities.get()).insert(cap) };
1821
1822    Ok(cap_id.as_u64())
1823}
1824
1825/// Performs the sys module unload operation.
1826pub fn sys_module_unload(handle: u64) -> Result<u64, SyscallError> {
1827    require_silo_admin()?;
1828    let required = CapPermissions {
1829        read: false,
1830        write: false,
1831        execute: false,
1832        grant: false,
1833        revoke: true,
1834    };
1835    let module_id = resolve_module_handle(handle, required)?;
1836    let mut registry = MODULE_REGISTRY.lock();
1837    registry.remove(module_id);
1838    Ok(0)
1839}
1840
1841/// Performs the sys module get symbol operation.
1842pub fn sys_module_get_symbol(handle: u64, _ordinal: u64) -> Result<u64, SyscallError> {
1843    let required = CapPermissions {
1844        read: true,
1845        write: false,
1846        execute: false,
1847        grant: false,
1848        revoke: false,
1849    };
1850    let module_id = resolve_module_handle(handle, required)?;
1851    let registry = MODULE_REGISTRY.lock();
1852    let module = registry.get(module_id).ok_or(SyscallError::BadHandle)?;
1853
1854    // The export table format is a simple array of u64 RVAs indexed by ordinal.
1855    let rva = resolve_export_offset(module, _ordinal)?;
1856    let header = module.header.ok_or(SyscallError::InvalidArgument)?;
1857    Ok(header.code_offset.saturating_add(rva))
1858}
1859
1860/// Performs the sys module query operation.
1861pub fn sys_module_query(handle: u64, out_ptr: u64) -> Result<u64, SyscallError> {
1862    let required = CapPermissions {
1863        read: true,
1864        write: false,
1865        execute: false,
1866        grant: false,
1867        revoke: false,
1868    };
1869    let module_id = resolve_module_handle(handle, required)?;
1870    if out_ptr == 0 {
1871        return Err(SyscallError::Fault);
1872    }
1873
1874    let registry = MODULE_REGISTRY.lock();
1875    let module = registry.get(module_id).ok_or(SyscallError::BadHandle)?;
1876
1877    let (format, flags, version, cpu_arch, code_size, data_size, bss_size, entry_point) =
1878        if let Some(header) = module.header {
1879            (
1880                1u32,
1881                header.flags,
1882                header.version,
1883                header.cpu_arch,
1884                header.code_size,
1885                header.data_size,
1886                header.bss_size,
1887                header.entry_point,
1888            )
1889        } else {
1890            (0u32, 0u32, 0u16, 0u8, 0u64, 0u64, 0u64, 0u64)
1891        };
1892
1893    let info = ModuleInfo {
1894        id: module.id,
1895        format,
1896        flags,
1897        version,
1898        cpu_arch,
1899        reserved: 0,
1900        code_size,
1901        data_size,
1902        bss_size,
1903        entry_point,
1904        total_size: module.data.len() as u64,
1905    };
1906
1907    const INFO_SIZE: usize = core::mem::size_of::<ModuleInfo>();
1908    let user = UserSliceWrite::new(out_ptr, INFO_SIZE)?;
1909    let src =
1910        unsafe { core::slice::from_raw_parts(&info as *const ModuleInfo as *const u8, INFO_SIZE) };
1911    user.copy_from(src);
1912    Ok(0)
1913}
1914
1915// ============================================================================
1916// Syscall handlers (kernel entry points)
1917// ============================================================================
1918
1919/// Performs the sys silo create operation.
1920pub fn sys_silo_create(config_ptr: u64) -> Result<u64, SyscallError> {
1921    require_silo_admin()?;
1922    let config = read_user_config(config_ptr)?;
1923    config.validate()?;
1924
1925    let mut mgr = SILO_MANAGER.lock();
1926    let id = mgr.create_silo(&config)?;
1927    drop(mgr);
1928
1929    let cap = get_capability_manager().create_capability(
1930        ResourceType::Silo,
1931        id.sid as usize,
1932        CapPermissions::all(),
1933    );
1934
1935    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
1936    let cap_id = unsafe { (&mut *task.process.capabilities.get()).insert(cap) };
1937
1938    Ok(cap_id.as_u64())
1939}
1940
1941/// Performs the sys silo config operation.
1942pub fn sys_silo_config(handle: u64, res_ptr: u64) -> Result<u64, SyscallError> {
1943    require_silo_admin()?;
1944    let config = read_user_config(res_ptr)?;
1945    config.validate()?;
1946    let family = decode_family(config.family)?;
1947
1948    let mut granted_caps = Vec::new();
1949    let mut granted_resources = Vec::new();
1950    if config.caps_len > 0 {
1951        let caps_list = read_caps_list(config.caps_ptr, config.caps_len)?;
1952        let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
1953        let caps = unsafe { &*task.process.capabilities.get() };
1954
1955        for cap_handle in caps_list {
1956            let cap = caps
1957                .get(CapId::from_raw(cap_handle))
1958                .ok_or(SyscallError::BadHandle)?;
1959            if !cap.permissions.grant {
1960                return Err(SyscallError::PermissionDenied);
1961            }
1962            if !is_delegated_resource(cap.resource_type) {
1963                return Err(SyscallError::InvalidArgument);
1964            }
1965            if !granted_caps.contains(&cap_handle) {
1966                granted_caps.push(cap_handle);
1967            }
1968            add_or_merge_granted_resource(
1969                &mut granted_resources,
1970                GrantedResource {
1971                    resource_type: cap.resource_type,
1972                    resource: cap.resource,
1973                    permissions: cap.permissions,
1974                },
1975            );
1976        }
1977    }
1978
1979    let sid = resolve_silo_handle(handle, CapPermissions::read_write())?;
1980    let mut mgr = SILO_MANAGER.lock();
1981    let silo = mgr.get_mut(sid as u32)?;
1982
1983    let requested_mode = OctalMode::from_octal(config.mode);
1984    kernel_check_spawn_invariants(&silo.id, &requested_mode)?;
1985    if silo.sandboxed && !requested_mode.is_subset_of(&silo.mode) {
1986        return Err(SyscallError::PermissionDenied);
1987    }
1988    if silo.sandboxed && !requested_mode.registry.is_empty() {
1989        return Err(SyscallError::PermissionDenied);
1990    }
1991
1992    silo.config = config;
1993    silo.mode = requested_mode;
1994    silo.family = family;
1995    silo.flags = config.flags as u32;
1996    silo.granted_caps = granted_caps;
1997    silo.granted_resources = granted_resources;
1998    Ok(0)
1999}
2000
2001/// Performs the sys silo attach module operation.
2002pub fn sys_silo_attach_module(handle: u64, module_handle: u64) -> Result<u64, SyscallError> {
2003    require_silo_admin()?;
2004    let silo_id = resolve_silo_handle(handle, CapPermissions::read_write())?;
2005
2006    let required = CapPermissions {
2007        read: true,
2008        write: false,
2009        execute: false,
2010        grant: false,
2011        revoke: false,
2012    };
2013    let module_id = resolve_module_handle(module_handle, required)?;
2014
2015    let mut mgr = SILO_MANAGER.lock();
2016    let silo = mgr.get_mut(silo_id)?;
2017
2018    match silo.state {
2019        SiloState::Created | SiloState::Stopped | SiloState::Ready => {
2020            silo.module_id = Some(module_id);
2021            silo.state = SiloState::Ready;
2022            Ok(0)
2023        }
2024        SiloState::Running | SiloState::Paused => {
2025            silo.module_id = Some(module_id);
2026            Ok(0)
2027        }
2028        _ => Err(SyscallError::InvalidArgument),
2029    }
2030}
2031
2032/// Starts silo by id.
2033fn start_silo_by_id(silo_id: u32) -> Result<(), SyscallError> {
2034    let (
2035        module_id,
2036        granted_caps,
2037        silo_flags,
2038        previous_state,
2039        can_start,
2040        within_task_limit,
2041        silo_name,
2042        silo_label,
2043    ) = {
2044        let mut mgr = SILO_MANAGER.lock();
2045        let silo = mgr.get_mut(silo_id)?;
2046        let previous_state = silo.state;
2047        let can_start = matches!(
2048            previous_state,
2049            SiloState::Ready | SiloState::Stopped | SiloState::Running
2050        );
2051        let within_task_limit = match silo.config.max_tasks {
2052            0 => true, // 0 = unlimited
2053            max => silo.tasks.len() < max as usize,
2054        };
2055        let module_id = silo.module_id;
2056        let granted_caps = silo.granted_caps.clone();
2057        let silo_flags = silo.config.flags;
2058        let silo_name = silo.name.clone();
2059        let silo_label = silo.strate_label.clone();
2060        if can_start && within_task_limit {
2061            silo.state = SiloState::Loading;
2062        }
2063        (
2064            module_id,
2065            granted_caps,
2066            silo_flags,
2067            previous_state,
2068            can_start,
2069            within_task_limit,
2070            silo_name,
2071            silo_label,
2072        )
2073    };
2074
2075    if !can_start {
2076        return Err(SyscallError::InvalidArgument);
2077    }
2078    if !within_task_limit {
2079        return Err(SyscallError::QueueFull);
2080    }
2081
2082    let rollback_loading = |state: SiloState| {
2083        let mut mgr = SILO_MANAGER.lock();
2084        if let Ok(silo) = mgr.get_mut(silo_id) {
2085            if matches!(silo.state, SiloState::Loading) {
2086                silo.state = state;
2087            }
2088        }
2089    };
2090
2091    let module_id = match module_id {
2092        Some(id) => id,
2093        None => {
2094            rollback_loading(previous_state);
2095            return Err(SyscallError::InvalidArgument);
2096        }
2097    };
2098
2099    let seed_caps = {
2100        let task = match current_task_clone() {
2101            Some(t) => t,
2102            None => {
2103                rollback_loading(previous_state);
2104                return Err(SyscallError::PermissionDenied);
2105            }
2106        };
2107        let caps = unsafe { &mut *task.process.capabilities.get() };
2108        let mut out = Vec::with_capacity(granted_caps.len());
2109        for handle in granted_caps {
2110            // Enforce: caller must currently hold the capability.
2111            if !silo_has_capability(&task, handle) {
2112                rollback_loading(previous_state);
2113                return Err(SyscallError::PermissionDenied);
2114            }
2115            if let Some(dup) = caps.duplicate(CapId::from_raw(handle)) {
2116                out.push(dup);
2117            } else {
2118                rollback_loading(previous_state);
2119                return Err(SyscallError::PermissionDenied);
2120            }
2121        }
2122        out
2123    };
2124
2125    let display = silo_label.unwrap_or(silo_name);
2126    let task_name_owned = if silo_flags & SILO_FLAG_ADMIN != 0 {
2127        alloc::format!("silo-{}/strate-admin-{}", silo_id, display)
2128    } else {
2129        alloc::format!("silo-{}/strate-{}", silo_id, display)
2130    };
2131    // Intentional leak: task names are expected to live for the task lifetime.
2132    // This avoids generic "silo" labels in process viewers.
2133    let task_name: &'static str = Box::leak(task_name_owned.into_boxed_str());
2134
2135    let module_data = {
2136        let registry = MODULE_REGISTRY.lock();
2137        match registry.get(module_id) {
2138            Some(module) => module.data.clone(),
2139            None => {
2140                rollback_loading(previous_state);
2141                return Err(SyscallError::BadHandle);
2142            }
2143        }
2144    };
2145
2146    let load_result =
2147        crate::process::elf::load_elf_task_with_caps(&module_data, task_name, &seed_caps).map_err(
2148            |err| {
2149                log::warn!(
2150                    "silo_start: sid={} module={} task='{}' load failed: {}",
2151                    silo_id,
2152                    module_id,
2153                    task_name,
2154                    err
2155                );
2156                map_elf_start_error(err)
2157            },
2158        );
2159
2160    let task = match load_result {
2161        Ok(task) => task,
2162        Err(e) => {
2163            rollback_loading(previous_state);
2164            return Err(e);
2165        }
2166    };
2167    let task_id = task.id;
2168
2169    // Give the silo an EOF stdin so that any read(0, …) returns 0 immediately
2170    // instead of EBADF (which can cause busy-loops) or blocking on the
2171    // keyboard (which would steal input from the foreground shell).
2172    let bg_stdin = crate::vfs::create_background_stdin();
2173    let fd_table = unsafe { &mut *task.process.fd_table.get() };
2174    fd_table.insert_at(crate::vfs::STDIN, bg_stdin);
2175
2176    let mut mgr = SILO_MANAGER.lock();
2177    {
2178        let silo = match mgr.get_mut(silo_id) {
2179            Ok(silo) => silo,
2180            Err(e) => {
2181                return Err(e);
2182            }
2183        };
2184        silo.tasks.push(task_id);
2185        silo.state = SiloState::Running;
2186        let fpu_xcr0 = unsafe { (*task.fpu_state.get()).xcr0_mask };
2187        let effective_xcr0 = (silo.config.xcr0_mask & fpu_xcr0).max(0x3);
2188        task.xcr0_mask
2189            .store(effective_xcr0, core::sync::atomic::Ordering::Relaxed);
2190    }
2191    mgr.map_task(task_id, silo_id);
2192    mgr.push_event(SiloEvent {
2193        silo_id: silo_id.into(),
2194        kind: SiloEventKind::Started,
2195        data0: 0,
2196        data1: 0,
2197        tick: crate::process::scheduler::ticks(),
2198    });
2199    drop(mgr);
2200    crate::process::add_task(task);
2201    Ok(())
2202}
2203
2204/// Performs the sys silo start operation.
2205pub fn sys_silo_start(handle: u64) -> Result<u64, SyscallError> {
2206    require_silo_admin()?;
2207    let required = CapPermissions {
2208        read: false,
2209        write: false,
2210        execute: true,
2211        grant: false,
2212        revoke: false,
2213    };
2214    let silo_id = resolve_silo_handle(handle, required)?;
2215    start_silo_by_id(silo_id)?;
2216    Ok(0)
2217}
2218
2219/// Best-effort cleanup hook called by the scheduler when a task terminates.
2220///
2221/// Ensures `task_to_silo` mappings are removed even for normal exits and
2222/// transitions a running/paused silo to `Stopped` when its last task is gone.
2223pub fn on_task_terminated(task_id: TaskId) {
2224    let mut mgr = SILO_MANAGER.lock();
2225    let silo_id = match mgr.silo_for_task(task_id) {
2226        Some(id) => id,
2227        None => return,
2228    };
2229    mgr.unmap_task(task_id);
2230
2231    let mut emit_stopped = false;
2232    if let Ok(silo) = mgr.get_mut(silo_id) {
2233        if let Some(pos) = silo.tasks.iter().position(|tid| *tid == task_id) {
2234            silo.tasks.swap_remove(pos);
2235        }
2236        if silo.tasks.is_empty() {
2237            match silo.state {
2238                SiloState::Running | SiloState::Paused | SiloState::Stopping => {
2239                    silo.state = SiloState::Stopped;
2240                    silo.event_seq = silo.event_seq.wrapping_add(1);
2241                    emit_stopped = true;
2242                }
2243                _ => {}
2244            }
2245        }
2246    }
2247
2248    if emit_stopped {
2249        mgr.push_event(SiloEvent {
2250            silo_id: silo_id.into(),
2251            kind: SiloEventKind::Stopped,
2252            data0: 0,
2253            data1: 0,
2254            tick: crate::process::scheduler::ticks(),
2255        });
2256    }
2257}
2258
2259/// Stops or kill silo by id.
2260fn stop_or_kill_silo_by_id(
2261    silo_id: u32,
2262    force_kill: bool,
2263    require_running: bool,
2264) -> Result<Vec<TaskId>, SyscallError> {
2265    let mut mgr = SILO_MANAGER.lock();
2266    let tasks = {
2267        let silo = mgr.get_mut(silo_id)?;
2268        if force_kill {
2269            silo.state = SiloState::Stopped;
2270            let tasks = silo.tasks.clone();
2271            silo.tasks.clear();
2272            tasks
2273        } else {
2274            match silo.state {
2275                SiloState::Running | SiloState::Paused => {
2276                    silo.state = SiloState::Stopping;
2277                    let tasks = silo.tasks.clone();
2278                    silo.tasks.clear();
2279                    tasks
2280                }
2281                _ if require_running => return Err(SyscallError::InvalidArgument),
2282                _ => Vec::new(),
2283            }
2284        }
2285    };
2286
2287    for tid in &tasks {
2288        mgr.unmap_task(*tid);
2289    }
2290    if !force_kill {
2291        if let Ok(silo) = mgr.get_mut(silo_id) {
2292            silo.state = SiloState::Stopped;
2293        }
2294    }
2295    mgr.push_event(SiloEvent {
2296        silo_id: silo_id.into(),
2297        kind: if force_kill {
2298            SiloEventKind::Killed
2299        } else {
2300            SiloEventKind::Stopped
2301        },
2302        data0: 0,
2303        data1: 0,
2304        tick: crate::process::scheduler::ticks(),
2305    });
2306
2307    Ok(tasks)
2308}
2309
2310/// Performs the sys silo stop operation.
2311pub fn sys_silo_stop(handle: u64) -> Result<u64, SyscallError> {
2312    require_silo_admin()?;
2313    let required = CapPermissions {
2314        read: false,
2315        write: false,
2316        execute: true,
2317        grant: false,
2318        revoke: false,
2319    };
2320    let silo_id = resolve_silo_handle(handle, required)?;
2321    let tasks = stop_or_kill_silo_by_id(silo_id, false, true)?;
2322
2323    for tid in tasks {
2324        crate::process::kill_task(tid);
2325    }
2326    Ok(0)
2327}
2328
2329/// Performs the sys silo kill operation.
2330pub fn sys_silo_kill(handle: u64) -> Result<u64, SyscallError> {
2331    require_silo_admin()?;
2332    let required = CapPermissions {
2333        read: false,
2334        write: false,
2335        execute: true,
2336        grant: false,
2337        revoke: false,
2338    };
2339    let silo_id = resolve_silo_handle(handle, required)?;
2340    let tasks = stop_or_kill_silo_by_id(silo_id, true, false)?;
2341
2342    for tid in tasks {
2343        crate::process::kill_task(tid);
2344    }
2345    Ok(0)
2346}
2347
2348/// Performs the silo has capability operation.
2349fn silo_has_capability(task: &Task, cap_id: u64) -> bool {
2350    let caps = unsafe { &*task.process.capabilities.get() };
2351    caps.get(CapId::from_raw(cap_id)).is_some()
2352}
2353
2354/// Returns whether delegated resource.
2355fn is_delegated_resource(rt: ResourceType) -> bool {
2356    matches!(
2357        rt,
2358        ResourceType::Nic
2359            | ResourceType::FileSystem
2360            | ResourceType::Console
2361            | ResourceType::Keyboard
2362            | ResourceType::Volume
2363            | ResourceType::Namespace
2364            | ResourceType::Device
2365            | ResourceType::File
2366            | ResourceType::IoPortRange
2367            | ResourceType::InterruptLine
2368    )
2369}
2370
2371/// Returns whether admin task.
2372fn is_admin_task(task: &Task) -> bool {
2373    let caps = unsafe { &*task.process.capabilities.get() };
2374    let required = CapPermissions {
2375        read: false,
2376        write: false,
2377        execute: false,
2378        grant: true,
2379        revoke: false,
2380    };
2381    caps.has_resource_with_permissions(ResourceType::Silo, SILO_ADMIN_RESOURCE, required)
2382}
2383
2384/// Maps elf start error.
2385fn map_elf_start_error(err: &'static str) -> SyscallError {
2386    if err.contains("allocate")
2387        || err.contains("Out of memory")
2388        || err.contains("No virtual range")
2389        || err.contains("Failed to map page")
2390    {
2391        return SyscallError::OutOfMemory;
2392    }
2393    if err.contains("ELF")
2394        || err.contains("PT_")
2395        || err.contains("entry")
2396        || err.contains("relocation")
2397        || err.contains("Program header")
2398        || err.contains("x86_64")
2399        || err.contains("Unsupported")
2400    {
2401        return SyscallError::ExecFormatError;
2402    }
2403    SyscallError::InvalidArgument
2404}
2405
/// A resource delegated to a silo together with the permissions granted on it.
///
/// Entries are keyed by (`resource_type`, `resource`); entries with the same
/// key are merged by `add_or_merge_granted_resource`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct GrantedResource {
    // Kind of the delegated resource.
    resource_type: ResourceType,
    // Resource identifier, taken from the capability's `resource` field.
    resource: usize,
    // Permissions granted on the resource.
    permissions: CapPermissions,
}
2412
2413/// Performs the merge permissions operation.
2414fn merge_permissions(a: CapPermissions, b: CapPermissions) -> CapPermissions {
2415    CapPermissions {
2416        read: a.read || b.read,
2417        write: a.write || b.write,
2418        execute: a.execute || b.execute,
2419        grant: a.grant || b.grant,
2420        revoke: a.revoke || b.revoke,
2421    }
2422}
2423
2424/// Performs the permissions subset operation.
2425fn permissions_subset(requested: CapPermissions, allowed: CapPermissions) -> bool {
2426    (!requested.read || allowed.read)
2427        && (!requested.write || allowed.write)
2428        && (!requested.execute || allowed.execute)
2429        && (!requested.grant || allowed.grant)
2430        && (!requested.revoke || allowed.revoke)
2431}
2432
2433/// Performs the add or merge granted resource operation.
2434fn add_or_merge_granted_resource(list: &mut Vec<GrantedResource>, grant: GrantedResource) {
2435    for existing in list.iter_mut() {
2436        if existing.resource_type == grant.resource_type && existing.resource == grant.resource {
2437            existing.permissions = merge_permissions(existing.permissions, grant.permissions);
2438            return;
2439        }
2440    }
2441    list.push(grant);
2442}
2443
2444/// Performs the register current task granted resource operation.
2445pub fn register_current_task_granted_resource(
2446    resource_type: ResourceType,
2447    resource: usize,
2448    permissions: CapPermissions,
2449) -> Result<(), SyscallError> {
2450    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
2451    let mut mgr = SILO_MANAGER.lock();
2452    let silo_id = mgr
2453        .silo_for_task(task.id)
2454        .ok_or(SyscallError::PermissionDenied)?;
2455    let silo = mgr.get_mut(silo_id)?;
2456    add_or_merge_granted_resource(
2457        &mut silo.granted_resources,
2458        GrantedResource {
2459            resource_type,
2460            resource,
2461            permissions,
2462        },
2463    );
2464    Ok(())
2465}
2466
2467/// Enforce that the current task may use a delegated capability.
2468pub fn enforce_cap_for_current_task(handle: u64) -> Result<(), SyscallError> {
2469    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
2470
2471    // Admin tasks bypass delegated-cap enforcement.
2472    if is_admin_task(&task) {
2473        return Ok(());
2474    }
2475
2476    let caps = unsafe { &*task.process.capabilities.get() };
2477    let cap = caps
2478        .get(CapId::from_raw(handle))
2479        .ok_or(SyscallError::BadHandle)?;
2480
2481    if !is_delegated_resource(cap.resource_type) {
2482        return Ok(());
2483    }
2484
2485    let mgr = SILO_MANAGER.lock();
2486    if let Some(silo_id) = mgr.silo_for_task(task.id) {
2487        if let Ok(silo) = mgr.get(silo_id) {
2488            for grant in &silo.granted_resources {
2489                if grant.resource_type == cap.resource_type && grant.resource == cap.resource {
2490                    if permissions_subset(cap.permissions, grant.permissions) {
2491                        return Ok(());
2492                    }
2493                    return Err(SyscallError::PermissionDenied);
2494                }
2495            }
2496        }
2497    }
2498
2499    Err(SyscallError::PermissionDenied)
2500}
2501
2502/// Performs the enforce registry bind for current task operation.
2503pub fn enforce_registry_bind_for_current_task() -> Result<(), SyscallError> {
2504    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
2505    if is_admin_task(&task) {
2506        return Ok(());
2507    }
2508    let mgr = SILO_MANAGER.lock();
2509    let silo_id = mgr
2510        .silo_for_task(task.id)
2511        .ok_or(SyscallError::PermissionDenied)?;
2512    let silo = mgr.get(silo_id)?;
2513    if silo.sandboxed {
2514        return Err(SyscallError::PermissionDenied);
2515    }
2516    if silo.mode.registry.contains(RegistryMode::BIND) {
2517        Ok(())
2518    } else {
2519        Err(SyscallError::PermissionDenied)
2520    }
2521}
2522
2523/// Enforce console access for the current task.
2524///
2525/// Only admin tasks or tasks holding a Console capability with write permission
2526/// can access the kernel console (SYS_WRITE fd=1/2).
2527pub fn enforce_console_access() -> Result<(), SyscallError> {
2528    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
2529    if is_admin_task(&task) {
2530        return Ok(());
2531    }
2532    let mgr = SILO_MANAGER.lock();
2533    if let Some(silo_id) = mgr.silo_for_task(task.id) {
2534        if let Ok(silo) = mgr.get(silo_id) {
2535            if matches!(silo.family, StrateFamily::SYS | StrateFamily::NET) {
2536                return Ok(());
2537            }
2538        }
2539    }
2540    drop(mgr);
2541    let caps = unsafe { &*task.process.capabilities.get() };
2542    let required = CapPermissions {
2543        read: false,
2544        write: true,
2545        execute: false,
2546        grant: false,
2547        revoke: false,
2548    };
2549    if caps.has_resource_type_with_permissions(ResourceType::Console, required) {
2550        Ok(())
2551    } else {
2552        Err(SyscallError::PermissionDenied)
2553    }
2554}
2555
2556/// Performs the sys silo event next operation.
2557pub fn sys_silo_event_next(_event_ptr: u64) -> Result<u64, SyscallError> {
2558    require_silo_admin()?;
2559    if _event_ptr == 0 {
2560        return Err(SyscallError::Fault);
2561    }
2562
2563    let event = {
2564        let mut mgr = SILO_MANAGER.lock();
2565        mgr.events.pop_front()
2566    };
2567
2568    let event = match event {
2569        Some(e) => e,
2570        None => return Err(SyscallError::Again),
2571    };
2572
2573    const EVT_SIZE: usize = core::mem::size_of::<SiloEvent>();
2574    let user = UserSliceWrite::new(_event_ptr, EVT_SIZE)?;
2575    let src =
2576        unsafe { core::slice::from_raw_parts(&event as *const SiloEvent as *const u8, EVT_SIZE) };
2577    user.copy_from(src);
2578    Ok(0)
2579}
2580
2581/// Performs the sys silo suspend operation.
2582pub fn sys_silo_suspend(handle: u64) -> Result<u64, SyscallError> {
2583    require_silo_admin()?;
2584    let required = CapPermissions {
2585        read: false,
2586        write: false,
2587        execute: true,
2588        grant: false,
2589        revoke: false,
2590    };
2591    let silo_id = resolve_silo_handle(handle, required)?;
2592
2593    // Lock is released before suspend_task (which takes the scheduler lock)
2594    // to avoid lock-ordering deadlock. Tasks added between the two locks
2595    // won't be suspended — acceptable best-effort trade-off.
2596    let tasks = {
2597        let mut mgr = SILO_MANAGER.lock();
2598        let silo = mgr.get_mut(silo_id)?;
2599        match silo.state {
2600            SiloState::Running => {
2601                silo.state = SiloState::Paused;
2602                silo.tasks.clone()
2603            }
2604            _ => return Err(SyscallError::InvalidArgument),
2605        }
2606    };
2607
2608    for tid in &tasks {
2609        crate::process::suspend_task(*tid);
2610    }
2611
2612    let mut mgr = SILO_MANAGER.lock();
2613    mgr.push_event(SiloEvent {
2614        silo_id: silo_id.into(),
2615        kind: SiloEventKind::Paused,
2616        data0: 0,
2617        data1: 0,
2618        tick: crate::process::scheduler::ticks(),
2619    });
2620
2621    Ok(0)
2622}
2623
2624/// Performs the sys silo resume operation.
2625pub fn sys_silo_resume(handle: u64) -> Result<u64, SyscallError> {
2626    require_silo_admin()?;
2627    let required = CapPermissions {
2628        read: false,
2629        write: false,
2630        execute: true,
2631        grant: false,
2632        revoke: false,
2633    };
2634    let silo_id = resolve_silo_handle(handle, required)?;
2635
2636    let tasks = {
2637        let mut mgr = SILO_MANAGER.lock();
2638        let silo = mgr.get_mut(silo_id)?;
2639        match silo.state {
2640            SiloState::Paused => {
2641                silo.state = SiloState::Running;
2642                silo.tasks.clone()
2643            }
2644            _ => return Err(SyscallError::InvalidArgument),
2645        }
2646    };
2647
2648    // Same lock-ordering pattern as sys_silo_suspend (see note there).
2649    for tid in &tasks {
2650        crate::process::resume_task(*tid);
2651    }
2652
2653    let mut mgr = SILO_MANAGER.lock();
2654    mgr.push_event(SiloEvent {
2655        silo_id: silo_id.into(),
2656        kind: SiloEventKind::Resumed,
2657        data0: 0,
2658        data1: 0,
2659        tick: crate::process::scheduler::ticks(),
2660    });
2661
2662    Ok(0)
2663}
2664
2665// ============================================================================
2666// Kernel-side CLI helpers (no capability gate — shell runs in Ring 0)
2667// ============================================================================
2668
2669pub fn kernel_suspend_silo(selector: &str) -> Result<u32, SyscallError> {
2670    let (silo_id, tasks) = {
2671        let mut mgr = SILO_MANAGER.lock();
2672        let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2673        let silo = mgr.get_mut(silo_id)?;
2674        match silo.state {
2675            SiloState::Running => {
2676                silo.state = SiloState::Paused;
2677                let t = silo.tasks.clone();
2678                (silo_id, t)
2679            }
2680            _ => return Err(SyscallError::InvalidArgument),
2681        }
2682    };
2683    for tid in &tasks {
2684        crate::process::suspend_task(*tid);
2685    }
2686    let mut mgr = SILO_MANAGER.lock();
2687    mgr.push_event(SiloEvent {
2688        silo_id: silo_id.into(),
2689        kind: SiloEventKind::Paused,
2690        data0: 0,
2691        data1: 0,
2692        tick: crate::process::scheduler::ticks(),
2693    });
2694    Ok(silo_id)
2695}
2696
2697pub fn kernel_resume_silo(selector: &str) -> Result<u32, SyscallError> {
2698    let (silo_id, tasks) = {
2699        let mut mgr = SILO_MANAGER.lock();
2700        let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2701        let silo = mgr.get_mut(silo_id)?;
2702        match silo.state {
2703            SiloState::Paused => {
2704                silo.state = SiloState::Running;
2705                let t = silo.tasks.clone();
2706                (silo_id, t)
2707            }
2708            _ => return Err(SyscallError::InvalidArgument),
2709        }
2710    };
2711    for tid in &tasks {
2712        crate::process::resume_task(*tid);
2713    }
2714    let mut mgr = SILO_MANAGER.lock();
2715    mgr.push_event(SiloEvent {
2716        silo_id: silo_id.into(),
2717        kind: SiloEventKind::Resumed,
2718        data0: 0,
2719        data1: 0,
2720        tick: crate::process::scheduler::ticks(),
2721    });
2722    Ok(silo_id)
2723}
2724
2725pub fn silo_detail_snapshot(selector: &str) -> Result<SiloDetailSnapshot, SyscallError> {
2726    let mgr = SILO_MANAGER.lock();
2727    let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2728    let s = mgr.get(silo_id)?;
2729    Ok(SiloDetailSnapshot {
2730        base: SiloSnapshot {
2731            id: s.id.sid,
2732            tier: s.id.tier,
2733            name: s.name.clone(),
2734            strate_label: s.strate_label.clone(),
2735            state: s.state,
2736            task_count: s.tasks.len(),
2737            mem_usage_bytes: s.mem_usage_bytes,
2738            mem_min_bytes: s.config.mem_min,
2739            mem_max_bytes: s.config.mem_max,
2740            mode: s.config.mode,
2741            graphics_flags: s.config.flags
2742                & (SILO_FLAG_GRAPHICS
2743                    | SILO_FLAG_WEBRTC_NATIVE
2744                    | SILO_FLAG_GRAPHICS_READ_ONLY
2745                    | SILO_FLAG_WEBRTC_TURN_FORCE),
2746            graphics_max_sessions: s.config.graphics_max_sessions,
2747            graphics_session_ttl_sec: s.config.graphics_session_ttl_sec,
2748        },
2749        family: s.family,
2750        sandboxed: s.sandboxed,
2751        cpu_shares: s.config.cpu_shares,
2752        cpu_affinity_mask: s.config.cpu_affinity_mask,
2753        max_tasks: s.config.max_tasks,
2754        task_ids: s.tasks.iter().map(|t| t.as_u64()).collect(),
2755        unveil_rules: s
2756            .unveil_rules
2757            .iter()
2758            .map(|r| {
2759                let bits = (if r.rights.read { 4 } else { 0 })
2760                    | (if r.rights.write { 2 } else { 0 })
2761                    | (if r.rights.execute { 1 } else { 0 });
2762                (r.path.clone(), bits)
2763            })
2764            .collect(),
2765        granted_caps_count: s.granted_caps.len(),
2766        cpu_features_required: s.config.cpu_features_required,
2767        cpu_features_allowed: s.config.cpu_features_allowed,
2768        xcr0_mask: s.config.xcr0_mask,
2769        graphics_flags: s.config.flags
2770            & (SILO_FLAG_GRAPHICS
2771                | SILO_FLAG_WEBRTC_NATIVE
2772                | SILO_FLAG_GRAPHICS_READ_ONLY
2773                | SILO_FLAG_WEBRTC_TURN_FORCE),
2774        graphics_max_sessions: s.config.graphics_max_sessions,
2775        graphics_session_ttl_sec: s.config.graphics_session_ttl_sec,
2776    })
2777}
2778
2779pub fn list_events_snapshot() -> Vec<SiloEventSnapshot> {
2780    let mgr = SILO_MANAGER.lock();
2781    mgr.events
2782        .iter()
2783        .map(|e| SiloEventSnapshot {
2784            silo_id: e.silo_id,
2785            kind: e.kind,
2786            data0: e.data0,
2787            data1: e.data1,
2788            tick: e.tick,
2789        })
2790        .collect()
2791}
2792
2793pub fn list_events_for_silo(selector: &str) -> Result<Vec<SiloEventSnapshot>, SyscallError> {
2794    let mgr = SILO_MANAGER.lock();
2795    let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2796    let sid64 = silo_id as u64;
2797    Ok(mgr
2798        .events
2799        .iter()
2800        .filter(|e| e.silo_id == sid64)
2801        .map(|e| SiloEventSnapshot {
2802            silo_id: e.silo_id,
2803            kind: e.kind,
2804            data0: e.data0,
2805            data1: e.data1,
2806            tick: e.tick,
2807        })
2808        .collect())
2809}
2810
2811pub fn kernel_pledge_silo(selector: &str, mode_val: u16) -> Result<(u16, u16), SyscallError> {
2812    let new_mode = OctalMode::from_octal(mode_val);
2813    let mut mgr = SILO_MANAGER.lock();
2814    let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2815    let silo = mgr.get_mut(silo_id)?;
2816    let old_raw = silo.config.mode;
2817    silo.mode.pledge(new_mode)?;
2818    silo.config.mode = mode_val;
2819    Ok((old_raw, mode_val))
2820}
2821
2822pub fn kernel_unveil_silo(
2823    selector: &str,
2824    path: &str,
2825    rights_str: &str,
2826) -> Result<u32, SyscallError> {
2827    let rights = UnveilRights {
2828        read: rights_str.contains('r'),
2829        write: rights_str.contains('w'),
2830        execute: rights_str.contains('x'),
2831    };
2832    let mut mgr = SILO_MANAGER.lock();
2833    let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2834    let silo = mgr.get_mut(silo_id)?;
2835    if let Some(rule) = silo.unveil_rules.iter_mut().find(|r| r.path == path) {
2836        rule.rights = rights;
2837    } else {
2838        silo.unveil_rules.push(UnveilRule {
2839            path: String::from(path),
2840            rights,
2841        });
2842    }
2843    Ok(silo_id)
2844}
2845
2846pub fn kernel_sandbox_silo(selector: &str) -> Result<u32, SyscallError> {
2847    let mut mgr = SILO_MANAGER.lock();
2848    let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2849    let silo = mgr.get_mut(silo_id)?;
2850    silo.sandboxed = true;
2851    crate::audit::log(
2852        crate::audit::AuditCategory::Security,
2853        0,
2854        silo_id,
2855        alloc::format!("silo sandboxed"),
2856    );
2857    Ok(silo_id)
2858}
2859
2860/// Get the silo ID for a given task, if any.
2861pub fn task_silo_id(task_id: TaskId) -> Option<u32> {
2862    SILO_MANAGER.lock().silo_for_task(task_id)
2863}
2864
2865/// Append data to a silo's output ring buffer (called from `sys_debug_log`).
2866pub fn silo_output_write(silo_id: u32, data: &[u8]) {
2867    let mut mgr = SILO_MANAGER.lock();
2868    if let Ok(silo) = mgr.get_mut(silo_id) {
2869        let buf = silo
2870            .output_buf
2871            .get_or_insert_with(|| Box::new(SiloOutputBuf::new()));
2872        buf.push(data);
2873    }
2874}
2875
2876/// Drain the output buffer for a silo, returning accumulated bytes.
2877pub fn silo_output_drain(selector: &str) -> Result<Vec<u8>, SyscallError> {
2878    let mut mgr = SILO_MANAGER.lock();
2879    let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2880    let silo = mgr.get_mut(silo_id)?;
2881    let mut buf = match silo.output_buf.take() {
2882        Some(buf) => buf,
2883        None => return Ok(Vec::new()),
2884    };
2885    let out = buf.drain();
2886    silo.output_buf = Some(buf);
2887    Ok(out)
2888}
2889
2890/// Dynamically adjust resource quotas for a silo.
2891///
2892/// `key` can be: `mem_max`, `mem_min`, `max_tasks`, `cpu_shares`.
2893/// Values are parsed as u64 (bytes for memory, count otherwise).
2894pub fn kernel_limit_silo(selector: &str, key: &str, value: u64) -> Result<u32, SyscallError> {
2895    let mut mgr = SILO_MANAGER.lock();
2896    let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2897    let silo = mgr.get_mut(silo_id)?;
2898    let mut next_mem_min = silo.config.mem_min;
2899    let mut next_mem_max = silo.config.mem_max;
2900    let mut next_max_tasks = silo.config.max_tasks;
2901    let mut next_cpu_shares = silo.config.cpu_shares;
2902    match key {
2903        "mem_max" => next_mem_max = value,
2904        "mem_min" => next_mem_min = value,
2905        "max_tasks" => {
2906            if value > u32::MAX as u64 {
2907                return Err(SyscallError::InvalidArgument);
2908            }
2909            next_max_tasks = value as u32;
2910        }
2911        "cpu_shares" => {
2912            if value > u32::MAX as u64 {
2913                return Err(SyscallError::InvalidArgument);
2914            }
2915            next_cpu_shares = value as u32;
2916        }
2917        _ => return Err(SyscallError::InvalidArgument),
2918    }
2919    if next_mem_max != 0 && next_mem_min > next_mem_max {
2920        return Err(SyscallError::InvalidArgument);
2921    }
2922    silo.config.mem_min = next_mem_min;
2923    silo.config.mem_max = next_mem_max;
2924    silo.config.max_tasks = next_max_tasks;
2925    silo.config.cpu_shares = next_cpu_shares;
2926    crate::audit::log(
2927        crate::audit::AuditCategory::Security,
2928        0,
2929        silo_id,
2930        alloc::format!("silo limit: {}={}", key, value),
2931    );
2932    Ok(silo_id)
2933}
2934
2935// ============================================================================
2936// Fault handling (called from exception handlers)
2937// ============================================================================
2938
2939/// Performs the dump user fault operation.
2940fn dump_user_fault(task_id: TaskId, reason: SiloFaultReason, extra: u64, subcode: u64, rip: u64) {
2941    let task_meta = crate::process::get_task_by_id(task_id).map(|task| {
2942        let state = unsafe { *task.state.get() };
2943        let as_ref = unsafe { &*task.process.address_space.get() };
2944        (
2945            task.pid,
2946            task.tid,
2947            task.name,
2948            state,
2949            as_ref.cr3().as_u64(),
2950            as_ref.is_kernel(),
2951        )
2952    });
2953
2954    if let Some((pid, tid, name, state, as_cr3, as_is_kernel)) = task_meta {
2955        crate::serial_println!(
2956            "\x1b[31m[handle_user_fault]\x1b[0m task={} \x1b[36mpid={}\x1b[0m tid={} name='{}' state={:?} reason={:?} \x1b[35mrip={:#x}\x1b[0m \x1b[35mextra={:#x}\x1b[0m subcode={:#x} as_cr3={:#x} as_kernel={}",
2957            task_id.as_u64(),
2958            pid,
2959            tid,
2960            name,
2961            state,
2962            reason,
2963            rip,
2964            extra,
2965            subcode,
2966            as_cr3,
2967            as_is_kernel
2968        );
2969    } else {
2970        crate::serial_println!(
2971            "\x1b[31m[handle_user_fault]\x1b[0m task={} reason={:?} \x1b[35mrip={:#x}\x1b[0m \x1b[35mextra={:#x}\x1b[0m subcode={:#x} (task metadata unavailable)",
2972            task_id.as_u64(),
2973            reason,
2974            rip,
2975            extra,
2976            subcode
2977        );
2978    }
2979
2980    if reason == SiloFaultReason::PageFault {
2981        let present = (subcode & 0x1) != 0;
2982        let write = (subcode & 0x2) != 0;
2983        let user = (subcode & 0x4) != 0;
2984        let reserved = (subcode & 0x8) != 0;
2985        let instr_fetch = (subcode & 0x10) != 0;
2986        let pkey = (subcode & 0x20) != 0;
2987        let shadow_stack = (subcode & 0x40) != 0;
2988        let sgx = (subcode & 0x8000) != 0;
2989        crate::serial_println!(
2990            "\x1b[31m[handle_user_fault]\x1b[0m \x1b[31mpagefault\x1b[0m \x1b[35maddr={:#x}\x1b[0m \x1b[35mrip={:#x}\x1b[0m ec={:#x} present={} write={} user={} reserved={} ifetch={} pkey={} shadow_stack={} sgx={}",
2991            extra,
2992            rip,
2993            subcode,
2994            present,
2995            write,
2996            user,
2997            reserved,
2998            instr_fetch,
2999            pkey,
3000            shadow_stack,
3001            sgx
3002        );
3003        if user && extra < 0x1000 {
3004            crate::serial_println!(
3005                "\x1b[31m[handle_user_fault]\x1b[0m \x1b[33mhint: low user address fault ({:#x}) -> probable NULL/near-NULL dereference\x1b[0m",
3006                extra
3007            );
3008        }
3009    } else {
3010        crate::serial_println!(
3011            "\x1b[31m[handle_user_fault]\x1b[0m \x1b[31mfault detail\x1b[0m \x1b[35mrip={:#x}\x1b[0m code={:#x}",
3012            rip,
3013            subcode
3014        );
3015    }
3016}
3017
3018/// Handles user fault.
3019pub fn handle_user_fault(
3020    task_id: TaskId,
3021    reason: SiloFaultReason,
3022    extra: u64,
3023    subcode: u64,
3024    rip: u64,
3025) {
3026    // FORCE OUTPUT for user fault - bypasses normal logging mutexes
3027    crate::serial_force_println!(
3028        "\x1b[31;1m[handle_user_fault] CRITICAL FAULT\x1b[0m: tid={} reason={:?} rip={:#x} addr={:#x} err={:#x}",
3029        task_id.as_u64(),
3030        reason,
3031        rip,
3032        extra,
3033        subcode
3034    );
3035
3036    dump_user_fault(task_id, reason, extra, subcode, rip);
3037
3038    // Best-effort: map task to silo, mark crashed, emit event, kill tasks.
3039    let tasks = {
3040        let mut mgr = SILO_MANAGER.lock();
3041        let silo_id = match mgr.silo_for_task(task_id) {
3042            Some(id) => id,
3043            None => {
3044                crate::serial_println!(
3045                    "[handle_user_fault] Non-silo task {} crashed (reason={:?})! Killing it.",
3046                    task_id.as_u64(),
3047                    reason
3048                );
3049                drop(mgr);
3050                crate::process::kill_task(task_id);
3051                return;
3052            }
3053        };
3054        let mut tasks = Vec::new();
3055        {
3056            if let Ok(silo) = mgr.get_mut(silo_id) {
3057                silo.state = SiloState::Crashed;
3058                tasks = silo.tasks.clone();
3059                silo.tasks.clear();
3060                silo.event_seq = silo.event_seq.wrapping_add(1);
3061            }
3062        }
3063        for tid in &tasks {
3064            mgr.unmap_task(*tid);
3065        }
3066        mgr.push_event(SiloEvent {
3067            silo_id: silo_id.into(),
3068            kind: SiloEventKind::Crashed,
3069            data0: pack_fault(reason, subcode),
3070            data1: extra,
3071            tick: crate::process::scheduler::ticks(),
3072        });
3073        tasks
3074    };
3075
3076    for tid in &tasks {
3077        crate::process::kill_task(*tid);
3078    }
3079    if !tasks.contains(&task_id) {
3080        crate::process::kill_task(task_id);
3081    }
3082}