Skip to main content

strat9_kernel/silo/
mod.rs

1//! Silo manager (kernel-side, minimal mechanisms only)
2//!
3//! This module provides the core kernel structures and syscalls
4//! to create and manage silos. Policy lives in userspace (silo admin).
5
6use crate::{
7    capability::{get_capability_manager, CapId, CapPermissions, ResourceType},
8    hardware::storage::{ahci, virtio_block},
9    ipc::port::{self, PortId},
10    memory::{UserSliceRead, UserSliceWrite},
11    process::{current_task_clone, task::Task, TaskId},
12    sync::{FixedQueue, SpinLock},
13    syscall::error::SyscallError,
14};
15use alloc::{
16    boxed::Box,
17    collections::BTreeMap,
18    string::{String, ToString},
19    sync::Arc,
20    vec::Vec,
21};
22use core::sync::atomic::{AtomicU64, Ordering};
23
24// ============================================================================
25// Public ABI structs (repr(C) for syscall boundary)
26// ============================================================================
27
/// Privilege tier of a silo, derived from its numeric id (see [`SiloId::new`]).
///
/// The derived ordering follows the discriminants: `Critical < System < User`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[repr(u8)]
pub enum SiloTier {
    Critical = 0,
    System = 1,
    User = 2,
}

/// Identifier of a silo together with the tier implied by that identifier.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct SiloId {
    /// Raw numeric silo id.
    pub sid: u32,
    /// Tier computed from `sid` at construction time.
    pub tier: SiloTier,
}

impl SiloId {
    /// Builds a `SiloId` from a raw id and classifies it into a tier.
    ///
    /// * `1..=9`    -> [`SiloTier::Critical`]
    /// * `10..=999` -> [`SiloTier::System`]
    /// * anything else (including `0`) -> [`SiloTier::User`]
    pub const fn new(sid: u32) -> Self {
        let tier = if sid == 0 || sid > 999 {
            SiloTier::User
        } else if sid < 10 {
            SiloTier::Critical
        } else {
            SiloTier::System
        };
        Self { sid, tier }
    }

    /// Returns the raw id widened to `u64` (the tier is not encoded).
    pub fn as_u64(&self) -> u64 {
        u64::from(self.sid)
    }
}
59
60use bitflags::bitflags;
61
// Control permission bits of a silo.
// `OctalMode::from_octal` fills this set from bits 6..=8 (the most
// significant octal digit) of the packed mode value.
// NOTE(review): flag names suggest "may list / stop / spawn silos" — confirm
// against the call sites that test these bits.
bitflags! {
    #[repr(transparent)]
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub struct ControlMode: u8 {
        // Octal digit value 4.
        const LIST  = 0b100;
        // Octal digit value 2.
        const STOP  = 0b010;
        // Octal digit value 1.
        const SPAWN = 0b001;
    }
}
71
// Hardware permission bits of a silo.
// `OctalMode::from_octal` fills this set from bits 3..=5 (the middle octal
// digit) of the packed mode value. `kernel_check_spawn_invariants` rejects
// any of these bits for silos in the `User` tier.
bitflags! {
    #[repr(transparent)]
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub struct HardwareMode: u8 {
        // Octal digit value 4.
        const INTERRUPT = 0b100;
        // Octal digit value 2.
        const IO        = 0b010;
        // Octal digit value 1.
        const DMA       = 0b001;
    }
}
81
// Registry permission bits of a silo.
// `OctalMode::from_octal` fills this set from bits 0..=2 (the least
// significant octal digit) of the packed mode value.
// `sys_silo_enter_sandbox` clears the whole set.
bitflags! {
    #[repr(transparent)]
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub struct RegistryMode: u8 {
        // Octal digit value 4.
        const LOOKUP = 0b100;
        // Octal digit value 2.
        const BIND   = 0b010;
        // Octal digit value 1.
        const PROXY  = 0b001;
    }
}
91
92#[repr(C)]
93#[derive(Debug, Clone, Copy, PartialEq, Eq)]
94pub struct OctalMode {
95    pub control: ControlMode,
96    pub hardware: HardwareMode,
97    pub registry: RegistryMode,
98}
99
100impl OctalMode {
101    /// Builds this from octal.
102    pub const fn from_octal(val: u16) -> Self {
103        Self {
104            control: ControlMode::from_bits_truncate(((val >> 6) & 0o7) as u8),
105            hardware: HardwareMode::from_bits_truncate(((val >> 3) & 0o7) as u8),
106            registry: RegistryMode::from_bits_truncate((val & 0o7) as u8),
107        }
108    }
109
110    /// Returns whether subset of.
111    pub const fn is_subset_of(&self, other: &OctalMode) -> bool {
112        (self.control.bits() & !other.control.bits() == 0)
113            && (self.hardware.bits() & !other.hardware.bits() == 0)
114            && (self.registry.bits() & !other.registry.bits() == 0)
115    }
116
117    /// Performs the pledge operation.
118    pub fn pledge(&mut self, new_mode: OctalMode) -> Result<(), SyscallError> {
119        if !new_mode.is_subset_of(self) {
120            return Err(SyscallError::PermissionDenied); // Escalation attempt
121        }
122        *self = new_mode;
123        Ok(())
124    }
125}
126
127/// Performs the sys silo pledge operation.
128pub fn sys_silo_pledge(mode_val: u64) -> Result<u64, SyscallError> {
129    let new_mode = OctalMode::from_octal(mode_val as u16);
130    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
131
132    let mut mgr = SILO_MANAGER.lock();
133    if let Some(silo_id) = mgr.silo_for_task(task.id) {
134        if let Ok(silo) = mgr.get_mut(silo_id) {
135            silo.mode.pledge(new_mode)?;
136
137            mgr.push_event(SiloEvent {
138                silo_id: silo_id as u64,
139                kind: SiloEventKind::Started, // Re-using Started as "Updated" for now
140                data0: mode_val,
141                data1: 0,
142                tick: crate::process::scheduler::ticks(),
143            });
144            return Ok(0);
145        }
146    }
147    Err(SyscallError::BadHandle)
148}
149
150/// Performs the sys silo unveil operation.
151pub fn sys_silo_unveil(
152    path_ptr: u64,
153    path_len: u64,
154    rights_bits: u64,
155) -> Result<u64, SyscallError> {
156    const MAX_UNVEIL_PATH: usize = 1024;
157    const MAX_UNVEIL_RULES: usize = 128;
158
159    if path_ptr == 0 {
160        return Err(SyscallError::Fault);
161    }
162    let len = usize::try_from(path_len).map_err(|_| SyscallError::InvalidArgument)?;
163    if len == 0 || len > MAX_UNVEIL_PATH {
164        return Err(SyscallError::InvalidArgument);
165    }
166    let user = UserSliceRead::new(path_ptr, len)?;
167    let raw = user.read_to_vec();
168    let path = core::str::from_utf8(&raw).map_err(|_| SyscallError::InvalidArgument)?;
169    if path.is_empty() || !path.starts_with('/') || path.as_bytes().iter().any(|b| *b == 0) {
170        return Err(SyscallError::InvalidArgument);
171    }
172    let path = normalize_unveil_path(path)?;
173    let rights = UnveilRights::from_bits(rights_bits)?;
174
175    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
176    let mut mgr = SILO_MANAGER.lock();
177    let silo_id = mgr.silo_for_task(task.id).ok_or(SyscallError::BadHandle)?;
178    let silo = mgr.get_mut(silo_id)?;
179
180    if let Some(rule) = silo.unveil_rules.iter_mut().find(|r| r.path == path) {
181        rule.rights = rule.rights.intersect(rights);
182        return Ok(0);
183    }
184    if silo.unveil_rules.len() >= MAX_UNVEIL_RULES {
185        return Err(SyscallError::QueueFull);
186    }
187    silo.unveil_rules.push(UnveilRule { path, rights });
188    Ok(0)
189}
190
191/// Performs the sys silo enter sandbox operation.
192pub fn sys_silo_enter_sandbox() -> Result<u64, SyscallError> {
193    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
194    let mut mgr = SILO_MANAGER.lock();
195    let silo_id = mgr.silo_for_task(task.id).ok_or(SyscallError::BadHandle)?;
196    let silo = mgr.get_mut(silo_id)?;
197    if silo.sandboxed {
198        return Ok(0);
199    }
200    silo.sandboxed = true;
201    silo.mode.registry = RegistryMode::empty();
202    silo.config.mode =
203        ((silo.mode.control.bits() as u16) << 6) | ((silo.mode.hardware.bits() as u16) << 3);
204    Ok(0)
205}
206
207/// Performs the enforce silo may grant operation.
208pub fn enforce_silo_may_grant() -> Result<(), SyscallError> {
209    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
210    if is_admin_task(&task) {
211        return Ok(());
212    }
213    let mgr = SILO_MANAGER.lock();
214    let Some(silo_id) = mgr.silo_for_task(task.id) else {
215        return Ok(());
216    };
217    let silo = mgr.get(silo_id)?;
218    if silo.sandboxed {
219        return Err(SyscallError::PermissionDenied);
220    }
221    Ok(())
222}
223
224/// Performs the normalize unveil path operation.
225fn normalize_unveil_path(path: &str) -> Result<String, SyscallError> {
226    if !path.starts_with('/') {
227        return Err(SyscallError::InvalidArgument);
228    }
229    let mut out = String::new();
230    let mut prev_slash = false;
231    for ch in path.chars() {
232        if ch == '/' {
233            if !prev_slash {
234                out.push('/');
235            }
236            prev_slash = true;
237            continue;
238        }
239        if ch == '\0' {
240            return Err(SyscallError::InvalidArgument);
241        }
242        prev_slash = false;
243        out.push(ch);
244    }
245    while out.len() > 1 && out.ends_with('/') {
246        out.pop();
247    }
248    if out.is_empty() {
249        out.push('/');
250    }
251    Ok(out)
252}
253
/// Tells whether `path` is covered by the unveil rule path `rule`.
///
/// The root rule `/` covers everything. Any other rule covers the path equal
/// to it and every path that continues it with a `/` separator, so `/usr`
/// covers `/usr/bin` but not `/usrlocal`.
fn path_rule_matches(rule: &str, path: &str) -> bool {
    if rule == "/" {
        return true;
    }
    match path.strip_prefix(rule) {
        // Either an exact match or the rule ends on a component boundary.
        Some(remainder) => remainder.is_empty() || remainder.starts_with('/'),
        None => false,
    }
}
269
270/// Performs the enforce path for current task operation.
271pub fn enforce_path_for_current_task(
272    path: &str,
273    want_read: bool,
274    want_write: bool,
275    want_execute: bool,
276) -> Result<(), SyscallError> {
277    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
278    if is_admin_task(&task) {
279        return Ok(());
280    }
281    let path = normalize_unveil_path(path)?;
282    let mgr = SILO_MANAGER.lock();
283    let Some(silo_id) = mgr.silo_for_task(task.id) else {
284        return Ok(());
285    };
286    let silo = mgr.get(silo_id)?;
287    if silo.sandboxed {
288        return Err(SyscallError::PermissionDenied);
289    }
290    if silo.unveil_rules.is_empty() {
291        return Ok(());
292    }
293    for rule in &silo.unveil_rules {
294        if !path_rule_matches(&rule.path, &path) {
295            continue;
296        }
297        if (!want_read || rule.rights.read)
298            && (!want_write || rule.rights.write)
299            && (!want_execute || rule.rights.execute)
300        {
301            return Ok(());
302        }
303    }
304    Err(SyscallError::PermissionDenied)
305}
306
307#[derive(Debug, Clone, Copy, PartialEq, Eq)]
308#[repr(C)]
309struct UnveilRights {
310    read: bool,
311    write: bool,
312    execute: bool,
313}
314
315impl UnveilRights {
316    /// Builds this from bits.
317    fn from_bits(bits: u64) -> Result<Self, SyscallError> {
318        if bits & !0x7 != 0 {
319            return Err(SyscallError::InvalidArgument);
320        }
321        Ok(Self {
322            read: (bits & 0x1) != 0,
323            write: (bits & 0x2) != 0,
324            execute: (bits & 0x4) != 0,
325        })
326    }
327
328    /// Performs the intersect operation.
329    fn intersect(self, other: Self) -> Self {
330        Self {
331            read: self.read && other.read,
332            write: self.write && other.write,
333            execute: self.execute && other.execute,
334        }
335    }
336}
337
/// One unveil entry of a silo: a path and the rights granted beneath it.
#[derive(Debug, Clone)]
struct UnveilRule {
    /// Rule path as produced by `normalize_unveil_path`.
    path: String,
    /// Rights granted for paths covered by this rule.
    rights: UnveilRights,
}
343
/// Family tag of a silo. The discriminants are the raw values accepted by
/// `decode_family` (i.e. the `family` byte of `SiloConfig`).
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StrateFamily {
    SYS = 0,
    DRV = 1,
    FS = 2,
    NET = 3,
    WASM = 4,
    // Used as the default family in `SiloConfig::default`.
    USR = 5,
}
354
/// Lifecycle state of a silo. `SiloManager::create_silo` starts every silo in
/// `Created`; the transitions between the other states are implemented
/// outside this part of the file.
#[repr(u32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SiloState {
    Created = 0,
    Loading = 1,
    Ready = 2,
    Running = 3,
    Paused = 4,
    Stopping = 5,
    Stopped = 6,
    Crashed = 7,
    Zombie = 8,
    Destroyed = 9,
}
369
// Bit flags for `SiloConfig::flags`.
// NOTE(review): presumably marks an administrative silo — confirm at use sites.
pub const SILO_FLAG_ADMIN: u64 = 1 << 0;
// Enables graphics; `SiloConfig::validate` then requires non-zero
// `graphics_max_sessions` and `graphics_session_ttl_sec`, and requires both to
// be zero when this flag is absent.
pub const SILO_FLAG_GRAPHICS: u64 = 1 << 1;
// Only valid together with `SILO_FLAG_GRAPHICS` (checked by `validate`).
pub const SILO_FLAG_WEBRTC_NATIVE: u64 = 1 << 2;
pub const SILO_FLAG_GRAPHICS_READ_ONLY: u64 = 1 << 3;
pub const SILO_FLAG_WEBRTC_TURN_FORCE: u64 = 1 << 4;
375
/// Silo creation parameters as passed across the syscall boundary.
///
/// The struct is `repr(C)` and is copied byte-for-byte from user memory by
/// `read_user_config`, so field order and types are part of the ABI.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct SiloConfig {
    /// Lower memory bound; must not exceed `mem_max` unless `mem_max` is 0.
    pub mem_min: u64,
    /// Upper memory bound; 0 disables the `mem_min <= mem_max` check.
    pub mem_max: u64,
    pub cpu_shares: u32,
    /// A non-zero quota requires a non-zero `cpu_period_us`.
    pub cpu_quota_us: u64,
    pub cpu_period_us: u64,
    pub cpu_affinity_mask: u64,
    pub max_tasks: u32,
    pub io_bw_read: u64,
    pub io_bw_write: u64,
    /// User pointer to the capability list; must be non-null when `caps_len > 0`.
    pub caps_ptr: u64,
    /// Number of entries at `caps_ptr`; at most `MAX_SILO_CAPS`.
    pub caps_len: u64,
    /// Combination of the `SILO_FLAG_*` bits.
    pub flags: u64,
    /// Requested silo id; fed to `SiloId::new`, which derives the tier.
    pub sid: u32,
    /// Packed octal permission mode, decoded by `OctalMode::from_octal`.
    pub mode: u16,
    /// Raw `StrateFamily` value, decoded by `decode_family`.
    pub family: u8,
    /// CPU features that this silo requires (bitflags from `CpuFeatures`).
    pub cpu_features_required: u64,
    /// CPU features that this silo is allowed to use.
    pub cpu_features_allowed: u64,
    /// Effective XCR0 mask (computed from allowed features & host capabilities).
    pub xcr0_mask: u64,
    /// Maximum concurrent graphics sessions for this silo (0 = disabled).
    pub graphics_max_sessions: u16,
    /// Graphics session time-to-live in seconds.
    pub graphics_session_ttl_sec: u32,
    /// Reserved for ABI expansion.
    pub graphics_reserved: u16,
}
407
408impl Default for SiloConfig {
409    fn default() -> Self {
410        SiloConfig {
411            mem_min: 0,
412            mem_max: 0,
413            cpu_shares: 0,
414            cpu_quota_us: 0,
415            cpu_period_us: 0,
416            cpu_affinity_mask: 0,
417            max_tasks: 0,
418            io_bw_read: 0,
419            io_bw_write: 0,
420            caps_ptr: 0,
421            caps_len: 0,
422            flags: 0,
423            sid: 42,
424            mode: 0,
425            family: StrateFamily::USR as u8,
426            cpu_features_required: 0,
427            cpu_features_allowed: u64::MAX,
428            xcr0_mask: 0,
429            graphics_max_sessions: 0,
430            graphics_session_ttl_sec: 0,
431            graphics_reserved: 0,
432        }
433    }
434}
435
436impl SiloConfig {
437    /// Performs the validate operation.
438    fn validate(&self) -> Result<(), SyscallError> {
439        if self.mem_min > self.mem_max && self.mem_max != 0 {
440            return Err(SyscallError::InvalidArgument);
441        }
442        if self.cpu_quota_us > 0 && self.cpu_period_us == 0 {
443            return Err(SyscallError::InvalidArgument);
444        }
445        if self.caps_len > MAX_SILO_CAPS as u64 {
446            return Err(SyscallError::InvalidArgument);
447        }
448        if self.caps_len > 0 && self.caps_ptr == 0 {
449            return Err(SyscallError::InvalidArgument);
450        }
451        if self.flags & SILO_FLAG_WEBRTC_NATIVE != 0 && self.flags & SILO_FLAG_GRAPHICS == 0 {
452            return Err(SyscallError::InvalidArgument);
453        }
454        if self.flags & SILO_FLAG_GRAPHICS == 0 {
455            if self.graphics_max_sessions != 0 || self.graphics_session_ttl_sec != 0 {
456                return Err(SyscallError::InvalidArgument);
457            }
458        } else {
459            if self.graphics_max_sessions == 0 {
460                return Err(SyscallError::InvalidArgument);
461            }
462            if self.graphics_session_ttl_sec == 0 {
463                return Err(SyscallError::InvalidArgument);
464            }
465        }
466        Ok(())
467    }
468}
469
/// On-disk header of a CMOD module image, validated by `parse_module_header`.
///
/// The struct is `repr(C, packed)`: multi-byte fields may be unaligned, so
/// they must be read by value (or via `read_unaligned`), never by reference.
#[repr(C, packed)]
#[derive(Clone, Copy)]
pub struct Strat9ModuleHeader {
    pub magic: [u8; 4], // "CMOD"
    /// Header version; `parse_module_header` accepts 1 and 2.
    pub version: u16,
    pub cpu_arch: u8, // 0 = x86_64
    /// `MODULE_FLAG_*` bits.
    pub flags: u32,
    /// Code segment location within the image; `code_offset + code_size` must
    /// lie inside the image, and both must be page-aligned when the size is
    /// non-zero.
    pub code_offset: u64,
    pub code_size: u64,
    /// Data segment location; same constraints as the code segment, and the
    /// two ranges must not overlap.
    pub data_offset: u64,
    pub data_size: u64,
    pub bss_size: u64,
    /// Must be smaller than `code_size` when `code_size` is non-zero.
    pub entry_point: u64,
    /// Table offsets; each must not exceed the image length.
    pub export_table_offset: u64,
    pub import_table_offset: u64,
    pub relocation_table_offset: u64,
    /// Must be non-zero when `MODULE_FLAG_SIGNED` is set.
    pub key_id: [u8; 8],
    /// Must be non-zero when `MODULE_FLAG_SIGNED` is set.
    pub signature: [u8; 64],
    /// CPU features required by this module (CpuFeatures bitflags). Header v2+.
    pub cpu_features_required: u64,
    pub reserved: [u8; 48],
}
492
493impl core::fmt::Debug for Strat9ModuleHeader {
494    /// Performs the fmt operation.
495    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
496        // SAFETY: read fields via read_unaligned to avoid UB on packed struct.
497        let version = unsafe { core::ptr::addr_of!(self.version).read_unaligned() };
498        let flags = unsafe { core::ptr::addr_of!(self.flags).read_unaligned() };
499        let entry = unsafe { core::ptr::addr_of!(self.entry_point).read_unaligned() };
500        let code_sz = unsafe { core::ptr::addr_of!(self.code_size).read_unaligned() };
501        let data_sz = unsafe { core::ptr::addr_of!(self.data_size).read_unaligned() };
502        f.debug_struct("Strat9ModuleHeader")
503            .field("magic", &self.magic)
504            .field("version", &version)
505            .field("cpu_arch", &self.cpu_arch)
506            .field("flags", &flags)
507            .field("entry_point", &entry)
508            .field("code_size", &code_sz)
509            .field("data_size", &data_sz)
510            .finish_non_exhaustive()
511    }
512}
513
/// Summary of a loaded module in a `repr(C)` layout.
/// NOTE(review): presumably returned to userspace by a module-query syscall —
/// confirm against the code that fills it.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct ModuleInfo {
    pub id: u64,
    pub format: u32, // 0 = raw/ELF, 1 = CMOD
    pub flags: u32,
    pub version: u16,
    pub cpu_arch: u8,
    pub reserved: u8,
    pub code_size: u64,
    pub data_size: u64,
    pub bss_size: u64,
    pub entry_point: u64,
    pub total_size: u64,
}
529
/// Kind tag of a `SiloEvent`.
#[repr(u32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SiloEventKind {
    // Also emitted by `sys_silo_pledge` as a stand-in for "mode updated".
    Started = 1,
    Stopped = 2,
    Killed = 3,
    // `data0` of such an event uses the layout produced by `pack_fault`.
    Crashed = 4,
    Paused = 5,
    Resumed = 6,
}
540
/// Fault reason stored in the low bits of a crash event's `data0`
/// (see `pack_fault`).
#[repr(u64)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SiloFaultReason {
    PageFault = 1,
    GeneralProtection = 2,
    InvalidOpcode = 3,
}
548
/// A silo lifecycle event as queued by `SiloManager::push_event`.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct SiloEvent {
    /// Numeric id of the silo the event refers to.
    pub silo_id: u64,
    pub kind: SiloEventKind,
    /// Kind-specific payload (for `Crashed`: see `pack_fault`).
    pub data0: u64,
    /// Second kind-specific payload word.
    pub data1: u64,
    /// Scheduler tick at which the event was recorded.
    pub tick: u64,
}
558
559// data0 encoding for Crashed:
560// - bits 0..15: fault reason (SiloFaultReason)
561// - bits 16..31: fault subcode (arch-specific)
562// - bits 32..63: reserved
563pub const FAULT_SUBCODE_SHIFT: u64 = 16;
564
565/// Performs the pack fault operation.
566pub fn pack_fault(reason: SiloFaultReason, subcode: u64) -> u64 {
567    (reason as u64) | (subcode << FAULT_SUBCODE_SHIFT)
568}
569
570// ============================================================================
571// Internal kernel structs
572// ============================================================================
573
/// Kernel-side record of one silo, owned by `SiloManager`.
#[derive(Debug)]
struct Silo {
    /// Id and tier, derived from `config.sid` at creation.
    id: SiloId,
    /// Generated as `silo-<sid>` by `create_silo`.
    name: String,
    strate_label: Option<String>,
    state: SiloState,
    /// Copy of the creation config; `config.mode` is rewritten when the silo
    /// enters the sandbox.
    config: SiloConfig,
    /// Live permission mode, decoded from `config.mode` at creation.
    mode: OctalMode,
    family: StrateFamily,
    /// Current memory usage accounted to this silo (bytes).
    /// This tracks user-space virtual regions reserved/mapped via AddressSpace APIs.
    mem_usage_bytes: u64,
    /// Low 32 bits of `config.flags`.
    flags: u32,
    module_id: Option<u64>,
    /// Tasks belonging to this silo; also scanned by `silo_for_task` as a
    /// fallback when the task map has no entry.
    tasks: Vec<TaskId>,
    granted_caps: Vec<u64>,
    granted_resources: Vec<GrantedResource>,
    /// Path rules registered through `sys_silo_unveil`.
    unveil_rules: Vec<UnveilRule>,
    /// Set once by `sys_silo_enter_sandbox`.
    sandboxed: bool,
    event_seq: u64,
    /// Ring buffer capturing debug output for `silo attach`.
    output_buf: Option<Box<SiloOutputBuf>>,
}
597
/// Number of bytes retained by a [`SiloOutputBuf`].
const SILO_OUTPUT_CAPACITY: usize = 4096;

/// Fixed-size byte ring that keeps the most recent `SILO_OUTPUT_CAPACITY`
/// bytes pushed into it.
struct SiloOutputBuf {
    /// Backing storage.
    buf: [u8; SILO_OUTPUT_CAPACITY],
    /// Index of the oldest stored byte.
    head: usize,
    /// Number of valid bytes, at most `SILO_OUTPUT_CAPACITY`.
    count: usize,
}

impl core::fmt::Debug for SiloOutputBuf {
    /// Prints only the fill level; the buffered bytes are omitted.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let mut dbg = f.debug_struct("SiloOutputBuf");
        dbg.field("count", &self.count);
        dbg.finish()
    }
}

impl SiloOutputBuf {
    /// Creates an empty, zero-filled buffer.
    const fn new() -> Self {
        Self {
            buf: [0; SILO_OUTPUT_CAPACITY],
            head: 0,
            count: 0,
        }
    }

    /// Appends `data`; once the buffer is full every new byte overwrites the
    /// oldest one.
    fn push(&mut self, data: &[u8]) {
        for byte in data.iter().copied() {
            let slot = (self.head + self.count) % SILO_OUTPUT_CAPACITY;
            self.buf[slot] = byte;
            if self.count == SILO_OUTPUT_CAPACITY {
                // Full: the slot just written was the oldest byte, so the
                // logical start moves forward by one.
                self.head = (self.head + 1) % SILO_OUTPUT_CAPACITY;
            } else {
                self.count += 1;
            }
        }
    }

    /// Returns the buffered bytes oldest-first and empties the buffer.
    fn drain(&mut self) -> Vec<u8> {
        let start = self.head;
        let drained: Vec<u8> = (0..self.count)
            .map(|offset| self.buf[(start + offset) % SILO_OUTPUT_CAPACITY])
            .collect();
        self.head = 0;
        self.count = 0;
        drained
    }
}
645
/// Owned, point-in-time summary of a silo.
/// NOTE(review): the code that fills this struct is outside this part of the
/// file; field meanings below follow the like-named `Silo`/`SiloConfig` fields.
#[derive(Debug, Clone)]
pub struct SiloSnapshot {
    pub id: u32,
    pub tier: SiloTier,
    pub name: String,
    pub strate_label: Option<String>,
    pub state: SiloState,
    pub task_count: usize,
    pub mem_usage_bytes: u64,
    pub mem_min_bytes: u64,
    pub mem_max_bytes: u64,
    pub mode: u16,
    pub graphics_flags: u64,
    pub graphics_max_sessions: u16,
    pub graphics_session_ttl_sec: u32,
}
662
/// Extended point-in-time view of a silo: the basic snapshot plus scheduling,
/// sandbox and CPU-feature details.
#[derive(Debug, Clone)]
pub struct SiloDetailSnapshot {
    pub base: SiloSnapshot,
    pub family: StrateFamily,
    pub sandboxed: bool,
    pub cpu_shares: u32,
    pub cpu_affinity_mask: u64,
    pub max_tasks: u32,
    pub task_ids: Vec<u64>,
    // NOTE(review): presumably (rule path, rights bit mask) pairs — confirm
    // against the code that builds this snapshot.
    pub unveil_rules: Vec<(String, u8)>,
    pub granted_caps_count: usize,
    pub cpu_features_required: u64,
    pub cpu_features_allowed: u64,
    pub xcr0_mask: u64,
    pub graphics_flags: u64,
    pub graphics_max_sessions: u16,
    pub graphics_session_ttl_sec: u32,
}
681
/// Owned copy of a silo event; the fields mirror those of `SiloEvent`.
#[derive(Debug, Clone)]
pub struct SiloEventSnapshot {
    pub silo_id: u64,
    pub kind: SiloEventKind,
    pub data0: u64,
    pub data1: u64,
    pub tick: u64,
}
690
691struct SiloManager {
692    silos: BTreeMap<u32, Box<Silo>>,
693    events: FixedQueue<SiloEvent, SILO_EVENTS_CAPACITY>,
694    task_to_silo: BTreeMap<TaskId, u32>,
695}
696
697const SILO_EVENTS_CAPACITY: usize = 256;
698
699impl SiloManager {
700    /// Creates a new instance.
701    const fn new() -> Self {
702        SiloManager {
703            silos: BTreeMap::new(),
704            events: FixedQueue::new(),
705            task_to_silo: BTreeMap::new(),
706        }
707    }
708
709    /// Creates silo.
710    fn create_silo(&mut self, config: &SiloConfig) -> Result<SiloId, SyscallError> {
711        let id = SiloId::new(config.sid);
712        if self.silos.contains_key(&id.sid) {
713            return Err(SyscallError::AlreadyExists);
714        }
715
716        kernel_check_spawn_invariants(&id, &OctalMode::from_octal(config.mode))?;
717
718        let mut name = String::from("silo-");
719        name.push_str(&id.sid.to_string());
720
721        let family = decode_family(config.family)?;
722
723        let silo = Silo {
724            id,
725            name,
726            strate_label: None,
727            state: SiloState::Created,
728            config: *config,
729            mode: OctalMode::from_octal(config.mode),
730            family,
731            mem_usage_bytes: 0,
732            flags: config.flags as u32,
733            module_id: None,
734            tasks: Vec::new(),
735            granted_caps: Vec::new(),
736            granted_resources: Vec::new(),
737            unveil_rules: Vec::new(),
738            sandboxed: false,
739            event_seq: 0,
740            output_buf: None,
741        };
742
743        self.silos.insert(id.sid, Box::new(silo));
744        Ok(id)
745    }
746
747    /// Returns mut.
748    fn get_mut(&mut self, id: u32) -> Result<&mut Silo, SyscallError> {
749        self.silos
750            .get_mut(&id)
751            .map(Box::as_mut)
752            .ok_or(SyscallError::BadHandle)
753    }
754
755    /// Performs the get operation.
756    fn get(&self, id: u32) -> Result<&Silo, SyscallError> {
757        self.silos
758            .get(&id)
759            .map(Box::as_ref)
760            .ok_or(SyscallError::BadHandle)
761    }
762
763    /// Performs the push event operation.
764    fn push_event(&mut self, ev: SiloEvent) {
765        if self.events.is_full() {
766            let _ = self.events.pop_front();
767        }
768        self.events
769            .push_back(ev)
770            .expect("silo event queue push must succeed after dropping oldest entry");
771    }
772
773    /// Maps task.
774    fn map_task(&mut self, task_id: TaskId, silo_id: u32) {
775        crate::serial_println!(
776            "[trace][silo] map_task enter tid={} sid={} len={}",
777            task_id.as_u64(),
778            silo_id,
779            self.task_to_silo.len()
780        );
781        let existed = self.task_to_silo.contains_key(&task_id);
782        crate::serial_println!(
783            "[trace][silo] map_task before insert tid={} sid={} existed={}",
784            task_id.as_u64(),
785            silo_id,
786            existed
787        );
788        self.task_to_silo.insert(task_id, silo_id);
789        crate::serial_println!(
790            "[trace][silo] map_task after insert tid={} sid={} len={}",
791            task_id.as_u64(),
792            silo_id,
793            self.task_to_silo.len()
794        );
795    }
796
797    /// Unmaps task.
798    fn unmap_task(&mut self, task_id: TaskId) {
799        self.task_to_silo.remove(&task_id);
800    }
801
802    /// Performs the silo for task operation.
803    fn silo_for_task(&self, task_id: TaskId) -> Option<u32> {
804        if let Some(silo_id) = self.task_to_silo.get(&task_id).copied() {
805            return Some(silo_id);
806        }
807
808        // Critical boot fallback: boot-time registration avoids BTreeMap inserts
809        // while holding SILO_MANAGER to eliminate allocator re-entrancy risk on
810        // the fragile early-init path.
811        self.silos
812            .iter()
813            .find_map(|(sid, silo)| silo.tasks.iter().any(|tid| *tid == task_id).then_some(*sid))
814    }
815}
816
817/// Performs the kernel check spawn invariants operation.
818pub fn kernel_check_spawn_invariants(id: &SiloId, mode: &OctalMode) -> Result<(), SyscallError> {
819    if id.tier == SiloTier::User && !mode.hardware.is_empty() {
820        return Err(SyscallError::PermissionDenied);
821    }
822    if id.tier == SiloTier::User && !mode.control.is_empty() {
823        return Err(SyscallError::PermissionDenied);
824    }
825    Ok(())
826}
827
828/// Performs the decode family operation.
829fn decode_family(raw: u8) -> Result<StrateFamily, SyscallError> {
830    match raw {
831        0 => Ok(StrateFamily::SYS),
832        1 => Ok(StrateFamily::DRV),
833        2 => Ok(StrateFamily::FS),
834        3 => Ok(StrateFamily::NET),
835        4 => Ok(StrateFamily::WASM),
836        5 => Ok(StrateFamily::USR),
837        _ => Err(SyscallError::InvalidArgument),
838    }
839}
840
// Global silo registry; every syscall in this module locks it.
static SILO_MANAGER: SpinLock<SiloManager> = SpinLock::new(SiloManager::new());
// NOTE(review): presumably set while boot-time task registration is running
// (see the fallback comment in `silo_for_task`) — confirm at the use sites.
static BOOT_REG_IN_PROGRESS: core::sync::atomic::AtomicBool =
    core::sync::atomic::AtomicBool::new(false);

// NOTE(review): not referenced in this part of the file; presumably the
// capability resource id that identifies silo-admin rights — confirm.
const SILO_ADMIN_RESOURCE: usize = 0;
// Upper bound on `SiloConfig::caps_len` and on the list read by `read_caps_list`.
const MAX_SILO_CAPS: usize = 64;
const MAX_MODULE_BLOB_LEN: usize = 64 * 1024 * 1024; // 64 MiB for large preloaded user modules such as strate-wasm
// Message types of the module upload stream (see `read_module_stream_from_port`).
const IPC_STREAM_DATA: u32 = 0xFFFF_FFFE;
const IPC_STREAM_EOF: u32 = 0xFFFF_FFFF;
// `Strat9ModuleHeader::flags` bits checked by `parse_module_header`.
const MODULE_FLAG_SIGNED: u32 = 1 << 0;
const MODULE_FLAG_KERNEL: u32 = 1 << 1;
852
853/// Reads user config.
854fn read_user_config(ptr: u64) -> Result<SiloConfig, SyscallError> {
855    if ptr == 0 {
856        return Err(SyscallError::Fault);
857    }
858    const SIZE: usize = core::mem::size_of::<SiloConfig>();
859    let user = UserSliceRead::new(ptr, SIZE)?;
860    let mut buf = [0u8; SIZE];
861    user.copy_to(&mut buf);
862    // SAFETY: We copied the exact bytes for SiloConfig from userspace.
863    let config = unsafe { core::ptr::read_unaligned(buf.as_ptr() as *const SiloConfig) };
864    Ok(config)
865}
866
867/// Reads caps list.
868fn read_caps_list(ptr: u64, len: u64) -> Result<Vec<u64>, SyscallError> {
869    if len == 0 {
870        return Ok(Vec::new());
871    }
872    if len > MAX_SILO_CAPS as u64 {
873        return Err(SyscallError::InvalidArgument);
874    }
875    let byte_len = len as usize * core::mem::size_of::<u64>();
876    let user = UserSliceRead::new(ptr, byte_len)?;
877    let bytes = user.read_to_vec();
878    let mut out = Vec::with_capacity(len as usize);
879    for chunk in bytes.chunks_exact(8) {
880        let mut arr = [0u8; 8];
881        arr.copy_from_slice(chunk);
882        out.push(u64::from_le_bytes(arr));
883    }
884    Ok(out)
885}
886
887/// Reads module stream from port.
888fn read_module_stream_from_port(
889    port: &alloc::sync::Arc<port::Port>,
890) -> Result<Vec<u8>, SyscallError> {
891    let mut out = Vec::new();
892    loop {
893        let msg = port.recv().map_err(|_| SyscallError::BadHandle)?;
894
895        if msg.msg_type == IPC_STREAM_EOF {
896            break;
897        }
898        if msg.msg_type != IPC_STREAM_DATA {
899            return Err(SyscallError::InvalidArgument);
900        }
901        if msg.flags != 0 {
902            return Err(SyscallError::InvalidArgument);
903        }
904
905        let chunk_len = u16::from_le_bytes([msg.payload[0], msg.payload[1]]) as usize;
906        if chunk_len == 0 {
907            break;
908        }
909        if chunk_len > msg.payload.len() - 2 {
910            return Err(SyscallError::InvalidArgument);
911        }
912        if out.len().saturating_add(chunk_len) > MAX_MODULE_BLOB_LEN {
913            return Err(SyscallError::InvalidArgument);
914        }
915
916        out.extend_from_slice(&msg.payload[2..2 + chunk_len]);
917    }
918    Ok(out)
919}
920
921/// Parses module header.
922fn parse_module_header(data: &[u8]) -> Result<Option<Strat9ModuleHeader>, SyscallError> {
923    const MAGIC: [u8; 4] = *b"CMOD";
924    let header_size = core::mem::size_of::<Strat9ModuleHeader>();
925
926    if data.len() < MAGIC.len() {
927        return Ok(None);
928    }
929    if data[0..4] != MAGIC {
930        return Ok(None);
931    }
932    if data.len() < header_size {
933        return Err(SyscallError::InvalidArgument);
934    }
935
936    // SAFETY: We checked length, and we read unaligned from a byte slice.
937    let header = unsafe { core::ptr::read_unaligned(data.as_ptr() as *const Strat9ModuleHeader) };
938
939    if header.version != 1 && header.version != 2 {
940        return Err(SyscallError::InvalidArgument);
941    }
942    if header.cpu_arch != 0 {
943        return Err(SyscallError::InvalidArgument);
944    }
945
946    let version = unsafe { core::ptr::addr_of!(header.version).read_unaligned() };
947    let req = if version >= 2 {
948        unsafe { core::ptr::addr_of!(header.cpu_features_required).read_unaligned() }
949    } else {
950        0
951    };
952    if req != 0 {
953        let host = crate::arch::x86_64::cpuid::host();
954        let required = crate::arch::x86_64::cpuid::CpuFeatures::from_bits_truncate(req);
955        if !host.features.contains(required) {
956            log::warn!(
957                "[cmod] module requires CPU features {:#x} but host has {:#x}",
958                req,
959                host.features.bits()
960            );
961            return Err(SyscallError::InvalidArgument);
962        }
963    }
964
965    let data_len = data.len() as u64;
966    let code_end = header
967        .code_offset
968        .checked_add(header.code_size)
969        .ok_or(SyscallError::InvalidArgument)?;
970    let data_end = header
971        .data_offset
972        .checked_add(header.data_size)
973        .ok_or(SyscallError::InvalidArgument)?;
974    if code_end > data_len || data_end > data_len {
975        return Err(SyscallError::InvalidArgument);
976    }
977    if header.entry_point >= header.code_size && header.code_size != 0 {
978        return Err(SyscallError::InvalidArgument);
979    }
980    if header.export_table_offset > data_len
981        || header.import_table_offset > data_len
982        || header.relocation_table_offset > data_len
983    {
984        return Err(SyscallError::InvalidArgument);
985    }
986
987    // Segmentation rules: code/data must not overlap and must be page-aligned.
988    const PAGE_SIZE: u64 = 4096;
989    if header.code_size > 0 {
990        if header.code_offset % PAGE_SIZE != 0 || header.code_size % PAGE_SIZE != 0 {
991            return Err(SyscallError::InvalidArgument);
992        }
993    }
994    if header.data_size > 0 {
995        if header.data_offset % PAGE_SIZE != 0 || header.data_size % PAGE_SIZE != 0 {
996            return Err(SyscallError::InvalidArgument);
997        }
998    }
999    let code_range = header.code_offset..code_end;
1000    let data_range = header.data_offset..data_end;
1001    if code_range.start < data_range.end && data_range.start < code_range.end {
1002        return Err(SyscallError::InvalidArgument);
1003    }
1004
1005    // Flags/signature checks (verification is TODO).
1006    if header.flags & MODULE_FLAG_SIGNED != 0 {
1007        let sig_nonzero = header.signature.iter().any(|b| *b != 0);
1008        let key_nonzero = header.key_id.iter().any(|b| *b != 0);
1009        if !sig_nonzero || !key_nonzero {
1010            return Err(SyscallError::PermissionDenied);
1011        }
1012    }
1013    if header.flags & MODULE_FLAG_KERNEL != 0 {
1014        // Kernel modules are allowed only when loaded by admin (already enforced).
1015    }
1016
1017    Ok(Some(header))
1018}
1019
1020/// Reads u32 le.
1021fn read_u32_le(data: &[u8], offset: usize) -> Result<u32, SyscallError> {
1022    if offset + 4 > data.len() {
1023        return Err(SyscallError::InvalidArgument);
1024    }
1025    let mut buf = [0u8; 4];
1026    buf.copy_from_slice(&data[offset..offset + 4]);
1027    Ok(u32::from_le_bytes(buf))
1028}
1029
1030/// Reads u64 le.
1031fn read_u64_le(data: &[u8], offset: usize) -> Result<u64, SyscallError> {
1032    if offset + 8 > data.len() {
1033        return Err(SyscallError::InvalidArgument);
1034    }
1035    let mut buf = [0u8; 8];
1036    buf.copy_from_slice(&data[offset..offset + 8]);
1037    Ok(u64::from_le_bytes(buf))
1038}
1039
1040/// Performs the resolve export offset operation.
1041fn resolve_export_offset(module: &ModuleImage, ordinal: u64) -> Result<u64, SyscallError> {
1042    let header = module.header.ok_or(SyscallError::InvalidArgument)?;
1043    if header.export_table_offset == 0 {
1044        return Err(SyscallError::NotFound);
1045    }
1046    let table_off = header.export_table_offset as usize;
1047    let count = read_u32_le(module.data.as_slice(), table_off)? as u64;
1048    if ordinal >= count {
1049        return Err(SyscallError::InvalidArgument);
1050    }
1051    // Layout: u32 count + u32 reserved, then u64 entries.
1052    let entries_off = table_off + 8;
1053    let entry_off = entries_off + (ordinal as usize * 8);
1054    let rva = read_u64_le(module.data.as_slice(), entry_off)?;
1055    Ok(rva)
1056}
1057
1058/// Performs the require silo admin operation.
1059pub fn require_silo_admin() -> Result<(), SyscallError> {
1060    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
1061    // SAFETY: Current task owns its capability table during syscall execution.
1062    let caps = unsafe { &*task.process.capabilities.get() };
1063    let required = CapPermissions {
1064        read: false,
1065        write: false,
1066        execute: false,
1067        grant: true,
1068        revoke: false,
1069    };
1070
1071    if caps.has_resource_with_permissions(ResourceType::Silo, SILO_ADMIN_RESOURCE, required) {
1072        Ok(())
1073    } else {
1074        Err(SyscallError::PermissionDenied)
1075    }
1076}
1077
1078/// Performs the resolve silo handle operation.
1079fn resolve_silo_handle(handle: u64, required: CapPermissions) -> Result<u32, SyscallError> {
1080    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
1081    let caps = unsafe { &*task.process.capabilities.get() };
1082    let cap_id = CapId::from_raw(handle);
1083    let cap = caps.get(cap_id).ok_or(SyscallError::BadHandle)?;
1084
1085    // Ensure this is a Silo capability and permissions are sufficient.
1086    if cap.resource_type != ResourceType::Silo {
1087        return Err(SyscallError::BadHandle);
1088    }
1089
1090    if (!required.read || cap.permissions.read)
1091        && (!required.write || cap.permissions.write)
1092        && (!required.execute || cap.permissions.execute)
1093        && (!required.grant || cap.permissions.grant)
1094        && (!required.revoke || cap.permissions.revoke)
1095    {
1096        Ok(cap.resource as u32)
1097    } else {
1098        Err(SyscallError::PermissionDenied)
1099    }
1100}
1101
1102// ============================================================================
1103// Module registry (temporary blob store for .cmod/ELF)
1104// ============================================================================
1105
/// Backing storage for a registered module blob.
#[derive(Clone, Debug)]
enum ModuleData {
    /// Bytes owned by the registry; cloning only bumps the `Arc` refcount.
    Owned(Arc<[u8]>),
    /// Bytes borrowed from `'static` storage.
    Static(&'static [u8]),
}

impl ModuleData {
    /// Returns the blob contents as a byte slice.
    fn as_slice(&self) -> &[u8] {
        match self {
            ModuleData::Owned(bytes) => &bytes[..],
            ModuleData::Static(bytes) => &bytes[..],
        }
    }

    /// Returns the blob length in bytes.
    fn len(&self) -> usize {
        match self {
            ModuleData::Owned(bytes) => bytes.len(),
            ModuleData::Static(bytes) => bytes.len(),
        }
    }
}
1124
/// One module blob held in the module registry.
#[derive(Debug)]
struct ModuleImage {
    /// Registry id of this module; the registry stores the image under the same key.
    id: u64,
    /// The raw module bytes.
    data: ModuleData,
    /// Header returned by `parse_module_header` at registration time.
    /// NOTE(review): presumably `None` for blobs without a Strat9 module header
    /// (e.g. plain ELF) — confirm against `parse_module_header`.
    header: Option<Strat9ModuleHeader>,
}
1131
1132struct ModuleRegistry {
1133    modules: BTreeMap<u64, ModuleImage>,
1134}
1135
1136impl ModuleRegistry {
1137    /// Creates a new instance.
1138    const fn new() -> Self {
1139        ModuleRegistry {
1140            modules: BTreeMap::new(),
1141        }
1142    }
1143
1144    /// Performs the register operation.
1145    fn register(&mut self, data: Vec<u8>) -> Result<u64, SyscallError> {
1146        let header = parse_module_header(&data)?;
1147        static NEXT_MOD: AtomicU64 = AtomicU64::new(1);
1148        let id = NEXT_MOD.fetch_add(1, Ordering::Relaxed);
1149        self.modules.insert(
1150            id,
1151            ModuleImage {
1152                id,
1153                data: ModuleData::Owned(Arc::from(data.into_boxed_slice())),
1154                header,
1155            },
1156        );
1157        Ok(id)
1158    }
1159
1160    fn register_static(&mut self, data: &'static [u8]) -> Result<u64, SyscallError> {
1161        let header = parse_module_header(data)?;
1162        static NEXT_MOD: AtomicU64 = AtomicU64::new(1);
1163        let id = NEXT_MOD.fetch_add(1, Ordering::Relaxed);
1164        self.modules.insert(
1165            id,
1166            ModuleImage {
1167                id,
1168                data: ModuleData::Static(data),
1169                header,
1170            },
1171        );
1172        Ok(id)
1173    }
1174
1175    /// Performs the get operation.
1176    fn get(&self, id: u64) -> Option<&ModuleImage> {
1177        self.modules.get(&id)
1178    }
1179
1180    /// Performs the remove operation.
1181    fn remove(&mut self, id: u64) -> Option<ModuleImage> {
1182        self.modules.remove(&id)
1183    }
1184}
1185
/// Global module registry; every access goes through this spin lock.
static MODULE_REGISTRY: SpinLock<ModuleRegistry> = SpinLock::new(ModuleRegistry::new());
1187
1188/// Performs the charge task silo memory operation.
1189fn charge_task_silo_memory(task_id: TaskId, bytes: u64) -> Result<(), SyscallError> {
1190    if bytes == 0 {
1191        return Ok(());
1192    }
1193    let mut mgr = SILO_MANAGER.lock();
1194    let Some(silo_id) = mgr.silo_for_task(task_id) else {
1195        return Ok(());
1196    };
1197    let silo = mgr.get_mut(silo_id)?;
1198    let next = silo
1199        .mem_usage_bytes
1200        .checked_add(bytes)
1201        .ok_or(SyscallError::OutOfMemory)?;
1202    if silo.config.mem_max != 0 && next > silo.config.mem_max {
1203        return Err(SyscallError::OutOfMemory);
1204    }
1205    silo.mem_usage_bytes = next;
1206    Ok(())
1207}
1208
1209/// Performs the release task silo memory operation.
1210fn release_task_silo_memory(task_id: TaskId, bytes: u64) {
1211    if bytes == 0 {
1212        return;
1213    }
1214    let mut mgr = SILO_MANAGER.lock();
1215    let Some(silo_id) = mgr.silo_for_task(task_id) else {
1216        return;
1217    };
1218    if let Ok(silo) = mgr.get_mut(silo_id) {
1219        silo.mem_usage_bytes = silo.mem_usage_bytes.saturating_sub(bytes);
1220    }
1221}
1222
1223/// Charge memory usage against the current task's silo quota (if any).
1224///
1225/// Returns `OutOfMemory` when charging would exceed `SiloConfig.mem_max`.
1226/// Tasks that are not part of a silo are ignored.
1227pub fn charge_current_task_memory(bytes: u64) -> Result<(), SyscallError> {
1228    let Some(task) = crate::process::scheduler::current_task_clone_try() else {
1229        // Boot-time/kernel contexts may have no current task.
1230        // Also avoid deadlock when scheduler lock is already held in cleanup paths.
1231        return Ok(());
1232    };
1233    charge_task_silo_memory(task.id, bytes)
1234}
1235
1236/// Release memory usage from the current task's silo quota (if any).
1237///
1238/// Tasks that are not part of a silo are ignored.
1239pub fn release_current_task_memory(bytes: u64) {
1240    if let Some(task) = crate::process::scheduler::current_task_clone_try() {
1241        release_task_silo_memory(task.id, bytes);
1242    }
1243}
1244
/// Extracts the label component from a `/srv/strate-fs-<type>/<label>` path.
///
/// After the `/srv/strate-fs-` prefix the path must consist of exactly two
/// non-empty `/`-separated segments (empty segments are ignored); the second
/// one is returned. Any other shape yields `None`.
fn extract_strate_label(path: &str) -> Option<String> {
    let tail = path.strip_prefix("/srv/strate-fs-")?;
    let mut segments = tail.split('/').filter(|segment| !segment.is_empty());
    match (segments.next(), segments.next(), segments.next()) {
        // Exactly <type>/<label> with nothing after it.
        (Some(_strate_type), Some(label), None) => Some(label.to_string()),
        _ => None,
    }
}
1257
/// Produces a safe label from arbitrary text.
///
/// Only the first 31 bytes of `raw` are considered. ASCII letters, digits,
/// `-`, `_` and `.` are kept; every other byte becomes `_`. An empty result
/// is replaced by `"default"`.
fn sanitize_label(raw: &str) -> String {
    let cleaned: String = raw
        .bytes()
        .take(31)
        .map(|byte| match byte {
            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'_' | b'.' => byte as char,
            _ => '_',
        })
        .collect();

    if cleaned.is_empty() {
        return String::from("default");
    }
    cleaned
}
1271
/// Reports whether `raw` is already an acceptable label.
///
/// A valid label is 1 to 31 bytes long and contains only ASCII letters,
/// digits, `-`, `_` or `.`.
fn is_valid_label(raw: &str) -> bool {
    let allowed = |byte: u8| byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.');
    (1..=31).contains(&raw.len()) && raw.bytes().all(allowed)
}
1280
1281/// Sets current silo label from path.
1282pub fn set_current_silo_label_from_path(path: &str) -> Result<(), SyscallError> {
1283    let Some(label) = extract_strate_label(path) else {
1284        return Ok(());
1285    };
1286    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
1287    let mut mgr = SILO_MANAGER.lock();
1288    let Some(silo_id) = mgr.silo_for_task(task.id) else {
1289        return Ok(());
1290    };
1291    let silo = mgr.get_mut(silo_id)?;
1292    // Do not overwrite a label that was already set (e.g. by kernel_spawn_strate).
1293    // The spawner's requested label takes precedence over the default path-derived one.
1294    if silo.strate_label.is_none() {
1295        silo.strate_label = Some(label);
1296    }
1297    Ok(())
1298}
1299
1300/// Performs the current task silo label operation.
1301pub fn current_task_silo_label() -> Option<String> {
1302    let task = current_task_clone()?;
1303    let mgr = SILO_MANAGER.lock();
1304    let silo_id = mgr.silo_for_task(task.id)?;
1305    let silo = mgr.get(silo_id).ok()?;
1306    silo.strate_label.clone()
1307}
1308
1309/// Performs the list silos snapshot operation.
1310pub fn list_silos_snapshot() -> Vec<SiloSnapshot> {
1311    let mgr = SILO_MANAGER.lock();
1312    mgr.silos
1313        .values()
1314        .map(|s| SiloSnapshot {
1315            id: s.id.sid,
1316            tier: s.id.tier,
1317            name: s.name.clone(),
1318            strate_label: s.strate_label.clone(),
1319            state: s.state,
1320            task_count: s.tasks.len(),
1321            mem_usage_bytes: s.mem_usage_bytes,
1322            mem_min_bytes: s.config.mem_min,
1323            mem_max_bytes: s.config.mem_max,
1324            mode: s.config.mode,
1325            graphics_flags: s.config.flags
1326                & (SILO_FLAG_GRAPHICS
1327                    | SILO_FLAG_WEBRTC_NATIVE
1328                    | SILO_FLAG_GRAPHICS_READ_ONLY
1329                    | SILO_FLAG_WEBRTC_TURN_FORCE),
1330            graphics_max_sessions: s.config.graphics_max_sessions,
1331            graphics_session_ttl_sec: s.config.graphics_session_ttl_sec,
1332        })
1333        .collect()
1334}
1335
1336/// Return silo identity + memory accounting for a task, if the task belongs to a silo.
1337///
1338/// Tuple layout:
1339/// - silo id (u32)
1340/// - optional label
1341/// - current usage bytes
1342/// - configured minimum bytes
1343/// - configured maximum bytes (0 = unlimited)
1344/// Best-effort, non-blocking silo lookup for allocator-internal accounting.
1345///
1346/// Uses `try_lock` so that callers running under IRQs-disabled conditions
1347/// (e.g. inside vmalloc) do not deadlock when SILO_MANAGER is already held
1348/// by an outer call on the same CPU.  Returns `None` if the lock is contended
1349/// or if the task is not registered in any silo.
1350pub fn try_silo_id_for_task(task_id: TaskId) -> Option<u32> {
1351    SILO_MANAGER.try_lock()?.silo_for_task(task_id)
1352}
1353
1354pub fn silo_info_for_task(task_id: TaskId) -> Option<(u32, Option<String>, u64, u64, u64)> {
1355    let mgr = SILO_MANAGER.lock();
1356    let silo_id = mgr.silo_for_task(task_id)?;
1357    let silo = mgr.get(silo_id).ok()?;
1358    Some((
1359        silo.id.sid,
1360        silo.strate_label.clone(),
1361        silo.mem_usage_bytes,
1362        silo.config.mem_min,
1363        silo.config.mem_max,
1364    ))
1365}
1366
1367/// Performs the resolve volume resource from dev path operation.
1368fn resolve_volume_resource_from_dev_path(dev_path: &str) -> Result<usize, SyscallError> {
1369    match dev_path {
1370        "/dev/sda" => ahci::get_device()
1371            .map(|d| d as *const _ as usize)
1372            .ok_or(SyscallError::NotFound),
1373        "/dev/vda" => virtio_block::get_device()
1374            .map(|d| d as *const _ as usize)
1375            .ok_or(SyscallError::NotFound),
1376        _ => Err(SyscallError::NotFound),
1377    }
1378}
1379
1380/// Compute the effective XCR0 mask for a silo from its allowed CPU features.
1381fn compute_silo_xcr0(config: &SiloConfig) -> u64 {
1382    use crate::arch::x86_64::cpuid::{xcr0_for_features, CpuFeatures};
1383    let allowed = CpuFeatures::from_bits_truncate(config.cpu_features_allowed);
1384    xcr0_for_features(allowed)
1385}
1386
/// Creates a new user-family silo from an ELF image and starts its first task.
///
/// Steps, in order:
/// 1. Copy `elf_data` into the module registry.
/// 2. Pick the first free SID at or above 1000 and insert a `Ready` silo whose
///    label is the sanitized `label`, or `inst-<sid>` when none is given.
/// 3. If `dev_path` is given, create a read/write/grant/revoke volume
///    capability for that device and seed the task with it.
/// 4. Load the ELF as a task, attach it to the silo, mark the silo `Running`,
///    emit a `Started` event and hand the task to the scheduler.
///
/// Returns the SID of the new silo.
///
/// # Errors
/// - Any error from module registration (header validation).
/// - `OutOfMemory` if no SID is free up to `u32::MAX`.
/// - `AlreadyExists` if another silo already carries the requested label.
/// - `NotFound` if `dev_path` names an unknown or absent device.
/// - `InvalidArgument` if the ELF cannot be loaded.
///
/// NOTE(review): failures after step 1 do not remove the registered module,
/// and failures after step 2 leave the freshly inserted `Ready` silo in
/// place — confirm whether rollback is intended to be the caller's job.
pub fn kernel_spawn_strate(
    elf_data: &[u8],
    label: Option<&str>,
    dev_path: Option<&str>,
) -> Result<u32, SyscallError> {
    // The registry lock is released before SILO_MANAGER is taken; the two
    // locks are never held at the same time in this function.
    let module_id = {
        let mut registry = MODULE_REGISTRY.lock();
        registry.register(elf_data.to_vec())?
    };

    let silo_id = {
        let mut mgr = SILO_MANAGER.lock();
        // For kernel_spawn_strate (manual command), we auto-assign SID >= 1000.
        // In a production system, this would follow the "42" rule from Init.
        let mut sid = 1000u32;
        while mgr.silos.contains_key(&sid) {
            sid = sid.checked_add(1).ok_or(SyscallError::OutOfMemory)?;
        }

        let id = SiloId::new(sid);
        let requested_label = label
            .map(sanitize_label)
            .unwrap_or_else(|| alloc::format!("inst-{}", id.sid));

        // Labels must be unique across all silos.
        if mgr
            .silos
            .values()
            .any(|s| s.strate_label.as_deref() == Some(requested_label.as_str()))
        {
            return Err(SyscallError::AlreadyExists);
        }

        let mut cfg = SiloConfig {
            sid: id.sid,
            mode: 0o000,
            family: StrateFamily::USR as u8,
            ..SiloConfig::default()
        };
        // The XCR0 mask depends on the (default) allowed CPU features in `cfg`.
        cfg.xcr0_mask = compute_silo_xcr0(&cfg);

        let silo = Silo {
            id,
            name: alloc::format!("silo-{}", id.sid),
            strate_label: Some(requested_label),
            state: SiloState::Ready,
            config: cfg,
            mode: OctalMode::from_octal(0),
            family: StrateFamily::USR,
            mem_usage_bytes: 0,
            flags: 0,
            module_id: Some(module_id),
            tasks: Vec::new(),
            granted_caps: Vec::new(),
            granted_resources: Vec::new(),
            unveil_rules: Vec::new(),
            sandboxed: false,
            event_seq: 0,
            output_buf: None,
        };

        mgr.silos.insert(id.sid, Box::new(silo));
        id.sid
    };

    // Clone the blob handle so the registry lock is not held while loading.
    let module_data = {
        let registry = MODULE_REGISTRY.lock();
        let module = registry.get(module_id).ok_or(SyscallError::BadHandle)?;
        module.data.clone()
    };

    // Capabilities handed to the new task at load time.
    let mut seed_caps = Vec::new();
    if let Some(path) = dev_path {
        let resource = resolve_volume_resource_from_dev_path(path)?;
        let cap = get_capability_manager().create_capability(
            ResourceType::Volume,
            resource,
            CapPermissions {
                read: true,
                write: true,
                execute: false,
                grant: true,
                revoke: true,
            },
        );
        seed_caps.push(cap);
    }

    let display = {
        let mgr = SILO_MANAGER.lock();
        let silo = mgr.get(silo_id)?;
        silo.strate_label
            .clone()
            .unwrap_or_else(|| alloc::format!("silo-{}", silo.id.sid))
    };
    // The loader takes a `&'static str`, so the formatted name is leaked on
    // purpose; it is never freed.
    let task_name: &'static str =
        Box::leak(alloc::format!("silo-{}/strate-admin-{}", silo_id, display).into_boxed_str());
    let task =
        crate::process::elf::load_elf_task_with_caps(module_data.as_slice(), task_name, &seed_caps)
            .map_err(|_| SyscallError::InvalidArgument)?;
    let task_id = task.id;

    let mut mgr = SILO_MANAGER.lock();
    {
        let silo = mgr.get_mut(silo_id)?;
        silo.tasks.push(task_id);
        silo.state = SiloState::Running;
        let fpu_xcr0 = unsafe { (*task.fpu_state.get()).xcr0_mask };
        // Intersect the silo's mask with what the task's FPU state supports.
        // NOTE(review): the floor of 0x3 presumably keeps XCR0 bits 0 and 1
        // (x87 and SSE) always enabled — confirm.
        let effective_xcr0 = (silo.config.xcr0_mask & fpu_xcr0).max(0x3);
        task.xcr0_mask
            .store(effective_xcr0, core::sync::atomic::Ordering::Relaxed);
    }
    mgr.map_task(task_id, silo_id);
    mgr.push_event(SiloEvent {
        silo_id: silo_id.into(),
        kind: SiloEventKind::Started,
        data0: 0,
        data1: 0,
        tick: crate::process::scheduler::ticks(),
    });
    // Release the silo manager before handing the task to the scheduler.
    drop(mgr);
    crate::process::add_task(task);
    Ok(silo_id)
}
1511
1512/// Performs the resolve selector to silo id operation.
1513fn resolve_selector_to_silo_id(selector: &str, mgr: &SiloManager) -> Result<u32, SyscallError> {
1514    if let Ok(id) = selector.parse::<u32>() {
1515        if mgr.silos.contains_key(&id) {
1516            return Ok(id);
1517        }
1518        return Err(SyscallError::NotFound);
1519    }
1520    let mut found: Option<u32> = None;
1521    for s in mgr.silos.values() {
1522        if s.strate_label.as_deref() == Some(selector) {
1523            if found.is_some() {
1524                return Err(SyscallError::InvalidArgument);
1525            }
1526            found = Some(s.id.sid);
1527        }
1528    }
1529    found.ok_or(SyscallError::NotFound)
1530}
1531
1532/// Performs the kernel stop silo operation.
1533pub fn kernel_stop_silo(selector: &str, force_kill: bool) -> Result<u32, SyscallError> {
1534    let (silo_id, tasks) = {
1535        let mut mgr = SILO_MANAGER.lock();
1536        let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
1537        let mut tasks = Vec::new();
1538        {
1539            let silo = mgr.get_mut(silo_id)?;
1540            match silo.state {
1541                SiloState::Running | SiloState::Paused => {
1542                    tasks = silo.tasks.clone();
1543                    silo.tasks.clear();
1544                    silo.state = if force_kill {
1545                        SiloState::Stopped
1546                    } else {
1547                        SiloState::Stopping
1548                    };
1549                }
1550                SiloState::Stopping => {
1551                    if force_kill {
1552                        silo.state = SiloState::Stopped;
1553                    }
1554                }
1555                SiloState::Stopped | SiloState::Created | SiloState::Ready => {}
1556                _ => return Err(SyscallError::InvalidArgument),
1557            }
1558        }
1559        for tid in &tasks {
1560            mgr.unmap_task(*tid);
1561        }
1562        mgr.push_event(SiloEvent {
1563            silo_id: silo_id as u64,
1564            kind: if force_kill {
1565                SiloEventKind::Killed
1566            } else {
1567                SiloEventKind::Stopped
1568            },
1569            data0: 0,
1570            data1: 0,
1571            tick: crate::process::scheduler::ticks(),
1572        });
1573        (silo_id, tasks)
1574    };
1575    for tid in tasks {
1576        crate::process::kill_task(tid);
1577    }
1578    Ok(silo_id)
1579}
1580
1581/// Performs the kernel start silo operation.
1582pub fn kernel_start_silo(selector: &str) -> Result<u32, SyscallError> {
1583    let silo_id = {
1584        let mgr = SILO_MANAGER.lock();
1585        resolve_selector_to_silo_id(selector, &mgr)?
1586    };
1587    let _ = start_silo_by_id(silo_id)?;
1588    Ok(silo_id)
1589}
1590
1591/// Performs the kernel destroy silo operation.
1592pub fn kernel_destroy_silo(selector: &str) -> Result<u32, SyscallError> {
1593    let (silo_id, module_id) = {
1594        let mut mgr = SILO_MANAGER.lock();
1595        let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
1596        let module_id = {
1597            let silo = mgr.get(silo_id)?;
1598            if !silo.tasks.is_empty() {
1599                return Err(SyscallError::InvalidArgument);
1600            }
1601            match silo.state {
1602                SiloState::Stopped | SiloState::Created | SiloState::Ready | SiloState::Crashed => {
1603                }
1604                _ => return Err(SyscallError::InvalidArgument),
1605            }
1606            silo.module_id
1607        };
1608        let _ = mgr.silos.remove(&silo_id);
1609        (silo_id, module_id)
1610    };
1611    if let Some(mid) = module_id {
1612        let mut reg = MODULE_REGISTRY.lock();
1613        let _ = reg.remove(mid);
1614    }
1615    Ok(silo_id)
1616}
1617
1618/// Performs the kernel rename silo label operation.
1619pub fn kernel_rename_silo_label(selector: &str, new_label: &str) -> Result<u32, SyscallError> {
1620    if !is_valid_label(new_label) {
1621        return Err(SyscallError::InvalidArgument);
1622    }
1623    let mut mgr = SILO_MANAGER.lock();
1624    let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
1625    if mgr
1626        .silos
1627        .values()
1628        .any(|s| s.id.sid != silo_id && s.strate_label.as_deref() == Some(new_label))
1629    {
1630        return Err(SyscallError::AlreadyExists);
1631    }
1632    let silo = mgr.get_mut(silo_id)?;
1633    match silo.state {
1634        SiloState::Stopped | SiloState::Created | SiloState::Ready | SiloState::Crashed => {
1635            silo.strate_label = Some(String::from(new_label));
1636            Ok(silo_id)
1637        }
1638        _ => Err(SyscallError::InvalidArgument),
1639    }
1640}
1641
/// Registers an already-created boot task as the sole task of a new
/// system-family silo and returns the silo's SID.
///
/// The SID is the lowest free id starting at 1 and the label is the sanitized
/// form of `label`. The silo is inserted directly in the `Running` state with
/// mode `0o777` and no module id. `BOOT_REG_IN_PROGRESS` is set for the whole
/// duration of the call and cleared again on every exit path.
///
/// # Errors
/// - `OutOfMemory` if no SID is free up to `u32::MAX`.
/// - `AlreadyExists` if another silo already carries the sanitized label.
/// - `Again` if the chosen SID was taken while the lock was released.
///
/// NOTE(review): unlike `kernel_spawn_strate`, this path never calls
/// `map_task`, so whether `silo_for_task(task_id)` finds this silo depends on
/// how that lookup is implemented — confirm.
pub fn register_boot_strate_task(task_id: TaskId, label: &str) -> Result<u32, SyscallError> {
    crate::serial_println!(
        "[trace][silo] register_boot_strate_task enter tid={} label={}",
        task_id.as_u64(),
        label
    );
    BOOT_REG_IN_PROGRESS.store(true, Ordering::Relaxed);
    // The body runs in a closure so that early `return`s and `?` still reach
    // the flag reset below.
    let result = (|| -> Result<u32, SyscallError> {
        let sanitized = sanitize_label(label);
        let mgr = SILO_MANAGER.lock();
        crate::serial_println!(
            "[trace][silo] register_boot_strate_task lock acquired tid={}",
            task_id.as_u64()
        );
        crate::serial_println!(
            "[trace][silo] register_boot_strate_task before sid scan tid={}",
            task_id.as_u64()
        );
        // Lowest free SID, scanning upwards from 1.
        let mut sid = 1u32;
        while mgr.silos.contains_key(&sid) {
            sid = sid.checked_add(1).ok_or(SyscallError::OutOfMemory)?;
        }
        crate::serial_println!(
            "[trace][silo] register_boot_strate_task sid selected tid={} sid={}",
            task_id.as_u64(),
            sid
        );
        crate::serial_println!(
            "[trace][silo] register_boot_strate_task before label uniqueness tid={} label={}",
            task_id.as_u64(),
            sanitized.as_str()
        );
        if mgr
            .silos
            .values()
            .any(|s| s.strate_label.as_deref() == Some(sanitized.as_str()))
        {
            return Err(SyscallError::AlreadyExists);
        }
        // The lock is released while the silo value is built (this allocates)
        // and re-acquired below.
        drop(mgr);

        let id = SiloId::new(sid);
        let silo = Silo {
            id,
            name: alloc::format!("silo-{}", id.sid),
            strate_label: Some(sanitized),
            state: SiloState::Running,
            config: SiloConfig {
                sid: id.sid,
                mode: 0o777,
                family: StrateFamily::SYS as u8,
                ..SiloConfig::default()
            },
            mode: OctalMode::from_octal(0o777),
            family: StrateFamily::SYS,
            mem_usage_bytes: 0,
            flags: 0,
            module_id: None,
            tasks: alloc::vec![task_id],
            granted_caps: Vec::new(),
            granted_resources: Vec::new(),
            unveil_rules: Vec::new(),
            sandboxed: false,
            event_seq: 0,
            output_buf: None,
        };

        let mut mgr = SILO_MANAGER.lock();
        // Both checks are repeated because the registry may have changed while
        // the lock was released.
        if mgr.silos.contains_key(&id.sid) {
            return Err(SyscallError::Again);
        }
        if mgr
            .silos
            .values()
            .any(|s| s.strate_label.as_deref() == silo.strate_label.as_deref())
        {
            return Err(SyscallError::AlreadyExists);
        }
        crate::serial_println!(
            "[trace][silo] register_boot_strate_task before silo insert tid={} sid={}",
            task_id.as_u64(),
            id.sid
        );
        mgr.silos.insert(id.sid, Box::new(silo));
        drop(mgr);
        Ok(id.sid)
    })();
    BOOT_REG_IN_PROGRESS.store(false, Ordering::Relaxed);
    result
}
1733
1734/// Returns true while boot-time silo registration critical path is executing.
1735pub fn debug_boot_reg_active() -> bool {
1736    BOOT_REG_IN_PROGRESS.load(Ordering::Relaxed)
1737}
1738
1739/// Performs the resolve module handle operation.
1740fn resolve_module_handle(handle: u64, required: CapPermissions) -> Result<u64, SyscallError> {
1741    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
1742    let caps = unsafe { &*task.process.capabilities.get() };
1743    let cap_id = CapId::from_raw(handle);
1744    let cap = caps.get(cap_id).ok_or(SyscallError::BadHandle)?;
1745
1746    if cap.resource_type != ResourceType::Module {
1747        return Err(SyscallError::BadHandle);
1748    }
1749
1750    if (!required.read || cap.permissions.read)
1751        && (!required.write || cap.permissions.write)
1752        && (!required.execute || cap.permissions.execute)
1753        && (!required.grant || cap.permissions.grant)
1754        && (!required.revoke || cap.permissions.revoke)
1755    {
1756        Ok(cap.resource as u64)
1757    } else {
1758        Err(SyscallError::PermissionDenied)
1759    }
1760}
1761
1762/// Grant the Silo Admin capability to a task (bootstrapping).
1763///
1764/// This should be called only for the initial admin task (e.g. "init").
1765pub fn grant_silo_admin_to_task(task: &alloc::sync::Arc<Task>) -> CapId {
1766    let cap = get_capability_manager().create_capability(
1767        ResourceType::Silo,
1768        SILO_ADMIN_RESOURCE,
1769        CapPermissions::all(),
1770    );
1771    // SAFETY: Bootstrapping. Caller must ensure exclusive access.
1772    unsafe { (&mut *task.process.capabilities.get()).insert(cap) }
1773}
1774
1775// ============================================================================
1776// Module syscalls (temporary blob loader)
1777// ============================================================================
1778
1779/// Performs the sys module load operation.
1780pub fn sys_module_load(fd_or_ptr: u64, len: u64) -> Result<u64, SyscallError> {
1781    // Module loading is currently restricted to admin.
1782    require_silo_admin()?;
1783
1784    // Transitional path: if len != 0, treat arg1 as a userspace blob pointer.
1785    if len != 0 {
1786        let len = len as usize;
1787        if len == 0 || len > MAX_MODULE_BLOB_LEN {
1788            return Err(SyscallError::InvalidArgument);
1789        }
1790
1791        if len <= 4096 {
1792            let user = UserSliceRead::new(fd_or_ptr, len)?;
1793            if matches!(user.read_u8(0), Ok(b'/')) {
1794                let path_buf = user.read_to_vec();
1795                if let Ok(path) = core::str::from_utf8(&path_buf) {
1796                    if let Some(data) = crate::vfs::get_initfs_file_bytes(path) {
1797                        let mut registry = MODULE_REGISTRY.lock();
1798                        let id = registry.register_static(data)?;
1799                        drop(registry);
1800
1801                        let cap = get_capability_manager().create_capability(
1802                            ResourceType::Module,
1803                            id as usize,
1804                            CapPermissions::all(),
1805                        );
1806
1807                        let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
1808                        let cap_id = unsafe { (&mut *task.process.capabilities.get()).insert(cap) };
1809                        return Ok(cap_id.as_u64());
1810                    }
1811                }
1812            }
1813        }
1814
1815        let user = UserSliceRead::new(fd_or_ptr, len)?;
1816        let data = user.read_to_vec();
1817        if data.len() >= 4 {
1818            log::debug!(
1819                "module_load: len={} magic={:02x}{:02x}{:02x}{:02x}",
1820                data.len(),
1821                data[0],
1822                data[1],
1823                data[2],
1824                data[3]
1825            );
1826        } else {
1827            log::debug!("module_load: len={} (too small)", data.len());
1828        }
1829
1830        let mut registry = MODULE_REGISTRY.lock();
1831        let id = registry.register(data)?;
1832        drop(registry);
1833
1834        let cap = get_capability_manager().create_capability(
1835            ResourceType::Module,
1836            id as usize,
1837            CapPermissions::all(),
1838        );
1839
1840        let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
1841        let cap_id = unsafe { (&mut *task.process.capabilities.get()).insert(cap) };
1842
1843        return Ok(cap_id.as_u64());
1844    }
1845
1846    // TODO: Load from a file handle (fd) via VFS once the path exists.
1847    // For now, interpret `fd_or_ptr` as either:
1848    // - a File handle (read all), or
1849    // - an IPC port handle that streams the module bytes.
1850    //
1851    // Stream protocol:
1852    // - msg_type = IPC_STREAM_DATA, flags = payload length (0..48)
1853    // - msg_type = IPC_STREAM_EOF (or DATA with flags=0) ends the stream
1854    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
1855    let caps = unsafe { &*task.process.capabilities.get() };
1856    let required = CapPermissions {
1857        read: true,
1858        write: false,
1859        execute: false,
1860        grant: false,
1861        revoke: false,
1862    };
1863    let cap = caps
1864        .get_with_permissions(CapId::from_raw(fd_or_ptr), required)
1865        .ok_or(SyscallError::PermissionDenied)?;
1866    let data = match cap.resource_type {
1867        ResourceType::File => {
1868            let fd = u32::try_from(cap.resource).map_err(|_| SyscallError::BadHandle)?;
1869            crate::vfs::read_all(fd)?
1870        }
1871        ResourceType::IpcPort => {
1872            let port_id = PortId::from_u64(cap.resource as u64);
1873            let port = port::get_port(port_id).ok_or(SyscallError::BadHandle)?;
1874            read_module_stream_from_port(&port)?
1875        }
1876        _ => return Err(SyscallError::BadHandle),
1877    };
1878    if data.len() > MAX_MODULE_BLOB_LEN {
1879        return Err(SyscallError::InvalidArgument);
1880    }
1881
1882    let mut registry = MODULE_REGISTRY.lock();
1883    let id = registry.register(data)?;
1884    drop(registry);
1885
1886    let cap = get_capability_manager().create_capability(
1887        ResourceType::Module,
1888        id as usize,
1889        CapPermissions::all(),
1890    );
1891
1892    let cap_id = unsafe { (&mut *task.process.capabilities.get()).insert(cap) };
1893
1894    Ok(cap_id.as_u64())
1895}
1896
1897/// Performs the sys module unload operation.
1898pub fn sys_module_unload(handle: u64) -> Result<u64, SyscallError> {
1899    require_silo_admin()?;
1900    let required = CapPermissions {
1901        read: false,
1902        write: false,
1903        execute: false,
1904        grant: false,
1905        revoke: true,
1906    };
1907    let module_id = resolve_module_handle(handle, required)?;
1908    let mut registry = MODULE_REGISTRY.lock();
1909    registry.remove(module_id);
1910    Ok(0)
1911}
1912
1913/// Performs the sys module get symbol operation.
1914pub fn sys_module_get_symbol(handle: u64, _ordinal: u64) -> Result<u64, SyscallError> {
1915    let required = CapPermissions {
1916        read: true,
1917        write: false,
1918        execute: false,
1919        grant: false,
1920        revoke: false,
1921    };
1922    let module_id = resolve_module_handle(handle, required)?;
1923    let registry = MODULE_REGISTRY.lock();
1924    let module = registry.get(module_id).ok_or(SyscallError::BadHandle)?;
1925
1926    // The export table format is a simple array of u64 RVAs indexed by ordinal.
1927    let rva = resolve_export_offset(module, _ordinal)?;
1928    let header = module.header.ok_or(SyscallError::InvalidArgument)?;
1929    Ok(header.code_offset.saturating_add(rva))
1930}
1931
1932/// Performs the sys module query operation.
1933pub fn sys_module_query(handle: u64, out_ptr: u64) -> Result<u64, SyscallError> {
1934    let required = CapPermissions {
1935        read: true,
1936        write: false,
1937        execute: false,
1938        grant: false,
1939        revoke: false,
1940    };
1941    let module_id = resolve_module_handle(handle, required)?;
1942    if out_ptr == 0 {
1943        return Err(SyscallError::Fault);
1944    }
1945
1946    let registry = MODULE_REGISTRY.lock();
1947    let module = registry.get(module_id).ok_or(SyscallError::BadHandle)?;
1948
1949    let (format, flags, version, cpu_arch, code_size, data_size, bss_size, entry_point) =
1950        if let Some(header) = module.header {
1951            (
1952                1u32,
1953                header.flags,
1954                header.version,
1955                header.cpu_arch,
1956                header.code_size,
1957                header.data_size,
1958                header.bss_size,
1959                header.entry_point,
1960            )
1961        } else {
1962            (0u32, 0u32, 0u16, 0u8, 0u64, 0u64, 0u64, 0u64)
1963        };
1964
1965    let info = ModuleInfo {
1966        id: module.id,
1967        format,
1968        flags,
1969        version,
1970        cpu_arch,
1971        reserved: 0,
1972        code_size,
1973        data_size,
1974        bss_size,
1975        entry_point,
1976        total_size: module.data.len() as u64,
1977    };
1978
1979    const INFO_SIZE: usize = core::mem::size_of::<ModuleInfo>();
1980    let user = UserSliceWrite::new(out_ptr, INFO_SIZE)?;
1981    let src =
1982        unsafe { core::slice::from_raw_parts(&info as *const ModuleInfo as *const u8, INFO_SIZE) };
1983    user.copy_from(src);
1984    Ok(0)
1985}
1986
1987// ============================================================================
1988// Syscall handlers (kernel entry points)
1989// ============================================================================
1990
1991/// Performs the sys silo create operation.
1992pub fn sys_silo_create(config_ptr: u64) -> Result<u64, SyscallError> {
1993    require_silo_admin()?;
1994    let config = read_user_config(config_ptr)?;
1995    config.validate()?;
1996
1997    let mut mgr = SILO_MANAGER.lock();
1998    let id = mgr.create_silo(&config)?;
1999    drop(mgr);
2000
2001    let cap = get_capability_manager().create_capability(
2002        ResourceType::Silo,
2003        id.sid as usize,
2004        CapPermissions::all(),
2005    );
2006
2007    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
2008    let cap_id = unsafe { (&mut *task.process.capabilities.get()).insert(cap) };
2009
2010    Ok(cap_id.as_u64())
2011}
2012
2013/// Performs the sys silo config operation.
2014pub fn sys_silo_config(handle: u64, res_ptr: u64) -> Result<u64, SyscallError> {
2015    require_silo_admin()?;
2016    let config = read_user_config(res_ptr)?;
2017    config.validate()?;
2018    let family = decode_family(config.family)?;
2019
2020    let mut granted_caps = Vec::new();
2021    let mut granted_resources = Vec::new();
2022    if config.caps_len > 0 {
2023        let caps_list = read_caps_list(config.caps_ptr, config.caps_len)?;
2024        let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
2025        let caps = unsafe { &*task.process.capabilities.get() };
2026
2027        for cap_handle in caps_list {
2028            let cap = caps
2029                .get(CapId::from_raw(cap_handle))
2030                .ok_or(SyscallError::BadHandle)?;
2031            if !cap.permissions.grant {
2032                return Err(SyscallError::PermissionDenied);
2033            }
2034            if !is_delegated_resource(cap.resource_type) {
2035                return Err(SyscallError::InvalidArgument);
2036            }
2037            if !granted_caps.contains(&cap_handle) {
2038                granted_caps.push(cap_handle);
2039            }
2040            add_or_merge_granted_resource(
2041                &mut granted_resources,
2042                GrantedResource {
2043                    resource_type: cap.resource_type,
2044                    resource: cap.resource,
2045                    permissions: cap.permissions,
2046                },
2047            );
2048        }
2049    }
2050
2051    let sid = resolve_silo_handle(handle, CapPermissions::read_write())?;
2052    let mut mgr = SILO_MANAGER.lock();
2053    let silo = mgr.get_mut(sid as u32)?;
2054
2055    let requested_mode = OctalMode::from_octal(config.mode);
2056    kernel_check_spawn_invariants(&silo.id, &requested_mode)?;
2057    if silo.sandboxed && !requested_mode.is_subset_of(&silo.mode) {
2058        return Err(SyscallError::PermissionDenied);
2059    }
2060    if silo.sandboxed && !requested_mode.registry.is_empty() {
2061        return Err(SyscallError::PermissionDenied);
2062    }
2063
2064    silo.config = config;
2065    silo.mode = requested_mode;
2066    silo.family = family;
2067    silo.flags = config.flags as u32;
2068    silo.granted_caps = granted_caps;
2069    silo.granted_resources = granted_resources;
2070    Ok(0)
2071}
2072
2073/// Performs the sys silo attach module operation.
2074pub fn sys_silo_attach_module(handle: u64, module_handle: u64) -> Result<u64, SyscallError> {
2075    require_silo_admin()?;
2076    let silo_id = resolve_silo_handle(handle, CapPermissions::read_write())?;
2077
2078    let required = CapPermissions {
2079        read: true,
2080        write: false,
2081        execute: false,
2082        grant: false,
2083        revoke: false,
2084    };
2085    let module_id = resolve_module_handle(module_handle, required)?;
2086
2087    let mut mgr = SILO_MANAGER.lock();
2088    let silo = mgr.get_mut(silo_id)?;
2089
2090    match silo.state {
2091        SiloState::Created | SiloState::Stopped | SiloState::Ready => {
2092            silo.module_id = Some(module_id);
2093            silo.state = SiloState::Ready;
2094            Ok(0)
2095        }
2096        SiloState::Running | SiloState::Paused => {
2097            silo.module_id = Some(module_id);
2098            Ok(0)
2099        }
2100        _ => Err(SyscallError::InvalidArgument),
2101    }
2102}
2103
2104/// Starts silo by id.
2105fn start_silo_by_id(silo_id: u32) -> Result<u64, SyscallError> {
2106    let (
2107        module_id,
2108        granted_caps,
2109        silo_flags,
2110        previous_state,
2111        can_start,
2112        within_task_limit,
2113        silo_name,
2114        silo_label,
2115    ) = {
2116        let mut mgr = SILO_MANAGER.lock();
2117        let silo = mgr.get_mut(silo_id)?;
2118        let previous_state = silo.state;
2119        let can_start = matches!(
2120            previous_state,
2121            SiloState::Ready | SiloState::Stopped | SiloState::Running
2122        );
2123        let within_task_limit = match silo.config.max_tasks {
2124            0 => true, // 0 = unlimited
2125            max => silo.tasks.len() < max as usize,
2126        };
2127        let module_id = silo.module_id;
2128        let granted_caps = silo.granted_caps.clone();
2129        let silo_flags = silo.config.flags;
2130        let silo_name = silo.name.clone();
2131        let silo_label = silo.strate_label.clone();
2132        if can_start && within_task_limit {
2133            silo.state = SiloState::Loading;
2134        }
2135        (
2136            module_id,
2137            granted_caps,
2138            silo_flags,
2139            previous_state,
2140            can_start,
2141            within_task_limit,
2142            silo_name,
2143            silo_label,
2144        )
2145    };
2146
2147    if !can_start {
2148        return Err(SyscallError::InvalidArgument);
2149    }
2150    if !within_task_limit {
2151        return Err(SyscallError::QueueFull);
2152    }
2153
2154    let rollback_loading = |state: SiloState| {
2155        let mut mgr = SILO_MANAGER.lock();
2156        if let Ok(silo) = mgr.get_mut(silo_id) {
2157            if matches!(silo.state, SiloState::Loading) {
2158                silo.state = state;
2159            }
2160        }
2161    };
2162
2163    let module_id = match module_id {
2164        Some(id) => id,
2165        None => {
2166            rollback_loading(previous_state);
2167            return Err(SyscallError::InvalidArgument);
2168        }
2169    };
2170
2171    let seed_caps = {
2172        let task = match current_task_clone() {
2173            Some(t) => t,
2174            None => {
2175                rollback_loading(previous_state);
2176                return Err(SyscallError::PermissionDenied);
2177            }
2178        };
2179        let caps = unsafe { &mut *task.process.capabilities.get() };
2180        let mut out = Vec::with_capacity(granted_caps.len());
2181        for handle in granted_caps {
2182            // Enforce: caller must currently hold the capability.
2183            if !silo_has_capability(&task, handle) {
2184                rollback_loading(previous_state);
2185                return Err(SyscallError::PermissionDenied);
2186            }
2187            if let Some(dup) = caps.duplicate(CapId::from_raw(handle)) {
2188                out.push(dup);
2189            } else {
2190                rollback_loading(previous_state);
2191                return Err(SyscallError::PermissionDenied);
2192            }
2193        }
2194        out
2195    };
2196
2197    let display = silo_label.unwrap_or(silo_name);
2198    let task_name_owned = if silo_flags & SILO_FLAG_ADMIN != 0 {
2199        alloc::format!("silo-{}/strate-admin-{}", silo_id, display)
2200    } else {
2201        alloc::format!("silo-{}/strate-{}", silo_id, display)
2202    };
2203    // Intentional leak: task names are expected to live for the task lifetime.
2204    // This avoids generic "silo" labels in process viewers.
2205    let task_name: &'static str = Box::leak(task_name_owned.into_boxed_str());
2206
2207    let module_data = {
2208        let registry = MODULE_REGISTRY.lock();
2209        match registry.get(module_id) {
2210            Some(module) => module.data.clone(),
2211            None => {
2212                rollback_loading(previous_state);
2213                return Err(SyscallError::BadHandle);
2214            }
2215        }
2216    };
2217
2218    let load_result =
2219        crate::process::elf::load_elf_task_with_caps(module_data.as_slice(), task_name, &seed_caps)
2220            .map_err(|err| {
2221                log::warn!(
2222                    "silo_start: sid={} module={} task='{}' load failed: {}",
2223                    silo_id,
2224                    module_id,
2225                    task_name,
2226                    err
2227                );
2228                map_elf_start_error(err)
2229            });
2230
2231    let task = match load_result {
2232        Ok(task) => task,
2233        Err(e) => {
2234            rollback_loading(previous_state);
2235            return Err(e);
2236        }
2237    };
2238    let task_id = task.id;
2239    let task_pid = task.pid;
2240
2241    // Give the silo an EOF stdin so that any read(0, …) returns 0 immediately
2242    // instead of EBADF (which can cause busy-loops) or blocking on the
2243    // keyboard (which would steal input from the foreground shell).
2244    let bg_stdin = crate::vfs::create_background_stdin();
2245    let fd_table = unsafe { &mut *task.process.fd_table.get() };
2246    fd_table.insert_at(crate::vfs::STDIN, bg_stdin);
2247
2248    let mut mgr = SILO_MANAGER.lock();
2249    {
2250        let silo = match mgr.get_mut(silo_id) {
2251            Ok(silo) => silo,
2252            Err(e) => {
2253                return Err(e);
2254            }
2255        };
2256        silo.tasks.push(task_id);
2257        silo.state = SiloState::Running;
2258        let fpu_xcr0 = unsafe { (*task.fpu_state.get()).xcr0_mask };
2259        let effective_xcr0 = (silo.config.xcr0_mask & fpu_xcr0).max(0x3);
2260        task.xcr0_mask
2261            .store(effective_xcr0, core::sync::atomic::Ordering::Relaxed);
2262    }
2263    mgr.map_task(task_id, silo_id);
2264    mgr.push_event(SiloEvent {
2265        silo_id: silo_id.into(),
2266        kind: SiloEventKind::Started,
2267        data0: 0,
2268        data1: 0,
2269        tick: crate::process::scheduler::ticks(),
2270    });
2271    drop(mgr);
2272    crate::process::add_task(task);
2273    Ok(task_pid as u64)
2274}
2275
2276/// Performs the sys silo start operation.
2277pub fn sys_silo_start(handle: u64) -> Result<u64, SyscallError> {
2278    require_silo_admin()?;
2279    let required = CapPermissions {
2280        read: false,
2281        write: false,
2282        execute: true,
2283        grant: false,
2284        revoke: false,
2285    };
2286    let silo_id = resolve_silo_handle(handle, required)?;
2287    start_silo_by_id(silo_id)
2288}
2289
2290/// Best-effort cleanup hook called by the scheduler when a task terminates.
2291///
2292/// Ensures `task_to_silo` mappings are removed even for normal exits and
2293/// transitions a running/paused silo to `Stopped` when its last task is gone.
2294pub fn on_task_terminated(task_id: TaskId) {
2295    let mut mgr = SILO_MANAGER.lock();
2296    let silo_id = match mgr.silo_for_task(task_id) {
2297        Some(id) => id,
2298        None => return,
2299    };
2300    mgr.unmap_task(task_id);
2301
2302    let mut emit_stopped = false;
2303    if let Ok(silo) = mgr.get_mut(silo_id) {
2304        if let Some(pos) = silo.tasks.iter().position(|tid| *tid == task_id) {
2305            silo.tasks.swap_remove(pos);
2306        }
2307        if silo.tasks.is_empty() {
2308            match silo.state {
2309                SiloState::Running | SiloState::Paused | SiloState::Stopping => {
2310                    silo.state = SiloState::Stopped;
2311                    silo.event_seq = silo.event_seq.wrapping_add(1);
2312                    emit_stopped = true;
2313                }
2314                _ => {}
2315            }
2316        }
2317    }
2318
2319    if emit_stopped {
2320        mgr.push_event(SiloEvent {
2321            silo_id: silo_id.into(),
2322            kind: SiloEventKind::Stopped,
2323            data0: 0,
2324            data1: 0,
2325            tick: crate::process::scheduler::ticks(),
2326        });
2327    }
2328}
2329
2330/// Stops or kill silo by id.
2331fn stop_or_kill_silo_by_id(
2332    silo_id: u32,
2333    force_kill: bool,
2334    require_running: bool,
2335) -> Result<Vec<TaskId>, SyscallError> {
2336    let mut mgr = SILO_MANAGER.lock();
2337    let tasks = {
2338        let silo = mgr.get_mut(silo_id)?;
2339        if force_kill {
2340            silo.state = SiloState::Stopped;
2341            let tasks = silo.tasks.clone();
2342            silo.tasks.clear();
2343            tasks
2344        } else {
2345            match silo.state {
2346                SiloState::Running | SiloState::Paused => {
2347                    silo.state = SiloState::Stopping;
2348                    let tasks = silo.tasks.clone();
2349                    silo.tasks.clear();
2350                    tasks
2351                }
2352                _ if require_running => return Err(SyscallError::InvalidArgument),
2353                _ => Vec::new(),
2354            }
2355        }
2356    };
2357
2358    for tid in &tasks {
2359        mgr.unmap_task(*tid);
2360    }
2361    if !force_kill {
2362        if let Ok(silo) = mgr.get_mut(silo_id) {
2363            silo.state = SiloState::Stopped;
2364        }
2365    }
2366    mgr.push_event(SiloEvent {
2367        silo_id: silo_id.into(),
2368        kind: if force_kill {
2369            SiloEventKind::Killed
2370        } else {
2371            SiloEventKind::Stopped
2372        },
2373        data0: 0,
2374        data1: 0,
2375        tick: crate::process::scheduler::ticks(),
2376    });
2377
2378    Ok(tasks)
2379}
2380
2381/// Performs the sys silo stop operation.
2382pub fn sys_silo_stop(handle: u64) -> Result<u64, SyscallError> {
2383    require_silo_admin()?;
2384    let required = CapPermissions {
2385        read: false,
2386        write: false,
2387        execute: true,
2388        grant: false,
2389        revoke: false,
2390    };
2391    let silo_id = resolve_silo_handle(handle, required)?;
2392    let tasks = stop_or_kill_silo_by_id(silo_id, false, true)?;
2393
2394    for tid in tasks {
2395        crate::process::kill_task(tid);
2396    }
2397    Ok(0)
2398}
2399
2400/// Performs the sys silo kill operation.
2401pub fn sys_silo_kill(handle: u64) -> Result<u64, SyscallError> {
2402    require_silo_admin()?;
2403    let required = CapPermissions {
2404        read: false,
2405        write: false,
2406        execute: true,
2407        grant: false,
2408        revoke: false,
2409    };
2410    let silo_id = resolve_silo_handle(handle, required)?;
2411    let tasks = stop_or_kill_silo_by_id(silo_id, true, false)?;
2412
2413    for tid in tasks {
2414        crate::process::kill_task(tid);
2415    }
2416    Ok(0)
2417}
2418
2419/// Performs the silo has capability operation.
2420fn silo_has_capability(task: &Task, cap_id: u64) -> bool {
2421    let caps = unsafe { &*task.process.capabilities.get() };
2422    caps.get(CapId::from_raw(cap_id)).is_some()
2423}
2424
2425/// Returns whether delegated resource.
2426fn is_delegated_resource(rt: ResourceType) -> bool {
2427    matches!(
2428        rt,
2429        ResourceType::Nic
2430            | ResourceType::FileSystem
2431            | ResourceType::Console
2432            | ResourceType::Keyboard
2433            | ResourceType::Volume
2434            | ResourceType::Namespace
2435            | ResourceType::Device
2436            | ResourceType::File
2437            | ResourceType::IoPortRange
2438            | ResourceType::InterruptLine
2439    )
2440}
2441
2442/// Returns whether admin task.
2443fn is_admin_task(task: &Task) -> bool {
2444    let caps = unsafe { &*task.process.capabilities.get() };
2445    let required = CapPermissions {
2446        read: false,
2447        write: false,
2448        execute: false,
2449        grant: true,
2450        revoke: false,
2451    };
2452    caps.has_resource_with_permissions(ResourceType::Silo, SILO_ADMIN_RESOURCE, required)
2453}
2454
2455/// Maps elf start error.
2456fn map_elf_start_error(err: &'static str) -> SyscallError {
2457    if err.contains("allocate")
2458        || err.contains("Out of memory")
2459        || err.contains("No virtual range")
2460        || err.contains("Failed to map page")
2461    {
2462        return SyscallError::OutOfMemory;
2463    }
2464    if err.contains("ELF")
2465        || err.contains("PT_")
2466        || err.contains("entry")
2467        || err.contains("relocation")
2468        || err.contains("Program header")
2469        || err.contains("x86_64")
2470        || err.contains("Unsupported")
2471    {
2472        return SyscallError::ExecFormatError;
2473    }
2474    SyscallError::InvalidArgument
2475}
2476
2477#[derive(Debug, Clone, Copy, PartialEq, Eq)]
2478struct GrantedResource {
2479    resource_type: ResourceType,
2480    resource: usize,
2481    permissions: CapPermissions,
2482}
2483
2484/// Performs the merge permissions operation.
2485fn merge_permissions(a: CapPermissions, b: CapPermissions) -> CapPermissions {
2486    CapPermissions {
2487        read: a.read || b.read,
2488        write: a.write || b.write,
2489        execute: a.execute || b.execute,
2490        grant: a.grant || b.grant,
2491        revoke: a.revoke || b.revoke,
2492    }
2493}
2494
2495/// Performs the permissions subset operation.
2496fn permissions_subset(requested: CapPermissions, allowed: CapPermissions) -> bool {
2497    (!requested.read || allowed.read)
2498        && (!requested.write || allowed.write)
2499        && (!requested.execute || allowed.execute)
2500        && (!requested.grant || allowed.grant)
2501        && (!requested.revoke || allowed.revoke)
2502}
2503
2504/// Performs the add or merge granted resource operation.
2505fn add_or_merge_granted_resource(list: &mut Vec<GrantedResource>, grant: GrantedResource) {
2506    for existing in list.iter_mut() {
2507        if existing.resource_type == grant.resource_type && existing.resource == grant.resource {
2508            existing.permissions = merge_permissions(existing.permissions, grant.permissions);
2509            return;
2510        }
2511    }
2512    list.push(grant);
2513}
2514
2515/// Performs the register current task granted resource operation.
2516pub fn register_current_task_granted_resource(
2517    resource_type: ResourceType,
2518    resource: usize,
2519    permissions: CapPermissions,
2520) -> Result<(), SyscallError> {
2521    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
2522    let mut mgr = SILO_MANAGER.lock();
2523    let silo_id = mgr
2524        .silo_for_task(task.id)
2525        .ok_or(SyscallError::PermissionDenied)?;
2526    let silo = mgr.get_mut(silo_id)?;
2527    add_or_merge_granted_resource(
2528        &mut silo.granted_resources,
2529        GrantedResource {
2530            resource_type,
2531            resource,
2532            permissions,
2533        },
2534    );
2535    Ok(())
2536}
2537
2538/// Enforce that the current task may use a delegated capability.
2539pub fn enforce_cap_for_current_task(handle: u64) -> Result<(), SyscallError> {
2540    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
2541
2542    // Admin tasks bypass delegated-cap enforcement.
2543    if is_admin_task(&task) {
2544        return Ok(());
2545    }
2546
2547    let caps = unsafe { &*task.process.capabilities.get() };
2548    let cap = caps
2549        .get(CapId::from_raw(handle))
2550        .ok_or(SyscallError::BadHandle)?;
2551
2552    if !is_delegated_resource(cap.resource_type) {
2553        return Ok(());
2554    }
2555
2556    let mgr = SILO_MANAGER.lock();
2557    if let Some(silo_id) = mgr.silo_for_task(task.id) {
2558        if let Ok(silo) = mgr.get(silo_id) {
2559            for grant in &silo.granted_resources {
2560                if grant.resource_type == cap.resource_type && grant.resource == cap.resource {
2561                    if permissions_subset(cap.permissions, grant.permissions) {
2562                        return Ok(());
2563                    }
2564                    return Err(SyscallError::PermissionDenied);
2565                }
2566            }
2567        }
2568    }
2569
2570    Err(SyscallError::PermissionDenied)
2571}
2572
2573/// Performs the enforce registry bind for current task operation.
2574pub fn enforce_registry_bind_for_current_task() -> Result<(), SyscallError> {
2575    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
2576    if is_admin_task(&task) {
2577        return Ok(());
2578    }
2579    let mgr = SILO_MANAGER.lock();
2580    let silo_id = mgr
2581        .silo_for_task(task.id)
2582        .ok_or(SyscallError::PermissionDenied)?;
2583    let silo = mgr.get(silo_id)?;
2584    if silo.sandboxed {
2585        return Err(SyscallError::PermissionDenied);
2586    }
2587    if silo.mode.registry.contains(RegistryMode::BIND) {
2588        Ok(())
2589    } else {
2590        Err(SyscallError::PermissionDenied)
2591    }
2592}
2593
2594/// Enforce console access for the current task.
2595///
2596/// Only admin tasks or tasks holding a Console capability with write permission
2597/// can access the kernel console (SYS_WRITE fd=1/2).
2598pub fn enforce_console_access() -> Result<(), SyscallError> {
2599    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
2600    if is_admin_task(&task) {
2601        return Ok(());
2602    }
2603    let mgr = SILO_MANAGER.lock();
2604    if let Some(silo_id) = mgr.silo_for_task(task.id) {
2605        if let Ok(silo) = mgr.get(silo_id) {
2606            if matches!(silo.family, StrateFamily::SYS | StrateFamily::NET) {
2607                return Ok(());
2608            }
2609        }
2610    }
2611    drop(mgr);
2612    let caps = unsafe { &*task.process.capabilities.get() };
2613    let required = CapPermissions {
2614        read: false,
2615        write: true,
2616        execute: false,
2617        grant: false,
2618        revoke: false,
2619    };
2620    if caps.has_resource_type_with_permissions(ResourceType::Console, required) {
2621        Ok(())
2622    } else {
2623        Err(SyscallError::PermissionDenied)
2624    }
2625}
2626
2627/// Performs the sys silo event next operation.
2628pub fn sys_silo_event_next(_event_ptr: u64) -> Result<u64, SyscallError> {
2629    require_silo_admin()?;
2630    if _event_ptr == 0 {
2631        return Err(SyscallError::Fault);
2632    }
2633
2634    let event = {
2635        let mut mgr = SILO_MANAGER.lock();
2636        mgr.events.pop_front()
2637    };
2638
2639    let event = match event {
2640        Some(e) => e,
2641        None => return Err(SyscallError::Again),
2642    };
2643
2644    const EVT_SIZE: usize = core::mem::size_of::<SiloEvent>();
2645    let user = UserSliceWrite::new(_event_ptr, EVT_SIZE)?;
2646    let src =
2647        unsafe { core::slice::from_raw_parts(&event as *const SiloEvent as *const u8, EVT_SIZE) };
2648    user.copy_from(src);
2649    Ok(0)
2650}
2651
2652/// Performs the sys silo suspend operation.
2653pub fn sys_silo_suspend(handle: u64) -> Result<u64, SyscallError> {
2654    require_silo_admin()?;
2655    let required = CapPermissions {
2656        read: false,
2657        write: false,
2658        execute: true,
2659        grant: false,
2660        revoke: false,
2661    };
2662    let silo_id = resolve_silo_handle(handle, required)?;
2663
2664    // Lock is released before suspend_task (which takes the scheduler lock)
2665    // to avoid lock-ordering deadlock. Tasks added between the two locks
2666    // won't be suspended : acceptable best-effort trade-off.
2667    let tasks = {
2668        let mut mgr = SILO_MANAGER.lock();
2669        let silo = mgr.get_mut(silo_id)?;
2670        match silo.state {
2671            SiloState::Running => {
2672                silo.state = SiloState::Paused;
2673                silo.tasks.clone()
2674            }
2675            _ => return Err(SyscallError::InvalidArgument),
2676        }
2677    };
2678
2679    for tid in &tasks {
2680        crate::process::suspend_task(*tid);
2681    }
2682
2683    let mut mgr = SILO_MANAGER.lock();
2684    mgr.push_event(SiloEvent {
2685        silo_id: silo_id.into(),
2686        kind: SiloEventKind::Paused,
2687        data0: 0,
2688        data1: 0,
2689        tick: crate::process::scheduler::ticks(),
2690    });
2691
2692    Ok(0)
2693}
2694
2695/// Performs the sys silo resume operation.
2696pub fn sys_silo_resume(handle: u64) -> Result<u64, SyscallError> {
2697    require_silo_admin()?;
2698    let required = CapPermissions {
2699        read: false,
2700        write: false,
2701        execute: true,
2702        grant: false,
2703        revoke: false,
2704    };
2705    let silo_id = resolve_silo_handle(handle, required)?;
2706
2707    let tasks = {
2708        let mut mgr = SILO_MANAGER.lock();
2709        let silo = mgr.get_mut(silo_id)?;
2710        match silo.state {
2711            SiloState::Paused => {
2712                silo.state = SiloState::Running;
2713                silo.tasks.clone()
2714            }
2715            _ => return Err(SyscallError::InvalidArgument),
2716        }
2717    };
2718
2719    // Same lock-ordering pattern as sys_silo_suspend (see note there).
2720    for tid in &tasks {
2721        crate::process::resume_task(*tid);
2722    }
2723
2724    let mut mgr = SILO_MANAGER.lock();
2725    mgr.push_event(SiloEvent {
2726        silo_id: silo_id.into(),
2727        kind: SiloEventKind::Resumed,
2728        data0: 0,
2729        data1: 0,
2730        tick: crate::process::scheduler::ticks(),
2731    });
2732
2733    Ok(0)
2734}
2735
2736// ============================================================================
2737// Kernel-side CLI helpers (no capability gate : shell runs in Ring 0)
2738// ============================================================================
2739
2740pub fn kernel_suspend_silo(selector: &str) -> Result<u32, SyscallError> {
2741    let (silo_id, tasks) = {
2742        let mut mgr = SILO_MANAGER.lock();
2743        let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2744        let silo = mgr.get_mut(silo_id)?;
2745        match silo.state {
2746            SiloState::Running => {
2747                silo.state = SiloState::Paused;
2748                let t = silo.tasks.clone();
2749                (silo_id, t)
2750            }
2751            _ => return Err(SyscallError::InvalidArgument),
2752        }
2753    };
2754    for tid in &tasks {
2755        crate::process::suspend_task(*tid);
2756    }
2757    let mut mgr = SILO_MANAGER.lock();
2758    mgr.push_event(SiloEvent {
2759        silo_id: silo_id.into(),
2760        kind: SiloEventKind::Paused,
2761        data0: 0,
2762        data1: 0,
2763        tick: crate::process::scheduler::ticks(),
2764    });
2765    Ok(silo_id)
2766}
2767
2768pub fn kernel_resume_silo(selector: &str) -> Result<u32, SyscallError> {
2769    let (silo_id, tasks) = {
2770        let mut mgr = SILO_MANAGER.lock();
2771        let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2772        let silo = mgr.get_mut(silo_id)?;
2773        match silo.state {
2774            SiloState::Paused => {
2775                silo.state = SiloState::Running;
2776                let t = silo.tasks.clone();
2777                (silo_id, t)
2778            }
2779            _ => return Err(SyscallError::InvalidArgument),
2780        }
2781    };
2782    for tid in &tasks {
2783        crate::process::resume_task(*tid);
2784    }
2785    let mut mgr = SILO_MANAGER.lock();
2786    mgr.push_event(SiloEvent {
2787        silo_id: silo_id.into(),
2788        kind: SiloEventKind::Resumed,
2789        data0: 0,
2790        data1: 0,
2791        tick: crate::process::scheduler::ticks(),
2792    });
2793    Ok(silo_id)
2794}
2795
2796pub fn silo_detail_snapshot(selector: &str) -> Result<SiloDetailSnapshot, SyscallError> {
2797    let mgr = SILO_MANAGER.lock();
2798    let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2799    let s = mgr.get(silo_id)?;
2800    Ok(SiloDetailSnapshot {
2801        base: SiloSnapshot {
2802            id: s.id.sid,
2803            tier: s.id.tier,
2804            name: s.name.clone(),
2805            strate_label: s.strate_label.clone(),
2806            state: s.state,
2807            task_count: s.tasks.len(),
2808            mem_usage_bytes: s.mem_usage_bytes,
2809            mem_min_bytes: s.config.mem_min,
2810            mem_max_bytes: s.config.mem_max,
2811            mode: s.config.mode,
2812            graphics_flags: s.config.flags
2813                & (SILO_FLAG_GRAPHICS
2814                    | SILO_FLAG_WEBRTC_NATIVE
2815                    | SILO_FLAG_GRAPHICS_READ_ONLY
2816                    | SILO_FLAG_WEBRTC_TURN_FORCE),
2817            graphics_max_sessions: s.config.graphics_max_sessions,
2818            graphics_session_ttl_sec: s.config.graphics_session_ttl_sec,
2819        },
2820        family: s.family,
2821        sandboxed: s.sandboxed,
2822        cpu_shares: s.config.cpu_shares,
2823        cpu_affinity_mask: s.config.cpu_affinity_mask,
2824        max_tasks: s.config.max_tasks,
2825        task_ids: s.tasks.iter().map(|t| t.as_u64()).collect(),
2826        unveil_rules: s
2827            .unveil_rules
2828            .iter()
2829            .map(|r| {
2830                let bits = (if r.rights.read { 4 } else { 0 })
2831                    | (if r.rights.write { 2 } else { 0 })
2832                    | (if r.rights.execute { 1 } else { 0 });
2833                (r.path.clone(), bits)
2834            })
2835            .collect(),
2836        granted_caps_count: s.granted_caps.len(),
2837        cpu_features_required: s.config.cpu_features_required,
2838        cpu_features_allowed: s.config.cpu_features_allowed,
2839        xcr0_mask: s.config.xcr0_mask,
2840        graphics_flags: s.config.flags
2841            & (SILO_FLAG_GRAPHICS
2842                | SILO_FLAG_WEBRTC_NATIVE
2843                | SILO_FLAG_GRAPHICS_READ_ONLY
2844                | SILO_FLAG_WEBRTC_TURN_FORCE),
2845        graphics_max_sessions: s.config.graphics_max_sessions,
2846        graphics_session_ttl_sec: s.config.graphics_session_ttl_sec,
2847    })
2848}
2849
2850pub fn list_events_snapshot() -> Vec<SiloEventSnapshot> {
2851    let mgr = SILO_MANAGER.lock();
2852    mgr.events
2853        .iter()
2854        .map(|e| SiloEventSnapshot {
2855            silo_id: e.silo_id,
2856            kind: e.kind,
2857            data0: e.data0,
2858            data1: e.data1,
2859            tick: e.tick,
2860        })
2861        .collect()
2862}
2863
2864pub fn list_events_for_silo(selector: &str) -> Result<Vec<SiloEventSnapshot>, SyscallError> {
2865    let mgr = SILO_MANAGER.lock();
2866    let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2867    let sid64 = silo_id as u64;
2868    Ok(mgr
2869        .events
2870        .iter()
2871        .filter(|e| e.silo_id == sid64)
2872        .map(|e| SiloEventSnapshot {
2873            silo_id: e.silo_id,
2874            kind: e.kind,
2875            data0: e.data0,
2876            data1: e.data1,
2877            tick: e.tick,
2878        })
2879        .collect())
2880}
2881
2882pub fn kernel_pledge_silo(selector: &str, mode_val: u16) -> Result<(u16, u16), SyscallError> {
2883    let new_mode = OctalMode::from_octal(mode_val);
2884    let mut mgr = SILO_MANAGER.lock();
2885    let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2886    let silo = mgr.get_mut(silo_id)?;
2887    let old_raw = silo.config.mode;
2888    silo.mode.pledge(new_mode)?;
2889    silo.config.mode = mode_val;
2890    Ok((old_raw, mode_val))
2891}
2892
2893pub fn kernel_unveil_silo(
2894    selector: &str,
2895    path: &str,
2896    rights_str: &str,
2897) -> Result<u32, SyscallError> {
2898    let rights = UnveilRights {
2899        read: rights_str.contains('r'),
2900        write: rights_str.contains('w'),
2901        execute: rights_str.contains('x'),
2902    };
2903    let mut mgr = SILO_MANAGER.lock();
2904    let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2905    let silo = mgr.get_mut(silo_id)?;
2906    if let Some(rule) = silo.unveil_rules.iter_mut().find(|r| r.path == path) {
2907        rule.rights = rights;
2908    } else {
2909        silo.unveil_rules.push(UnveilRule {
2910            path: String::from(path),
2911            rights,
2912        });
2913    }
2914    Ok(silo_id)
2915}
2916
2917pub fn kernel_sandbox_silo(selector: &str) -> Result<u32, SyscallError> {
2918    let mut mgr = SILO_MANAGER.lock();
2919    let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2920    let silo = mgr.get_mut(silo_id)?;
2921    silo.sandboxed = true;
2922    crate::audit::log(
2923        crate::audit::AuditCategory::Security,
2924        0,
2925        silo_id,
2926        alloc::format!("silo sandboxed"),
2927    );
2928    Ok(silo_id)
2929}
2930
2931/// Get the silo ID for a given task, if any.
2932pub fn task_silo_id(task_id: TaskId) -> Option<u32> {
2933    SILO_MANAGER.lock().silo_for_task(task_id)
2934}
2935
2936/// Append data to a silo's output ring buffer (called from `sys_debug_log`).
2937pub fn silo_output_write(silo_id: u32, data: &[u8]) {
2938    let mut mgr = SILO_MANAGER.lock();
2939    if let Ok(silo) = mgr.get_mut(silo_id) {
2940        let buf = silo
2941            .output_buf
2942            .get_or_insert_with(|| Box::new(SiloOutputBuf::new()));
2943        buf.push(data);
2944    }
2945}
2946
2947/// Drain the output buffer for a silo, returning accumulated bytes.
2948pub fn silo_output_drain(selector: &str) -> Result<Vec<u8>, SyscallError> {
2949    let mut mgr = SILO_MANAGER.lock();
2950    let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2951    let silo = mgr.get_mut(silo_id)?;
2952    let mut buf = match silo.output_buf.take() {
2953        Some(buf) => buf,
2954        None => return Ok(Vec::new()),
2955    };
2956    let out = buf.drain();
2957    silo.output_buf = Some(buf);
2958    Ok(out)
2959}
2960
2961/// Dynamically adjust resource quotas for a silo.
2962///
2963/// `key` can be: `mem_max`, `mem_min`, `max_tasks`, `cpu_shares`.
2964/// Values are parsed as u64 (bytes for memory, count otherwise).
2965pub fn kernel_limit_silo(selector: &str, key: &str, value: u64) -> Result<u32, SyscallError> {
2966    let mut mgr = SILO_MANAGER.lock();
2967    let silo_id = resolve_selector_to_silo_id(selector, &mgr)?;
2968    let silo = mgr.get_mut(silo_id)?;
2969    let mut next_mem_min = silo.config.mem_min;
2970    let mut next_mem_max = silo.config.mem_max;
2971    let mut next_max_tasks = silo.config.max_tasks;
2972    let mut next_cpu_shares = silo.config.cpu_shares;
2973    match key {
2974        "mem_max" => next_mem_max = value,
2975        "mem_min" => next_mem_min = value,
2976        "max_tasks" => {
2977            if value > u32::MAX as u64 {
2978                return Err(SyscallError::InvalidArgument);
2979            }
2980            next_max_tasks = value as u32;
2981        }
2982        "cpu_shares" => {
2983            if value > u32::MAX as u64 {
2984                return Err(SyscallError::InvalidArgument);
2985            }
2986            next_cpu_shares = value as u32;
2987        }
2988        _ => return Err(SyscallError::InvalidArgument),
2989    }
2990    if next_mem_max != 0 && next_mem_min > next_mem_max {
2991        return Err(SyscallError::InvalidArgument);
2992    }
2993    silo.config.mem_min = next_mem_min;
2994    silo.config.mem_max = next_mem_max;
2995    silo.config.max_tasks = next_max_tasks;
2996    silo.config.cpu_shares = next_cpu_shares;
2997    crate::audit::log(
2998        crate::audit::AuditCategory::Security,
2999        0,
3000        silo_id,
3001        alloc::format!("silo limit: {}={}", key, value),
3002    );
3003    Ok(silo_id)
3004}
3005
3006// ============================================================================
3007// Fault handling (called from exception handlers)
3008// ============================================================================
3009
3010/// Performs the dump user fault operation.
3011fn dump_user_fault(task_id: TaskId, reason: SiloFaultReason, extra: u64, subcode: u64, rip: u64) {
3012    let task_meta = crate::process::get_task_by_id(task_id).map(|task| {
3013        let state = task.get_state();
3014        let as_ref = task.process.address_space_arc();
3015        (
3016            task.pid,
3017            task.tid,
3018            task.name,
3019            state,
3020            as_ref.cr3().as_u64(),
3021            as_ref.is_kernel(),
3022        )
3023    });
3024
3025    if let Some((pid, tid, name, state, as_cr3, as_is_kernel)) = task_meta {
3026        crate::serial_force_println!(
3027            "\x1b[31m[handle_user_fault]\x1b[0m task={} \x1b[36mpid={}\x1b[0m tid={} name='{}' state={:?} reason={:?} \x1b[35mrip={:#x}\x1b[0m \x1b[35mextra={:#x}\x1b[0m subcode={:#x} as_cr3={:#x} as_kernel={}",
3028            task_id.as_u64(),
3029            pid,
3030            tid,
3031            name,
3032            state,
3033            reason,
3034            rip,
3035            extra,
3036            subcode,
3037            as_cr3,
3038            as_is_kernel
3039        );
3040    } else {
3041        crate::serial_force_println!(
3042            "\x1b[31m[handle_user_fault]\x1b[0m task={} reason={:?} \x1b[35mrip={:#x}\x1b[0m \x1b[35mextra={:#x}\x1b[0m subcode={:#x} (task metadata unavailable)",
3043            task_id.as_u64(),
3044            reason,
3045            rip,
3046            extra,
3047            subcode
3048        );
3049    }
3050
3051    if reason == SiloFaultReason::PageFault {
3052        let present = (subcode & 0x1) != 0;
3053        let write = (subcode & 0x2) != 0;
3054        let user = (subcode & 0x4) != 0;
3055        let reserved = (subcode & 0x8) != 0;
3056        let instr_fetch = (subcode & 0x10) != 0;
3057        let pkey = (subcode & 0x20) != 0;
3058        let shadow_stack = (subcode & 0x40) != 0;
3059        let sgx = (subcode & 0x8000) != 0;
3060        crate::serial_force_println!(
3061            "\x1b[31m[handle_user_fault]\x1b[0m \x1b[31mpagefault\x1b[0m \x1b[35maddr={:#x}\x1b[0m \x1b[35mrip={:#x}\x1b[0m ec={:#x} present={} write={} user={} reserved={} ifetch={} pkey={} shadow_stack={} sgx={}",
3062            extra,
3063            rip,
3064            subcode,
3065            present,
3066            write,
3067            user,
3068            reserved,
3069            instr_fetch,
3070            pkey,
3071            shadow_stack,
3072            sgx
3073        );
3074        if user && extra < 0x1000 {
3075            crate::serial_force_println!(
3076                "\x1b[31m[handle_user_fault]\x1b[0m \x1b[33mhint: low user address fault ({:#x}) -> probable NULL/near-NULL dereference\x1b[0m",
3077                extra
3078            );
3079        }
3080    } else {
3081        crate::serial_force_println!(
3082            "\x1b[31m[handle_user_fault]\x1b[0m \x1b[31mfault detail\x1b[0m \x1b[35mrip={:#x}\x1b[0m code={:#x}",
3083            rip,
3084            subcode
3085        );
3086    }
3087}
3088
3089/// Handles user fault.
3090pub fn handle_user_fault(
3091    task_id: TaskId,
3092    reason: SiloFaultReason,
3093    extra: u64,
3094    subcode: u64,
3095    rip: u64,
3096) {
3097    // FORCE OUTPUT for user fault - bypasses normal logging mutexes
3098    crate::serial_force_println!(
3099        "\x1b[31;1m[handle_user_fault] CRITICAL FAULT\x1b[0m: tid={} reason={:?} rip={:#x} addr={:#x} err={:#x}",
3100        task_id.as_u64(),
3101        reason,
3102        rip,
3103        extra,
3104        subcode
3105    );
3106
3107    dump_user_fault(task_id, reason, extra, subcode, rip);
3108
3109    // Best-effort: map task to silo, mark crashed, emit event, kill tasks.
3110    let tasks = {
3111        let mut mgr = SILO_MANAGER.lock();
3112        let silo_id = match mgr.silo_for_task(task_id) {
3113            Some(id) => id,
3114            None => {
3115                crate::serial_force_println!(
3116                    "[handle_user_fault] Non-silo task {} crashed (reason={:?})! Killing it.",
3117                    task_id.as_u64(),
3118                    reason
3119                );
3120                drop(mgr);
3121                crate::process::kill_task(task_id);
3122                return;
3123            }
3124        };
3125        let mut tasks = Vec::new();
3126        {
3127            if let Ok(silo) = mgr.get_mut(silo_id) {
3128                silo.state = SiloState::Crashed;
3129                tasks = silo.tasks.clone();
3130                silo.tasks.clear();
3131                silo.event_seq = silo.event_seq.wrapping_add(1);
3132            }
3133        }
3134        for tid in &tasks {
3135            mgr.unmap_task(*tid);
3136        }
3137        mgr.push_event(SiloEvent {
3138            silo_id: silo_id.into(),
3139            kind: SiloEventKind::Crashed,
3140            data0: pack_fault(reason, subcode),
3141            data1: extra,
3142            tick: crate::process::scheduler::ticks(),
3143        });
3144        tasks
3145    };
3146
3147    for tid in &tasks {
3148        crate::process::kill_task(*tid);
3149    }
3150    if !tasks.contains(&task_id) {
3151        crate::process::kill_task(task_id);
3152    }
3153}