Skip to main content

strat9_kernel/vfs/
mod.rs

1//! Virtual File System (VFS) - Plan 9-inspired namespace.
2//!
3//! The VFS provides:
4//! - Scheme abstraction: Pluggable backends (IPC, kernel, devices)
5//! - Mount table: Map path prefixes to schemes
6//! - File descriptors: Per-process FD tables
7//! - Path resolution: Navigate the namespace hierarchy
8//!
9//! ## Architecture
10//!
11//! ```text
12//! User syscall (open "/net/tcp/0")
13//!      ↓
14//! VFS::open() — path resolution
15//!      ↓
16//! MountTable::resolve() → ("/net" → IpcScheme, "tcp/0")
17//!      ↓
18//! IpcScheme::open("tcp/0") → IPC message to network stack
19//!      ↓
20//! OpenFile created with scheme reference + file_id
21//!      ↓
22//! FD allocated in process FD table
23//!      ↓
24//! Returns FD to userspace
25//! ```
26
27pub mod blkdev_scheme;
28pub mod console_scheme;
29pub mod fd;
30pub mod file;
31pub mod ipcfs;
32pub mod mount;
33pub mod pipe;
34pub mod procfs;
35pub mod pty_scheme;
36pub mod ramfs_scheme;
37pub mod scheme;
38pub mod scheme_router;
39
40use crate::{process::current_task_clone, sync::SpinLock, syscall::error::SyscallError};
41use alloc::{boxed::Box, string::String, sync::Arc};
42use core::fmt::Write;
43
44pub use blkdev_scheme::BlkDevScheme;
45pub use fd::{FileDescriptorTable, STDERR, STDIN, STDOUT};
46pub use file::OpenFile;
47pub use mount::{list_mounts, mount, resolve, unmount, Namespace};
48pub use pipe::PipeScheme;
49pub use procfs::ProcScheme;
50pub use ramfs_scheme::RamfsScheme;
51pub use scheme::{
52    DirEntry, DynScheme, FileFlags, FileStat, IpcScheme, KernelScheme, OpenFlags, Scheme,
53};
54pub use scheme_router::{
55    init_builtin_schemes, list_schemes, mount_scheme, register_initfs_file, register_scheme,
56};
57
58use crate::memory::{UserSliceRead, UserSliceWrite};
59
60// ============================================================================
61// High-level VFS API
62// ============================================================================
63
64/// Open a file and return a file descriptor.
65///
66/// This is the main entry point for opening files from userspace.
67pub fn open(path: &str, flags: OpenFlags) -> Result<u32, SyscallError> {
68    // Resolve path to (scheme, relative_path)
69    let (scheme, relative_path) = mount::resolve(path)?;
70
71    // Open the file via the scheme
72    let open_result = scheme.open(&relative_path, flags)?;
73
74    // Create OpenFile wrapper
75    let open_file = Arc::new(OpenFile::new(
76        scheme,
77        open_result.file_id,
78        String::from(path),
79        flags,
80        open_result.flags,
81        open_result.size,
82    ));
83
84    // Insert into current task's FD table
85    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
86    // SAFETY: We're in syscall context, have exclusive access to FD table
87    let fd = unsafe { (&mut *task.process.fd_table.get()).insert(open_file) };
88
89    Ok(fd)
90}
91
92/// Create a directory.
93pub fn mkdir(path: &str, mode: u32) -> Result<(), SyscallError> {
94    let (scheme, relative_path) = mount::resolve(path)?;
95    scheme.create_directory(&relative_path, mode)?;
96    Ok(())
97}
98
99/// Create an empty regular file.
100pub fn create_file(path: &str, mode: u32) -> Result<(), SyscallError> {
101    let (scheme, relative_path) = mount::resolve(path)?;
102    scheme.create_file(&relative_path, mode)?;
103    Ok(())
104}
105
106/// Remove a file or directory.
107pub fn unlink(path: &str) -> Result<(), SyscallError> {
108    let (scheme, relative_path) = mount::resolve(path)?;
109    scheme.unlink(&relative_path)?;
110    Ok(())
111}
112
113/// Rename a file or directory (must be within the same mount).
114pub fn rename(old_path: &str, new_path: &str) -> Result<(), SyscallError> {
115    let (scheme, old_rel) = mount::resolve(old_path)?;
116    let (scheme2, new_rel) = mount::resolve(new_path)?;
117    if !Arc::ptr_eq(&scheme, &scheme2) {
118        return Err(SyscallError::NotSupported);
119    }
120    scheme.rename(&old_rel, &new_rel)
121}
122
123/// Change permission bits on a path.
124pub fn chmod(path: &str, mode: u32) -> Result<(), SyscallError> {
125    let (scheme, relative_path) = mount::resolve(path)?;
126    scheme.chmod(&relative_path, mode)
127}
128
129/// Change permission bits on an open fd.
130pub fn fchmod(fd: u32, mode: u32) -> Result<(), SyscallError> {
131    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
132    let fd_table = unsafe { &*task.process.fd_table.get() };
133    let file = fd_table.get(fd)?;
134    file.scheme().fchmod(file.file_id(), mode)
135}
136
137/// Truncate a file by path.
138pub fn truncate(path: &str, length: u64) -> Result<(), SyscallError> {
139    let (scheme, relative_path) = mount::resolve(path)?;
140    let res = scheme.open(&relative_path, OpenFlags::WRITE)?;
141    let r = scheme.truncate(res.file_id, length);
142    let _ = scheme.close(res.file_id);
143    r
144}
145
146/// Truncate an open fd.
147pub fn ftruncate(fd: u32, length: u64) -> Result<(), SyscallError> {
148    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
149    let fd_table = unsafe { &*task.process.fd_table.get() };
150    let file = fd_table.get(fd)?;
151    file.scheme().truncate(file.file_id(), length)
152}
153
154/// Create a hard link (must be within the same mount).
155pub fn link(old_path: &str, new_path: &str) -> Result<(), SyscallError> {
156    let (scheme, old_rel) = mount::resolve(old_path)?;
157    let (scheme2, new_rel) = mount::resolve(new_path)?;
158    if !Arc::ptr_eq(&scheme, &scheme2) {
159        return Err(SyscallError::NotSupported);
160    }
161    scheme.link(&old_rel, &new_rel)
162}
163
164/// Create a symbolic link.
165pub fn symlink(target: &str, link_path: &str) -> Result<(), SyscallError> {
166    let (scheme, link_rel) = mount::resolve(link_path)?;
167    scheme.symlink(target, &link_rel)
168}
169
170/// Read the target of a symbolic link.
171pub fn readlink(path: &str) -> Result<String, SyscallError> {
172    let (scheme, relative_path) = mount::resolve(path)?;
173    scheme.readlink(&relative_path)
174}
175
176/// Read from a file descriptor.
177pub fn read(fd: u32, buf: &mut [u8]) -> Result<usize, SyscallError> {
178    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
179    // SAFETY: Syscall context
180    let fd_table = unsafe { &*task.process.fd_table.get() };
181    let file = fd_table.get(fd)?;
182    file.read(buf)
183}
184
185/// Write to a file descriptor.
186pub fn write(fd: u32, buf: &[u8]) -> Result<usize, SyscallError> {
187    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
188    // SAFETY: Syscall context
189    let fd_table = unsafe { &*task.process.fd_table.get() };
190    let file = fd_table.get(fd)?;
191    file.write(buf)
192}
193
194/// Close a file descriptor.
195///
196/// Removes the fd from the table.  If this was the last Arc<OpenFile> reference
197/// (no dup'd / fork'd copies remain) the Drop impl will call scheme.close().
198pub fn close(fd: u32) -> Result<(), SyscallError> {
199    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
200    // SAFETY: Syscall context
201    let fd_table = unsafe { &mut *task.process.fd_table.get() };
202    let _file = fd_table.remove(fd)?;
203    Ok(())
204    // _file (Arc<OpenFile>) is dropped here; if refcount → 0, Drop fires → scheme.close()
205}
206
207/// Seek within a file.
208pub fn seek(fd: u32, offset: u64) -> Result<u64, SyscallError> {
209    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
210    // SAFETY: Syscall context
211    let fd_table = unsafe { &*task.process.fd_table.get() };
212    let file = fd_table.get(fd)?;
213    file.seek(offset)
214}
215
216/// Get current offset in a file.
217pub fn tell(fd: u32) -> Result<u64, SyscallError> {
218    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
219    // SAFETY: Syscall context
220    let fd_table = unsafe { &*task.process.fd_table.get() };
221    let file = fd_table.get(fd)?;
222    Ok(file.tell())
223}
224
225/// Get file size.
226pub fn fsize(fd: u32) -> Result<u64, SyscallError> {
227    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
228    // SAFETY: Syscall context
229    let fd_table = unsafe { &*task.process.fd_table.get() };
230    let file = fd_table.get(fd)?;
231    file.size()
232}
233
234/// Sync file to storage.
235pub fn fsync(fd: u32) -> Result<(), SyscallError> {
236    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
237    // SAFETY: Syscall context
238    let fd_table = unsafe { &*task.process.fd_table.get() };
239    let file = fd_table.get(fd)?;
240    file.sync()
241}
242
243/// POSIX lseek on a file descriptor.
244pub fn lseek(fd: u32, offset: i64, whence: u32) -> Result<u64, SyscallError> {
245    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
246    let fd_table = unsafe { &*task.process.fd_table.get() };
247    let file = fd_table.get(fd)?;
248    file.lseek(offset, whence)
249}
250
251/// fstat on an open file descriptor.
252pub fn fstat(fd: u32) -> Result<FileStat, SyscallError> {
253    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
254    let fd_table = unsafe { &*task.process.fd_table.get() };
255    let file = fd_table.get(fd)?;
256    file.stat()
257}
258
259/// stat by path (opens, stats, closes).
260pub fn stat_path(path: &str) -> Result<FileStat, SyscallError> {
261    let (scheme, relative_path) = mount::resolve(path)?;
262    let open_result = scheme.open(&relative_path, OpenFlags::READ)?;
263    let result = scheme.stat(open_result.file_id);
264    let _ = scheme.close(open_result.file_id);
265    result
266}
267
268/// Read directory entries from an open directory fd.
269pub fn getdents(fd: u32) -> Result<alloc::vec::Vec<DirEntry>, SyscallError> {
270    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
271    let fd_table = unsafe { &*task.process.fd_table.get() };
272    let file = fd_table.get(fd)?;
273    file.readdir()
274}
275
276/// Create a background stdin: a pipe read-end whose write end is immediately
277/// closed.  Any `read()` on the returned file will return 0 (EOF) at once,
278/// preventing processes launched in the background from blocking on stdin or
279/// spinning on EBADF.
280pub fn create_background_stdin() -> Arc<OpenFile> {
281    let pipe_scheme = get_pipe_scheme();
282    let (base_id, pipe) = pipe_scheme.create_pipe();
283
284    // Close write end now (refcount → 0 → write_closed = true).
285    // Subsequent reads on the read end will return EOF immediately.
286    pipe.close_write();
287
288    let dyn_scheme: DynScheme = pipe_scheme as Arc<dyn Scheme>;
289    Arc::new(OpenFile::new(
290        dyn_scheme,
291        base_id, // even = read end
292        String::from("pipe:[bg-stdin]"),
293        OpenFlags::READ,
294        FileFlags::PIPE,
295        None,
296    ))
297}
298
299/// Create a pipe, returning (read_fd, write_fd).
300pub fn pipe() -> Result<(u32, u32), SyscallError> {
301    let pipe_scheme = get_pipe_scheme();
302    let (base_id, _pipe) = pipe_scheme.create_pipe();
303
304    let dyn_scheme: DynScheme = pipe_scheme as Arc<dyn Scheme>;
305
306    let read_file = Arc::new(OpenFile::new(
307        dyn_scheme.clone(),
308        base_id,
309        String::from("pipe:[read]"),
310        OpenFlags::READ,
311        FileFlags::PIPE,
312        None,
313    ));
314    let write_file = Arc::new(OpenFile::new(
315        dyn_scheme.clone(),
316        base_id + 1,
317        String::from("pipe:[write]"),
318        OpenFlags::WRITE,
319        FileFlags::PIPE,
320        None,
321    ));
322
323    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
324    let fd_table = unsafe { &mut *task.process.fd_table.get() };
325    let read_fd = fd_table.insert(read_file);
326    let write_fd = fd_table.insert(write_file);
327
328    Ok((read_fd, write_fd))
329}
330
331/// Duplicate a file descriptor (POSIX dup).
332pub fn dup(old_fd: u32) -> Result<u32, SyscallError> {
333    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
334    let fd_table = unsafe { &mut *task.process.fd_table.get() };
335    fd_table.duplicate(old_fd)
336}
337
338/// Duplicate a file descriptor to a specific number (POSIX dup2).
339pub fn dup2(old_fd: u32, new_fd: u32) -> Result<u32, SyscallError> {
340    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
341    let fd_table = unsafe { &mut *task.process.fd_table.get() };
342    fd_table.duplicate_to(old_fd, new_fd)
343}
344
345/// Read all remaining bytes from a file descriptor.
346pub fn read_all(fd: u32) -> Result<alloc::vec::Vec<u8>, SyscallError> {
347    let mut out = alloc::vec::Vec::new();
348    let mut buf = [0u8; 4096];
349    loop {
350        let n = read(fd, &mut buf)?;
351        if n == 0 {
352            break;
353        }
354        out.extend_from_slice(&buf[..n]);
355    }
356    Ok(out)
357}
358
359// ============================================================================
360// Syscall Handlers (Native ABI)
361// ============================================================================
362
363/// Syscall handler for opening a file.
364pub fn sys_open(path_ptr: u64, path_len: u64, flags: u64) -> Result<u64, SyscallError> {
365    const MAX_PATH_LEN: usize = 4096;
366    if path_len == 0 || path_len as usize > MAX_PATH_LEN {
367        return Err(SyscallError::InvalidArgument);
368    }
369
370    let raw = read_user_path(path_ptr, path_len)?;
371    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
372    let cwd = unsafe { (&*task.process.cwd.get()).clone() };
373    let path = resolve_path(&raw, &cwd);
374
375    let open_flags = OpenFlags::from_bits_truncate(flags as u32);
376
377    let want_read =
378        open_flags.contains(OpenFlags::READ) || open_flags.contains(OpenFlags::DIRECTORY);
379    let want_write = open_flags.contains(OpenFlags::WRITE)
380        || open_flags.contains(OpenFlags::CREATE)
381        || open_flags.contains(OpenFlags::TRUNCATE)
382        || open_flags.contains(OpenFlags::APPEND);
383    crate::silo::enforce_path_for_current_task(&path, want_read, want_write, false)?;
384
385    let fd = open(&path, open_flags)?;
386    Ok(fd as u64)
387}
388
389/// Syscall handler for reading from a file.
390pub fn sys_read(fd: u32, buf_ptr: u64, buf_len: u64) -> Result<u64, SyscallError> {
391    if buf_len == 0 {
392        return Ok(0);
393    }
394
395    // Read directly into chunks to avoid large kernel allocations
396    let mut kbuf = [0u8; 4096];
397    let mut total_read = 0;
398
399    while total_read < buf_len as usize {
400        let to_read = core::cmp::min(kbuf.len(), buf_len as usize - total_read);
401        let n = read(fd, &mut kbuf[..to_read])?;
402        if n == 0 {
403            break;
404        }
405
406        let chunk_user = UserSliceWrite::new(buf_ptr + total_read as u64, n)?;
407        chunk_user.copy_from(&kbuf[..n]);
408
409        total_read += n;
410        if n < to_read {
411            break;
412        }
413    }
414
415    Ok(total_read as u64)
416}
417
418/// Syscall handler for writing to a file.
419pub fn sys_write(fd: u32, buf_ptr: u64, buf_len: u64) -> Result<u64, SyscallError> {
420    if buf_len == 0 {
421        return Ok(0);
422    }
423
424    // For stdout/stderr, fall back to direct console output only when no
425    // FD entry exists (early boot).  Once the FD table is populated (or
426    // after dup2 redirection) the normal VFS path is used.
427    if fd == 1 || fd == 2 {
428        let use_console = match current_task_clone() {
429            Some(t) => {
430                let fd_table = unsafe { &*t.process.fd_table.get() };
431                !fd_table.contains(fd)
432            }
433            None => true,
434        };
435        if use_console {
436            crate::silo::enforce_console_access()?;
437            let len = core::cmp::min(buf_len as usize, 16 * 1024);
438            let mut kbuf = [0u8; 4096];
439            let mut total_written = 0;
440            while total_written < len {
441                let to_write = core::cmp::min(kbuf.len(), len - total_written);
442                let chunk = UserSliceRead::new(buf_ptr + total_written as u64, to_write)?;
443                let n = chunk.copy_to(&mut kbuf[..to_write]);
444                if crate::arch::x86_64::vga::is_available() {
445                    if let Ok(s) = core::str::from_utf8(&kbuf[..n]) {
446                        crate::serial_print!("{}", s);
447                        crate::vga_print!("{}", s);
448                    } else {
449                        for &byte in &kbuf[..n] {
450                            crate::serial_print!("{}", byte as char);
451                        }
452                    }
453                } else {
454                    for &byte in &kbuf[..n] {
455                        crate::serial_print!("{}", byte as char);
456                    }
457                }
458                total_written += n;
459            }
460            return Ok(total_written as u64);
461        }
462    }
463
464    let mut kbuf = [0u8; 4096];
465    let mut total_written = 0;
466
467    while total_written < buf_len as usize {
468        let to_write = core::cmp::min(kbuf.len(), buf_len as usize - total_written);
469        let chunk_user = UserSliceRead::new(buf_ptr + total_written as u64, to_write)?;
470        chunk_user.copy_to(&mut kbuf[..to_write]);
471
472        let n = write(fd, &kbuf[..to_write])?;
473        total_written += n;
474        if n < to_write {
475            break;
476        }
477    }
478
479    Ok(total_written as u64)
480}
481
482/// Syscall handler for closing a file.
483pub fn sys_close(fd: u32) -> Result<u64, SyscallError> {
484    close(fd)?;
485    Ok(0)
486}
487
488/// Syscall handler for lseek.
489pub fn sys_lseek(fd: u32, offset: i64, whence: u32) -> Result<u64, SyscallError> {
490    lseek(fd, offset, whence)
491}
492
493/// Syscall handler for fstat.
494pub fn sys_fstat(fd: u32, stat_ptr: u64) -> Result<u64, SyscallError> {
495    let st = fstat(fd)?;
496    let user = UserSliceWrite::new(stat_ptr, core::mem::size_of::<FileStat>())?;
497    let bytes = unsafe {
498        core::slice::from_raw_parts(
499            &st as *const FileStat as *const u8,
500            core::mem::size_of::<FileStat>(),
501        )
502    };
503    user.copy_from(bytes);
504    Ok(0)
505}
506
507/// Syscall handler for stat (by path).
508pub fn sys_stat(path_ptr: u64, path_len: u64, stat_ptr: u64) -> Result<u64, SyscallError> {
509    const MAX_PATH_LEN: usize = 4096;
510    if path_len == 0 || path_len as usize > MAX_PATH_LEN {
511        return Err(SyscallError::InvalidArgument);
512    }
513    let raw = read_user_path(path_ptr, path_len)?;
514    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
515    let cwd = unsafe { (&*task.process.cwd.get()).clone() };
516    let path = resolve_path(&raw, &cwd);
517    crate::silo::enforce_path_for_current_task(&path, true, false, false)?;
518
519    let st = stat_path(&path)?;
520    let user_out = UserSliceWrite::new(stat_ptr, core::mem::size_of::<FileStat>())?;
521    let out_bytes = unsafe {
522        core::slice::from_raw_parts(
523            &st as *const FileStat as *const u8,
524            core::mem::size_of::<FileStat>(),
525        )
526    };
527    user_out.copy_from(out_bytes);
528    Ok(0)
529}
530
531/// Syscall handler for getdents.
532///
533/// Writes a packed array of `KernelDirent` entries into the user buffer.
534/// Returns the number of bytes written.
535pub fn sys_getdents(fd: u32, buf_ptr: u64, buf_len: u64) -> Result<u64, SyscallError> {
536    use strat9_abi::data::DirentHeader;
537
538    let entries = getdents(fd)?;
539    let mut offset: usize = 0;
540    let buf_size = buf_len as usize;
541
542    for entry in &entries {
543        let name_bytes = entry.name.as_bytes();
544        let name_len = core::cmp::min(name_bytes.len(), 255) as u16;
545        let entry_size = DirentHeader::SIZE + name_len as usize + 1;
546
547        if offset + entry_size > buf_size {
548            break;
549        }
550
551        let user = UserSliceWrite::new(buf_ptr + offset as u64, entry_size)?;
552        let mut kbuf = [0u8; 268];
553        kbuf[0..8].copy_from_slice(&entry.ino.to_le_bytes());
554        kbuf[8] = entry.file_type;
555        kbuf[9..11].copy_from_slice(&name_len.to_le_bytes());
556        kbuf[11] = 0; // DirentHeader::_padding
557        kbuf[12..12 + name_len as usize].copy_from_slice(&name_bytes[..name_len as usize]);
558        kbuf[12 + name_len as usize] = 0;
559        user.copy_from(&kbuf[..entry_size]);
560
561        offset += entry_size;
562    }
563
564    Ok(offset as u64)
565}
566
567/// Syscall handler for pipe.
568pub fn sys_pipe(fds_ptr: u64) -> Result<u64, SyscallError> {
569    let (read_fd, write_fd) = pipe()?;
570    let user = UserSliceWrite::new(fds_ptr, 8)?; // 2 x u32
571    let mut buf = [0u8; 8];
572    buf[0..4].copy_from_slice(&read_fd.to_le_bytes());
573    buf[4..8].copy_from_slice(&write_fd.to_le_bytes());
574    user.copy_from(&buf);
575    Ok(0)
576}
577
578/// Syscall handler for dup.
579pub fn sys_dup(old_fd: u32) -> Result<u64, SyscallError> {
580    let new_fd = dup(old_fd)?;
581    Ok(new_fd as u64)
582}
583
584/// Syscall handler for dup2.
585pub fn sys_dup2(old_fd: u32, new_fd: u32) -> Result<u64, SyscallError> {
586    let fd = dup2(old_fd, new_fd)?;
587    Ok(fd as u64)
588}
589
590// ─── Path helpers ─────────────────────────────────────────────────────────────
591
592/// Read a NUL-terminated or length-bounded path from user space.
593///
594/// `path_ptr` and `path_len` come directly from syscall arguments.
595/// If `path_len` is 0 the string is assumed to be NUL-terminated up to 4096 bytes.
596fn read_user_path(path_ptr: u64, path_len: u64) -> Result<alloc::string::String, SyscallError> {
597    const MAX_PATH: usize = 4096;
598    let len = if path_len == 0 || path_len as usize > MAX_PATH {
599        MAX_PATH
600    } else {
601        path_len as usize
602    };
603    let user = UserSliceRead::new(path_ptr, len)?;
604    let bytes = user.read_to_vec();
605    // Trim at first NUL byte if present.
606    let trimmed = bytes.split(|&b| b == 0).next().unwrap_or(&bytes);
607    if trimmed.is_empty() {
608        return Err(SyscallError::InvalidArgument);
609    }
610    core::str::from_utf8(trimmed)
611        .map(|s| alloc::string::String::from(s))
612        .map_err(|_| SyscallError::InvalidArgument)
613}
614
615/// Resolve `path` relative to the current working directory when it is not
616/// absolute. Returns the normalized absolute path.
617fn resolve_path(path: &str, cwd: &str) -> alloc::string::String {
618    let raw = if path.starts_with('/') {
619        alloc::string::String::from(path)
620    } else if cwd.ends_with('/') {
621        alloc::format!("{}{}", cwd, path)
622    } else {
623        alloc::format!("{}/{}", cwd, path)
624    };
625    normalize_path(&raw)
626}
627
/// Normalize an absolute path lexically.
///
/// Empty segments (from repeated or trailing `/`) and `.` segments are
/// dropped; each `..` removes the previously kept segment, and is ignored
/// when there is nothing left to remove. The result always starts with `/`
/// and never ends with one, except for the root itself (`"/"`).
fn normalize_path(path: &str) -> alloc::string::String {
    let mut kept: alloc::vec::Vec<&str> = alloc::vec::Vec::new();
    for component in path.split('/') {
        if component.is_empty() || component == "." {
            continue;
        }
        if component == ".." {
            // Popping an empty stack is a no-op: `..` cannot climb above root.
            kept.pop();
        } else {
            kept.push(component);
        }
    }

    if kept.is_empty() {
        return alloc::string::String::from("/");
    }

    let mut normalized = alloc::string::String::with_capacity(path.len());
    for component in kept {
        normalized.push('/');
        normalized.push_str(component);
    }
    normalized
}
651
652// ─── New VFS syscall handlers ─────────────────────────────────────────────────
653
654/// SYS_CHDIR (440): Change current working directory.
655pub fn sys_chdir(path_ptr: u64, path_len: u64) -> Result<u64, SyscallError> {
656    let raw = read_user_path(path_ptr, path_len)?;
657    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
658    let cwd = unsafe { &*task.process.cwd.get() };
659    let abs = resolve_path(&raw, cwd);
660    crate::silo::enforce_path_for_current_task(&abs, true, false, false)?;
661
662    let (scheme, rel) = mount::resolve(&abs)?;
663    let res = scheme.open(&rel, OpenFlags::READ | OpenFlags::DIRECTORY)?;
664    let _ = scheme.close(res.file_id);
665
666    unsafe { *task.process.cwd.get() = abs };
667    Ok(0)
668}
669
670/// SYS_FCHDIR (441): Change cwd using an open file descriptor.
671pub fn sys_fchdir(fd: u32) -> Result<u64, SyscallError> {
672    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
673    let path = {
674        let fd_table = unsafe { &*task.process.fd_table.get() };
675        let file = fd_table.get(fd)?;
676        alloc::string::String::from(file.path())
677    };
678    unsafe { *task.process.cwd.get() = path };
679    Ok(0)
680}
681
682/// SYS_GETCWD (442): Write the current working directory into a user buffer.
683pub fn sys_getcwd(buf_ptr: u64, buf_len: u64) -> Result<u64, SyscallError> {
684    if buf_len == 0 {
685        return Err(SyscallError::InvalidArgument);
686    }
687    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
688    let cwd = unsafe { (&*task.process.cwd.get()).clone() };
689    let bytes = cwd.as_bytes();
690    let needed = bytes.len() + 1; // include NUL terminator
691    if needed > buf_len as usize {
692        return Err(SyscallError::Range);
693    }
694    let out = UserSliceWrite::new(buf_ptr, needed)?;
695    let mut tmp = alloc::vec![0u8; needed];
696    tmp[..bytes.len()].copy_from_slice(bytes);
697    tmp[bytes.len()] = 0;
698    out.copy_from(&tmp);
699    Ok(needed as u64) // Like Linux: returns byte count written (including NUL)
700}
701
702/// SYS_IOCTL (443): I/O control — stub.
703///
704/// Returns ENOTTY for all file descriptors that are not character devices.
705/// Terminal / PTY support will be added when a TTY driver is implemented.
706pub fn sys_ioctl(_fd: u32, _request: u64, _arg: u64) -> Result<u64, SyscallError> {
707    Err(SyscallError::NotATty)
708}
709
710/// SYS_UMASK (444): Set file creation mask; return the old mask.
711pub fn sys_umask(mask: u64) -> Result<u64, SyscallError> {
712    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
713    let old = task
714        .process
715        .umask
716        .swap(mask as u32 & 0o777, core::sync::atomic::Ordering::Relaxed);
717    Ok(old as u64)
718}
719
720/// SYS_UNLINK (445): Remove a file.
721pub fn sys_unlink(path_ptr: u64, path_len: u64) -> Result<u64, SyscallError> {
722    let raw = read_user_path(path_ptr, path_len)?;
723    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
724    let cwd = unsafe { (&*task.process.cwd.get()).clone() };
725    let abs = resolve_path(&raw, &cwd);
726    crate::silo::enforce_path_for_current_task(&abs, false, true, false)?;
727    unlink(&abs)?;
728    Ok(0)
729}
730
731/// SYS_RMDIR (446): Remove an empty directory.
732pub fn sys_rmdir(path_ptr: u64, path_len: u64) -> Result<u64, SyscallError> {
733    let raw = read_user_path(path_ptr, path_len)?;
734    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
735    let cwd = unsafe { (&*task.process.cwd.get()).clone() };
736    let abs = resolve_path(&raw, &cwd);
737    crate::silo::enforce_path_for_current_task(&abs, false, true, false)?;
738    unlink(&abs)?;
739    Ok(0)
740}
741
742/// SYS_MKDIR (447): Create a directory.
743pub fn sys_mkdir(path_ptr: u64, path_len: u64, mode: u64) -> Result<u64, SyscallError> {
744    let raw = read_user_path(path_ptr, path_len)?;
745    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
746    let umask = task
747        .process
748        .umask
749        .load(core::sync::atomic::Ordering::Relaxed);
750    let cwd = unsafe { (&*task.process.cwd.get()).clone() };
751    let abs = resolve_path(&raw, &cwd);
752    crate::silo::enforce_path_for_current_task(&abs, false, true, false)?;
753    let effective_mode = (mode as u32) & !umask;
754    mkdir(&abs, effective_mode)?;
755    Ok(0)
756}
757
758/// SYS_RENAME (448): Rename a file or directory.
759pub fn sys_rename(
760    old_ptr: u64,
761    old_len: u64,
762    new_ptr: u64,
763    new_len: u64,
764) -> Result<u64, SyscallError> {
765    let old = read_user_path(old_ptr, old_len)?;
766    let new = read_user_path(new_ptr, new_len)?;
767    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
768    let cwd = unsafe { (&*task.process.cwd.get()).clone() };
769    let old_abs = resolve_path(&old, &cwd);
770    let new_abs = resolve_path(&new, &cwd);
771    crate::silo::enforce_path_for_current_task(&old_abs, true, true, false)?;
772    crate::silo::enforce_path_for_current_task(&new_abs, false, true, false)?;
773    rename(&old_abs, &new_abs)?;
774    Ok(0)
775}
776
777/// SYS_LINK (449): Create a hard link.
778pub fn sys_link(
779    old_ptr: u64,
780    old_len: u64,
781    new_ptr: u64,
782    new_len: u64,
783) -> Result<u64, SyscallError> {
784    let old = read_user_path(old_ptr, old_len)?;
785    let new = read_user_path(new_ptr, new_len)?;
786    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
787    let cwd = unsafe { (&*task.process.cwd.get()).clone() };
788    let old_abs = resolve_path(&old, &cwd);
789    let new_abs = resolve_path(&new, &cwd);
790    crate::silo::enforce_path_for_current_task(&old_abs, true, false, false)?;
791    crate::silo::enforce_path_for_current_task(&new_abs, false, true, false)?;
792    link(&old_abs, &new_abs)?;
793    Ok(0)
794}
795
796/// SYS_SYMLINK (450): Create a symbolic link.
797pub fn sys_symlink(
798    target_ptr: u64,
799    target_len: u64,
800    linkpath_ptr: u64,
801    linkpath_len: u64,
802) -> Result<u64, SyscallError> {
803    let target = read_user_path(target_ptr, target_len)?;
804    let linkpath = read_user_path(linkpath_ptr, linkpath_len)?;
805    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
806    let cwd = unsafe { (&*task.process.cwd.get()).clone() };
807    let link_abs = resolve_path(&linkpath, &cwd);
808    crate::silo::enforce_path_for_current_task(&link_abs, false, true, false)?;
809    symlink(&target, &link_abs)?;
810    Ok(0)
811}
812
813/// SYS_READLINK (451): Read a symbolic link.
814pub fn sys_readlink(
815    path_ptr: u64,
816    path_len: u64,
817    buf_ptr: u64,
818    buf_len: u64,
819) -> Result<u64, SyscallError> {
820    let path = read_user_path(path_ptr, path_len)?;
821    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
822    let cwd = unsafe { (&*task.process.cwd.get()).clone() };
823    let abs = resolve_path(&path, &cwd);
824    crate::silo::enforce_path_for_current_task(&abs, true, false, false)?;
825    let target = readlink(&abs)?;
826    let bytes = target.as_bytes();
827    let n = bytes.len().min(buf_len as usize);
828    let user = UserSliceWrite::new(buf_ptr, n)?;
829    user.copy_from(&bytes[..n]);
830    Ok(n as u64)
831}
832
833/// SYS_CHMOD (452): Change file mode bits.
834pub fn sys_chmod(path_ptr: u64, path_len: u64, mode: u64) -> Result<u64, SyscallError> {
835    let path = read_user_path(path_ptr, path_len)?;
836    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
837    let cwd = unsafe { (&*task.process.cwd.get()).clone() };
838    let abs = resolve_path(&path, &cwd);
839    crate::silo::enforce_path_for_current_task(&abs, false, true, false)?;
840    chmod(&abs, mode as u32)?;
841    Ok(0)
842}
843
844/// SYS_FCHMOD (453): Change file mode bits on open fd.
845pub fn sys_fchmod(fd: u32, mode: u64) -> Result<u64, SyscallError> {
846    fchmod(fd, mode as u32)?;
847    Ok(0)
848}
849
850/// SYS_TRUNCATE (454): Truncate file to given length.
851pub fn sys_truncate(path_ptr: u64, path_len: u64, length: u64) -> Result<u64, SyscallError> {
852    let path = read_user_path(path_ptr, path_len)?;
853    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
854    let cwd = unsafe { (&*task.process.cwd.get()).clone() };
855    let abs = resolve_path(&path, &cwd);
856    crate::silo::enforce_path_for_current_task(&abs, false, true, false)?;
857    truncate(&abs, length)?;
858    Ok(0)
859}
860
861/// SYS_FTRUNCATE (455): Truncate open fd to given length.
862pub fn sys_ftruncate(fd: u32, length: u64) -> Result<u64, SyscallError> {
863    ftruncate(fd, length)?;
864    Ok(0)
865}
866
867/// SYS_PREAD (456): Read at offset without changing fd position.
868pub fn sys_pread(fd: u32, buf_ptr: u64, buf_len: u64, offset: u64) -> Result<u64, SyscallError> {
869    if buf_len == 0 {
870        return Ok(0);
871    }
872    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
873    let fd_table = unsafe { &*task.process.fd_table.get() };
874    let file = fd_table.get(fd)?;
875    let mut kbuf = [0u8; 4096];
876    let mut total = 0usize;
877    let mut off = offset;
878    while total < buf_len as usize {
879        let to_read = core::cmp::min(kbuf.len(), buf_len as usize - total);
880        let n = file.pread(off, &mut kbuf[..to_read])?;
881        if n == 0 {
882            break;
883        }
884        let user = UserSliceWrite::new(buf_ptr + total as u64, n)?;
885        user.copy_from(&kbuf[..n]);
886        total += n;
887        off += n as u64;
888        if n < to_read {
889            break;
890        }
891    }
892    Ok(total as u64)
893}
894
895/// SYS_PWRITE (457): Write at offset without changing fd position.
896pub fn sys_pwrite(fd: u32, buf_ptr: u64, buf_len: u64, offset: u64) -> Result<u64, SyscallError> {
897    if buf_len == 0 {
898        return Ok(0);
899    }
900    let task = current_task_clone().ok_or(SyscallError::PermissionDenied)?;
901    let fd_table = unsafe { &*task.process.fd_table.get() };
902    let file = fd_table.get(fd)?;
903    let mut kbuf = [0u8; 4096];
904    let mut total = 0usize;
905    let mut off = offset;
906    while total < buf_len as usize {
907        let to_write = core::cmp::min(kbuf.len(), buf_len as usize - total);
908        let user = UserSliceRead::new(buf_ptr + total as u64, to_write)?;
909        user.copy_to(&mut kbuf[..to_write]);
910        let n = file.pwrite(off, &kbuf[..to_write])?;
911        total += n;
912        off += n as u64;
913        if n < to_write {
914            break;
915        }
916    }
917    Ok(total as u64)
918}
919
920// ============================================================================
921// Global PipeScheme singleton
922// ============================================================================
923
924static PIPE_SCHEME: SpinLock<Option<Arc<PipeScheme>>> = SpinLock::new(None);
925
926/// Returns pipe scheme.
927fn get_pipe_scheme() -> Arc<PipeScheme> {
928    let mut guard = PIPE_SCHEME.lock();
929    if let Some(ref scheme) = *guard {
930        return scheme.clone();
931    }
932    let scheme = Arc::new(PipeScheme::new());
933    *guard = Some(scheme.clone());
934    scheme
935}
936
937/// Performs the build pci inventory text operation.
938fn build_pci_inventory_text() -> String {
939    #[cfg(target_arch = "x86_64")]
940    {
941        let devices = crate::hardware::pci_client::all_devices();
942        let mut out = String::new();
943        out.push_str("bus dev fn vendor device class subclass prog_if irq\n");
944        for dev in devices.iter() {
945            let _ = writeln!(
946                out,
947                "{:02x} {:02x} {} {:04x} {:04x} {:02x} {:02x} {:02x} {}",
948                dev.address.bus,
949                dev.address.device,
950                dev.address.function,
951                dev.vendor_id,
952                dev.device_id,
953                dev.class_code,
954                dev.subclass,
955                dev.prog_if,
956                dev.interrupt_line
957            );
958        }
959        return out;
960    }
961
962    #[cfg(not(target_arch = "x86_64"))]
963    {
964        String::from("unsupported-arch\n")
965    }
966}
967
968// ============================================================================
969// Initialization
970// ============================================================================
971
972/// Initialize the VFS with default mounts.
973pub fn init() {
974    log::info!("[VFS] Initializing virtual file system");
975
976    // ── Root filesystem (RamFS on "/") ────────────────────────────────────
977    // Must be mounted before any other scheme so that longest-prefix resolution
978    // falls back to "/" for paths not covered by a more specific mount point.
979    let rootfs = alloc::sync::Arc::new(RamfsScheme::new());
980    if let Err(e) = mount::mount("/", rootfs.clone()) {
981        log::error!("[VFS] Failed to mount /: {:?}", e);
982    } else {
983        // Populate the standard POSIX directory skeleton.
984        for dir in &[
985            "bin", "sbin", "etc", "tmp", "usr", "lib", "lib64", "home", "root", "run", "var",
986            "mnt", "opt", "srv", "dev", "proc", "sys",
987        ] {
988            rootfs.ensure_dir(dir);
989        }
990        // Nested standard directories
991        rootfs.ensure_dir("usr/bin");
992        rootfs.ensure_dir("usr/sbin");
993        rootfs.ensure_dir("usr/lib");
994        rootfs.ensure_dir("var/log");
995        rootfs.ensure_dir("var/tmp");
996        rootfs.ensure_dir("run/lock");
997        log::info!("[VFS] Mounted / (ramfs) with standard directory tree");
998    }
999
1000    // Initialize scheme router
1001    if let Err(e) = scheme_router::init_builtin_schemes() {
1002        log::error!("[VFS] Failed to init builtin schemes: {:?}", e);
1003    }
1004
1005    // Create and mount kernel scheme for /sys
1006    let kernel_scheme = KernelScheme::new();
1007
1008    // Register some basic kernel files
1009    static VERSION: &[u8] = b"Strat9-OS v0.1.0 (Bedrock)\n";
1010    kernel_scheme.register("version", VERSION.as_ptr(), VERSION.len());
1011
1012    static CMDLINE: &[u8] = b"quiet loglevel=debug\n";
1013    kernel_scheme.register("cmdline", CMDLINE.as_ptr(), CMDLINE.len());
1014
1015    let pci_inventory = build_pci_inventory_text().into_bytes().into_boxed_slice();
1016    let pci_inventory = Box::leak(pci_inventory);
1017    kernel_scheme.register("pci/inventory", pci_inventory.as_ptr(), pci_inventory.len());
1018
1019    let pci_count = pci_inventory
1020        .split(|b| *b == b'\n')
1021        .skip(1)
1022        .filter(|line| !line.is_empty())
1023        .count();
1024    let mut pci_count_str = String::new();
1025    let _ = writeln!(pci_count_str, "{}", pci_count);
1026    let pci_count = Box::leak(pci_count_str.into_bytes().into_boxed_slice());
1027    kernel_scheme.register("pci/count", pci_count.as_ptr(), pci_count.len());
1028
1029    // /sys/cpu/* — CPU information scheme (Plan9-style)
1030    {
1031        let host = crate::arch::x86_64::cpuid::host();
1032        // VFS initializes before SMP/percpu registration is complete.
1033        // Expose at least 1 CPU (BSP) instead of showing 0.
1034        let cpu_count = crate::arch::x86_64::percpu::get_cpu_count().max(1);
1035
1036        let count_s = Box::leak(
1037            alloc::format!("{}\n", cpu_count)
1038                .into_bytes()
1039                .into_boxed_slice(),
1040        );
1041        kernel_scheme.register("cpu/count", count_s.as_ptr(), count_s.len());
1042
1043        let vendor_s = Box::leak(
1044            alloc::format!("{}\n", host.vendor_string())
1045                .into_bytes()
1046                .into_boxed_slice(),
1047        );
1048        kernel_scheme.register("cpu/vendor", vendor_s.as_ptr(), vendor_s.len());
1049
1050        let model_s = Box::leak(
1051            alloc::format!("{}\n", host.model_name_str())
1052                .into_bytes()
1053                .into_boxed_slice(),
1054        );
1055        kernel_scheme.register("cpu/model", model_s.as_ptr(), model_s.len());
1056
1057        let features_s = Box::leak(
1058            alloc::format!(
1059                "{}\n",
1060                crate::arch::x86_64::cpuid::features_to_flags_string(host.features)
1061            )
1062            .into_bytes()
1063            .into_boxed_slice(),
1064        );
1065        kernel_scheme.register("cpu/features", features_s.as_ptr(), features_s.len());
1066
1067        let xcr0_s = Box::leak(
1068            alloc::format!("{:#x}\n", host.max_xcr0)
1069                .into_bytes()
1070                .into_boxed_slice(),
1071        );
1072        kernel_scheme.register("cpu/xcr0", xcr0_s.as_ptr(), xcr0_s.len());
1073
1074        let xsave_s = Box::leak(
1075            alloc::format!("{}\n", host.xsave_size)
1076                .into_bytes()
1077                .into_boxed_slice(),
1078        );
1079        kernel_scheme.register("cpu/xsave_size", xsave_s.as_ptr(), xsave_s.len());
1080    }
1081
1082    let kernel_scheme = Arc::new(kernel_scheme);
1083
1084    // Mount /sys
1085    if let Err(e) = mount::mount("/sys", kernel_scheme.clone()) {
1086        log::error!("[VFS] Failed to mount /sys: {:?}", e);
1087    } else {
1088        log::info!("[VFS] Mounted /sys (kernel scheme)");
1089    }
1090
1091    // Register and mount procfs
1092    let proc_scheme = Arc::new(ProcScheme::new());
1093    if let Err(e) = register_scheme("proc", proc_scheme.clone()) {
1094        log::error!("[VFS] Failed to register proc scheme: {:?}", e);
1095    } else {
1096        log::info!("[VFS] Registered proc scheme");
1097    }
1098
1099    if let Err(e) = mount::mount("/proc", proc_scheme) {
1100        log::error!("[VFS] Failed to mount /proc: {:?}", e);
1101    } else {
1102        log::info!("[VFS] Mounted /proc (procfs)");
1103    }
1104
1105    let ipc_scheme = Arc::new(ipcfs::IpcControlScheme::new());
1106    if let Err(e) = mount::mount("/ipc", ipc_scheme) {
1107        log::error!("[VFS] Failed to mount /ipc: {:?}", e);
1108    } else {
1109        log::info!("[VFS] Mounted /ipc (kernel ipc control scheme)");
1110    }
1111
1112    // Mount /dev — raw block-device scheme backed by AHCI.
1113    // The scheme is registered regardless of whether a disk is present:
1114    // device files appear dynamically when the hardware is available.
1115    let dev_scheme = Arc::new(BlkDevScheme::new());
1116    if let Err(e) = mount::mount("/dev", dev_scheme) {
1117        log::error!("[VFS] Failed to mount /dev: {:?}", e);
1118    } else {
1119        log::info!("[VFS] Mounted /dev (block-device scheme)");
1120    }
1121
1122    // Console scheme (/dev/console) — backs stdin/stdout/stderr for ELF processes
1123    let console = console_scheme::init_console_scheme();
1124    if let Err(e) = mount::mount("/dev/console", console) {
1125        log::error!("[VFS] Failed to mount /dev/console: {:?}", e);
1126    } else {
1127        log::info!("[VFS] Mounted /dev/console (serial + keyboard)");
1128    }
1129
1130    // PTY scheme (/dev/pts) — pseudo-terminals for interactive programs
1131    pty_scheme::init_pty_scheme();
1132    log::info!("[VFS] Mounted /dev/pts (PTY scheme)");
1133
1134    log::info!("[VFS] VFS ready");
1135}