Skip to main content

strat9_kernel/arch/x86_64/
syscall.rs

1//! SYSCALL/SYSRET interface for Strat9-OS
2//!
3//! Configures the x86_64 SYSCALL/SYSRET MSRs and provides the naked
4//! assembly entry point that bridges Ring 3 → Ring 0 → Rust dispatcher.
5//!
6//! ## Per-CPU design (SWAPGS + GS-base)
7//!
8//! SYSCALL entry uses SWAPGS to switch GS base to a per-CPU block
9//! that stores the kernel RSP and temporary user RSP. This enables
10//! SMP-safe SYSCALL entry without a global KERNEL_RSP.
11//!
12//! ## Register convention on SYSCALL entry
13//!
14//! CPU sets: RCX = user RIP, R11 = user RFLAGS, IF cleared via FMASK.
15//! Userspace passes: RAX = syscall number, RDI/RSI/RDX/R10/R8/R9 = args 1-6.
16
// MSR addresses used to configure the SYSCALL/SYSRET mechanism.

/// Extended Feature Enable Register; carries the `EFER_SCE` enable bit.
const IA32_EFER: u32 = 0xC000_0080;
/// Segment selector bases: kernel CS/SS in bits 47:32, user CS/SS base in bits 63:48.
const IA32_STAR: u32 = 0xC000_0081;
/// RIP loaded by the CPU on a 64-bit SYSCALL.
const IA32_LSTAR: u32 = 0xC000_0082;
/// RFLAGS mask: every bit set in this MSR is cleared in RFLAGS on SYSCALL.
const IA32_FMASK: u32 = 0xC000_0084;

/// EFER bit: System Call Extensions
const EFER_SCE: u64 = 1 << 0;

/// FMASK: RFLAGS bits cleared by the CPU on SYSCALL entry.
///
/// The user's original RFLAGS is preserved in R11 before the mask is applied,
/// so clearing a bit here never changes what userspace sees after return.
///
/// - TF (0x100): no single-step trap on the first kernel instruction.
/// - IF (0x200): interrupts stay off until the entry stub has switched stacks.
/// - DF (0x400): the Rust/C ABI requires the direction flag to be clear.
/// - NT (0x4000): a user-set NT would make any kernel `iretq` raise #GP in
///   long mode.
/// - AC (0x4_0000): a user-set AC would suppress SMAP checks in Ring 0 when
///   SMAP is enabled.
const FMASK_VALUE: u64 = 0x100 // TF
    | 0x200 // IF
    | 0x400 // DF
    | 0x4000 // NT
    | 0x4_0000; // AC
29
30/// Update the kernel RSP used by the SYSCALL entry point.
31///
32/// Called by the scheduler on every context switch to point to the
33/// top of the new task's kernel stack.
34pub fn set_kernel_rsp(rsp: u64) {
35    // SAFETY: Called with interrupts disabled from the scheduler.
36    crate::arch::x86_64::percpu::set_kernel_rsp_current(rsp);
37}
38
39/// Initialize the SYSCALL/SYSRET MSRs.
40///
41/// Must be called after GDT init (needs segment selectors).
42pub fn init() {
43    use super::{rdmsr, wrmsr};
44
45    // Enable System Call Extensions in EFER
46    let efer = rdmsr(IA32_EFER);
47    wrmsr(IA32_EFER, efer | EFER_SCE);
48
49    // STAR: kernel CS/SS in [47:32], user CS/SS base in [63:48]
50    let star = super::gdt::star_msr_value();
51    wrmsr(IA32_STAR, star);
52
53    // LSTAR: RIP loaded on SYSCALL
54    let entry_addr = syscall_entry as *const () as u64;
55    wrmsr(IA32_LSTAR, entry_addr);
56
57    // FMASK: bits to clear in RFLAGS on SYSCALL
58    wrmsr(IA32_FMASK, FMASK_VALUE);
59
60    log::info!(
61        "SYSCALL/SYSRET initialized: LSTAR={:#x}, STAR={:#x}, FMASK={:#x}",
62        entry_addr,
63        star,
64        FMASK_VALUE,
65    );
66}
67
68/// The SYSCALL entry point (naked function).
69///
70/// On entry from userspace:
71/// - RCX = user RIP (saved by CPU)
72/// - R11 = user RFLAGS (saved by CPU)
73/// - RSP = user stack pointer (NOT saved by CPU — we must save it)
74/// - IF = 0 (cleared by FMASK)
75/// - RAX = syscall number
76/// - RDI, RSI, RDX, R10, R8, R9 = arguments 1-6
77///
78/// We build a `SyscallFrame` on the kernel stack and call the Rust dispatcher.
79#[unsafe(naked)]
80unsafe extern "C" fn syscall_entry() {
81    core::arch::naked_asm!(
82        // Swap GS to kernel base (per-CPU)
83        "swapgs",
84
85        // Save user RSP and switch to kernel stack (per-CPU via GS)
86        "mov gs:[{user_rsp_off}], rsp",
87        "mov rsp, gs:[{kernel_rsp_off}]",
88
89        // Build IRET-compatible frame on kernel stack (for potential IRET exit)
90        // Push order: SS, RSP, RFLAGS, CS, RIP (reverse of IRET pop order)
91        "push 0x23",               // User SS (user_data | RPL3)
92        "push gs:[{user_rsp_off}]",// User RSP
93        "push r11",                // User RFLAGS (saved by CPU in R11)
94        "push 0x2B",               // User CS (user_code64 | RPL3)
95        "push rcx",                // User RIP (saved by CPU in RCX)
96
97        // Save all general-purpose registers (SyscallFrame layout)
98        "push rax",                // Syscall number
99        "push rcx",                // (user RIP, saved again for frame access)
100        "push rdx",
101        "push rdi",
102        "push rsi",
103        "push r8",
104        "push r9",
105        "push r10",
106        "push r11",                // (user RFLAGS, saved again)
107        "push rbx",
108        "push rbp",
109        "push r12",
110        "push r13",
111        "push r14",
112        "push r15",
113
114        // Call Rust dispatcher: rdi = pointer to SyscallFrame
115        "mov rdi, rsp",
116        "call {dispatch}",
117
118        // Return value is in RAX — write it into the frame's rax slot
119        // SyscallFrame layout: r15 is at RSP+0, ..., rax is at RSP+14*8 = RSP+112
120        "mov [rsp + 14*8], rax",
121
122        // Restore general-purpose registers
123        "pop r15",
124        "pop r14",
125        "pop r13",
126        "pop r12",
127        "pop rbp",
128        "pop rbx",
129        "pop r11",
130        "pop r10",
131        "pop r9",
132        "pop r8",
133        "pop rsi",
134        "pop rdi",
135        "pop rdx",
136        "pop rcx",
137        "pop rax",                 // Restored return value
138
139        // Now RSP points to the RIP of the IRET frame.
140        // Peek at user RIP without consuming it yet.
141        "mov rcx, [rsp]",
142
143        // Canonical address check on RCX to prevent privilege escalation.
144        // If a non-canonical address was somehow placed in RCX, SYSRETQ would
145        // execute it in Ring 0 on some CPUs (Intel erratum).
146        // Sign-extend bit 47 to bits 48-63:
147        "mov r11, rcx",
148        "sar r11, 47",
149        "cmp r11, 0",
150        "je 2f",
151        "cmp r11, -1",
152        "je 2f",
153
154        // Slow IRET path (non-canonical or fallback)
155        // The stack is already perfectly set up as an IRET frame [RIP, CS, RFLAGS, RSP, SS].
156        "swapgs",                  // Restore user GS base
157        "iretq",
158
159        "2:",
160        // SYSRETQ fast path — skip RIP and CS in one step.
161        "add rsp, 16",             // Skip RIP + CS
162        "pop r11",                 // User RFLAGS into R11
163        "pop rsp",                 // User RSP
164        "swapgs",                  // Restore user GS base
165        // SYSRETQ: RCX→RIP, R11→RFLAGS, loads user CS/SS from STAR
166        "sysretq",
167
168        user_rsp_off = const crate::arch::x86_64::percpu::USER_RSP_OFFSET,
169        kernel_rsp_off = const crate::arch::x86_64::percpu::KERNEL_RSP_OFFSET,
170        dispatch = sym crate::syscall::dispatch,
171    );
172}