// strat9_kernel/arch/x86_64/tlb.rs

//! TLB (Translation Lookaside Buffer) shootdown for SMP.
//!
//! When a page table entry is modified on one CPU, all other CPUs that might
//! have cached that entry in their TLB must be notified to invalidate it.
//!
//! This implementation uses a per-CPU mailbox system inspired by Asterinas:
//! 1. Each CPU has its own queue of pending TLB operations.
//! 2. The initiator pushes an operation into each target's queue.
//! 3. The initiator sends a TLB shootdown IPI to all targets.
//! 4. The targets process their own queue and set an ACK flag.
//! 5. The initiator waits for all ACK flags to become true.
//!
//! This avoids global lock contention and race conditions on global counters.

use core::sync::atomic::{AtomicBool, Ordering};
use x86_64::VirtAddr;

use crate::sync::SpinLock;
19
/// Type of TLB shootdown operation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TlbShootdownKind {
    /// No pending shootdown (empty mailbox slot).
    None,
    /// Invalidate a single page.
    SinglePage,
    /// Invalidate a range of pages.
    Range,
    /// Flush all TLB entries (full CR3 reload).
    Full,
}

/// A single TLB operation queued in a CPU's mailbox.
#[derive(Debug, Clone, Copy)]
struct TlbOp {
    /// What kind of invalidation to perform.
    kind: TlbShootdownKind,
    /// First virtual address covered (inclusive). Unused for `Full`.
    vaddr_start: u64,
    /// One past the last virtual address covered (exclusive). Unused for
    /// `SinglePage` (the single page starts at `vaddr_start`) and `Full`.
    vaddr_end: u64,
}

impl TlbOp {
    /// Sentinel "empty slot" value used to initialize mailbox arrays.
    const NONE: Self = Self {
        kind: TlbShootdownKind::None,
        vaddr_start: 0,
        vaddr_end: 0,
    };
}

/// Capacity of each per-CPU mailbox. Previously the literal `16` was
/// duplicated in the struct, `new()`, and `push()`; one constant keeps the
/// three in sync if the capacity is ever tuned.
const TLB_QUEUE_CAPACITY: usize = 16;

/// Per-CPU queue of pending TLB operations.
struct TlbQueue {
    /// Fixed-size slot array; only `ops[..count]` is meaningful.
    ops: [TlbOp; TLB_QUEUE_CAPACITY],
    /// Number of valid entries in `ops`.
    count: usize,
}

impl TlbQueue {
    /// Create an empty queue (`const` so it can initialize statics).
    const fn new() -> Self {
        Self {
            ops: [TlbOp::NONE; TLB_QUEUE_CAPACITY],
            count: 0,
        }
    }

    /// Append an operation to the mailbox.
    ///
    /// If the queue is full, degrade gracefully: replace every pending op
    /// with a single full flush, which subsumes all finer-grained ops.
    fn push(&mut self, op: TlbOp) {
        if self.count < self.ops.len() {
            self.ops[self.count] = op;
            self.count += 1;
        } else {
            // Queue full: upgrade to a full flush to be safe.
            self.ops[0] = TlbOp {
                kind: TlbShootdownKind::Full,
                vaddr_start: 0,
                vaddr_end: 0,
            };
            self.count = 1;
        }
    }

    /// Drop all pending operations. Slots are left stale; `count` gates all
    /// reads, so no clearing of the array contents is needed.
    fn clear(&mut self) {
        self.count = 0;
    }
}
82
/// Global array of per-CPU TLB queues ("mailboxes"), indexed by CPU index
/// (not APIC ID). Each entry is lock-protected because both an initiator
/// (pushing ops in `dispatch_op`) and the owning CPU (draining in the IPI
/// handler) access it.
static TLB_QUEUES: [SpinLock<TlbQueue>; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { SpinLock::new(TlbQueue::new()) }; crate::arch::x86_64::percpu::MAX_CPUS];
86
/// Global array of per-CPU acknowledgement flags, indexed by CPU index.
/// Initialized to `true` ("nothing pending") so an initiator never waits on
/// a CPU that was never sent an op; the initiator stores `false` before
/// sending the IPI and the target stores `true` after draining its queue.
static TLB_ACKS: [AtomicBool; crate::arch::x86_64::percpu::MAX_CPUS] =
    [const { AtomicBool::new(true) }; crate::arch::x86_64::percpu::MAX_CPUS];
90
91/// Initialize TLB shootdown system.
92pub fn init() {
93    log::debug!(
94        "TLB shootdown initialized (vector {:#x})",
95        crate::arch::x86_64::apic::IPI_TLB_SHOOTDOWN_VECTOR
96    );
97}
98
99/// Invalidate a single page on all CPUs.
100pub fn shootdown_page(vaddr: VirtAddr) {
101    let op = TlbOp {
102        kind: TlbShootdownKind::SinglePage,
103        vaddr_start: vaddr.as_u64(),
104        vaddr_end: vaddr.as_u64() + 4096,
105    };
106
107    // Flush local TLB.
108    unsafe { invlpg(vaddr) };
109
110    dispatch_op(op);
111}
112
113/// Invalidate a range of pages on all CPUs.
114pub fn shootdown_range(start: VirtAddr, end: VirtAddr) {
115    // Guard: end must be strictly after start; silently promote to full flush
116    // if the range is invalid rather than underflowing in u64 arithmetic.
117    if end.as_u64() <= start.as_u64() {
118        log::warn!(
119            "TLB shootdown_range: invalid range [{:#x}, {:#x}), using full flush",
120            start.as_u64(),
121            end.as_u64(),
122        );
123        shootdown_all();
124        return;
125    }
126
127    let page_count = (end.as_u64() - start.as_u64()) / 4096;
128    if page_count > 64 {
129        shootdown_all();
130        return;
131    }
132
133    let op = TlbOp {
134        kind: TlbShootdownKind::Range,
135        vaddr_start: start.as_u64(),
136        vaddr_end: end.as_u64(),
137    };
138
139    // Flush local TLB.
140    for i in 0..page_count {
141        let addr = start + (i * 4096);
142        unsafe { invlpg(addr) };
143    }
144
145    dispatch_op(op);
146}
147
148/// Flush all TLB entries on all CPUs.
149pub fn shootdown_all() {
150    let op = TlbOp {
151        kind: TlbShootdownKind::Full,
152        vaddr_start: 0,
153        vaddr_end: 0,
154    };
155
156    // Flush local TLB.
157    unsafe { flush_tlb_all() };
158
159    dispatch_op(op);
160}
161
/// Internal helper to dispatch an operation to target CPUs.
///
/// Protocol (the order is load-bearing):
///   1. push `op` into each online target's mailbox AND store `false` into
///      its ACK flag — both must happen before the IPI is sent, or the
///      target could handle the interrupt, find an empty queue, and ACK
///      before the op is visible;
///   2. send the shootdown IPI only to targets whose mailbox was updated;
///   3. spin until every such target sets its ACK flag back to `true`.
///
/// NOTE(review): two CPUs dispatching concurrently share the same TLB_ACKS
/// flags; presumably page-table mutation (and thus dispatch) is serialized
/// at a higher level — TODO confirm.
fn dispatch_op(op: TlbOp) {
    // Before the APIC is up there are no remote CPUs to notify; callers have
    // already flushed the local TLB, so there is nothing more to do.
    if !crate::arch::x86_64::apic::is_initialized() {
        return;
    }

    let mut targets = [0u32; crate::arch::x86_64::percpu::MAX_CPUS];
    let count = collect_tlb_targets(&mut targets);
    if count == 0 {
        return;
    }

    // `queued` tracks only the APIC IDs that were successfully pushed to a
    // mailbox queue.  We must not send an IPI to, or wait for an ACK from,
    // an AP whose queue was skipped — doing so would either waste cycles or
    // spin-wait forever on an ACK that was never cleared.
    let mut queued = [0u32; crate::arch::x86_64::percpu::MAX_CPUS];
    let mut queued_count = 0usize;

    // 1. Push op to each target's mailbox and clear their ACK.
    for i in 0..count {
        let apic_id = targets[i];
        // cpu_index_by_apic can return None if the AP went offline between
        // collect_tlb_targets and here; skip silently rather than panicking
        // in an IPI-send path.
        let cpu_idx = match crate::arch::x86_64::percpu::cpu_index_by_apic(apic_id) {
            Some(idx) => idx,
            None => {
                log::warn!(
                    "TLB dispatch: APIC {} not in per-CPU table, skipping",
                    apic_id
                );
                continue;
            }
        };
        let mut queue = TLB_QUEUES[cpu_idx].lock();
        queue.push(op);
        // Release pairing: the target's Acquire load of its queue (via the
        // lock) and the initiator's later Acquire load of the ACK.
        TLB_ACKS[cpu_idx].store(false, Ordering::Release);
        drop(queue);
        // Record as a successfully-queued target.
        queued[queued_count] = apic_id;
        queued_count += 1;
    }

    if queued_count == 0 {
        return;
    }

    // 2. Send IPI only to targets that actually received a queued op.
    for i in 0..queued_count {
        send_tlb_ipi(queued[i]);
    }

    // 3. Wait for ACKs from the same set.
    wait_for_acks(&queued[..queued_count]);
}
218
219/// IPI handler for TLB shootdown (called on receiving CPU).
220pub extern "C" fn tlb_shootdown_ipi_handler() {
221    let cpu_idx = current_cpu_index();
222
223    // 1. Take all pending ops from our mailbox.
224    let mut local_ops = [TlbOp::NONE; 16];
225    let mut count = 0;
226    {
227        let mut queue = TLB_QUEUES[cpu_idx].lock();
228        count = queue.count;
229        for i in 0..count {
230            local_ops[i] = queue.ops[i];
231        }
232        queue.clear();
233    }
234
235    // 2. Perform the operations.
236    for i in 0..count {
237        let op = &local_ops[i];
238        match op.kind {
239            TlbShootdownKind::None => {}
240            TlbShootdownKind::SinglePage => {
241                unsafe { invlpg(VirtAddr::new(op.vaddr_start)) };
242            }
243            TlbShootdownKind::Range => {
244                let start = op.vaddr_start;
245                let end = op.vaddr_end;
246                // Guard: corrupt TlbOp must not underflow in release build.
247                if end > start {
248                    let page_count = (end - start) / 4096;
249                    for j in 0..page_count {
250                        let addr = VirtAddr::new(start + j * 4096);
251                        unsafe { invlpg(addr) };
252                    }
253                } else {
254                    unsafe { flush_tlb_all() };
255                }
256            }
257            TlbShootdownKind::Full => {
258                unsafe { flush_tlb_all() };
259            }
260        }
261    }
262
263    // 3. Signal completion.
264    TLB_ACKS[cpu_idx].store(true, Ordering::Release);
265
266    // 4. Send EOI.
267    crate::arch::x86_64::apic::eoi();
268}
269
/// Invalidate a single TLB entry (local CPU only).
///
/// # Safety
/// Executes the privileged `invlpg` instruction, so the caller must be
/// running in ring 0. The instruction itself accesses no memory; it only
/// drops the TLB entry covering `vaddr`.
#[inline]
unsafe fn invlpg(vaddr: VirtAddr) {
    // `invlpg [addr]` — nostack/preserves_flags match the instruction's
    // actual behavior (no stack use, flags untouched).
    core::arch::asm!("invlpg [{}]", in(reg) vaddr.as_u64(), options(nostack, preserves_flags));
}
275
/// Flush all TLB entries by reloading CR3 (local CPU only).
///
/// # Safety
/// Must run in ring 0. Writing back the value just read does not switch
/// address spaces. Note: entries marked global (PGE) survive a CR3 reload;
/// kernel mappings using the global bit would need `invlpg` or a CR4.PGE
/// toggle instead — TODO confirm whether this kernel uses global pages.
#[inline]
unsafe fn flush_tlb_all() {
    use x86_64::registers::control::Cr3;
    let (frame, flags) = Cr3::read();
    // Re-writing the current root invalidates (non-global) TLB entries.
    Cr3::write(frame, flags);
}
283
/// Send TLB IPI.
///
/// Builds the low half of the APIC Interrupt Command Register: the shootdown
/// vector in bits 0-7, fixed delivery mode (bits 8-10 zero), and bit 14 set
/// (level = assert, required for fixed IPIs on modern APICs).
fn send_tlb_ipi(target_apic_id: u32) {
    let icr_low = crate::arch::x86_64::apic::IPI_TLB_SHOOTDOWN_VECTOR as u32 | (1 << 14);
    crate::arch::x86_64::apic::send_ipi_raw(target_apic_id, icr_low);
}
289
290/// Collect target APIC IDs into a pre-allocated buffer.
291fn collect_tlb_targets(targets: &mut [u32]) -> usize {
292    let my_cpu = crate::arch::x86_64::percpu::current_cpu_index();
293    let mut count = 0;
294    for cpu_idx in 0..crate::arch::x86_64::percpu::MAX_CPUS {
295        if !crate::arch::x86_64::percpu::tlb_ready(cpu_idx) {
296            continue;
297        }
298        if let Some(apic_id) = crate::arch::x86_64::percpu::apic_id_by_cpu_index(cpu_idx) {
299            if cpu_idx != my_cpu {
300                if count < targets.len() {
301                    targets[count] = apic_id;
302                    count += 1;
303                }
304            }
305        }
306    }
307    count
308}
309
310/// Wait for ACKs from specific APIC IDs.
311fn wait_for_acks(targets: &[u32]) {
312    const MAX_WAIT_CYCLES: usize = 10_000_000;
313    for &apic_id in targets {
314        // Use if-let: if the APIC ID is gone (AP offline after we sent the IPI)
315        // there is nothing to wait for — skip rather than panic in kernel context.
316        let cpu_idx = match crate::arch::x86_64::percpu::cpu_index_by_apic(apic_id) {
317            Some(idx) => idx,
318            None => {
319                log::warn!("TLB wait_acks: APIC {} disappeared, skipping", apic_id);
320                continue;
321            }
322        };
323        let mut success = false;
324        for _ in 0..MAX_WAIT_CYCLES {
325            if TLB_ACKS[cpu_idx].load(Ordering::Acquire) {
326                success = true;
327                break;
328            }
329            core::hint::spin_loop();
330        }
331        if !success {
332            log::warn!("TLB shootdown timeout on CPU {}", cpu_idx);
333        }
334    }
335}
336
/// Current CPU index (thin local alias so call sites in this module stay
/// short; delegates to the per-CPU subsystem).
fn current_cpu_index() -> usize {
    crate::arch::x86_64::percpu::current_cpu_index()
}