Skip to main content

strate_fs_abstraction/
unicode.rs

1//! Unicode string handling for Windows.
2//!
3//! Windows uses UTF-16 (wide strings) for all kernel APIs. This module
4//! provides utilities for converting between UTF-8 and UTF-16, and for
5//! working with Windows UNICODE_STRING structures.
6
7use crate::error::{FsError, FsResult};
8
/// Maximum length, in characters, of a single Windows path component.
pub const MAX_COMPONENT_LENGTH: usize = 0xFF;

/// Maximum length, in characters, of a full Windows path.
///
/// Equal to `u16::MAX / 2` (32767): the largest number of 16-bit units whose
/// size in bytes still fits in a 16-bit byte counter.
pub const MAX_PATH_LENGTH: usize = (u16::MAX / 2) as usize;
14
/// UNICODE_STRING structure for Windows kernel APIs.
///
/// This is a counted Unicode string, not null-terminated. Both size fields
/// are byte counts stored in 16 bits, so one value can describe at most
/// 32767 UTF-16 code units.
///
/// The struct is `Copy` and only stores a raw pointer: it neither owns the
/// buffer nor tracks its lifetime.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct UnicodeString {
    /// Length of the string in bytes (not including any null terminator).
    pub length: u16,
    /// Maximum size of the buffer in bytes.
    pub maximum_length: u16,
    /// Pointer to the wide string buffer.
    pub buffer: *mut u16,
}

impl UnicodeString {
    /// Creates a `UnicodeString` describing `slice`.
    ///
    /// `length` and `maximum_length` are both set to the slice size in bytes.
    /// A slice longer than 32767 code units cannot be represented by the
    /// 16-bit byte counters; in that case the result describes only the first
    /// 32767 code units instead of wrapping around to an unrelated length.
    /// Callers that must reject over-long input should check `slice.len()`
    /// before calling.
    ///
    /// # Safety
    ///
    /// The buffer must remain valid for the lifetime of this structure. The
    /// stored pointer is derived from a shared reference, so the memory must
    /// not be written through it.
    pub const unsafe fn from_slice(slice: &[u16]) -> Self {
        // Largest code-unit count whose byte size still fits in a `u16`.
        const MAX_UNITS: usize = (u16::MAX / 2) as usize;

        let units = if slice.len() > MAX_UNITS {
            MAX_UNITS
        } else {
            slice.len()
        };
        // `units <= 32767`, so `units * 2 <= 65534` and the cast is lossless.
        let byte_len = (units * 2) as u16;

        Self {
            length: byte_len,
            maximum_length: byte_len,
            buffer: slice.as_ptr() as *mut u16,
        }
    }

    /// Returns the string as a slice of wide characters.
    ///
    /// A null `buffer` yields an empty slice regardless of `length`. An odd
    /// `length` is rounded down to whole code units.
    ///
    /// # Safety
    ///
    /// If `buffer` is non-null it must point to at least `length` bytes of
    /// readable, properly aligned memory that stays valid for as long as the
    /// returned slice is used.
    pub unsafe fn as_slice(&self) -> &[u16] {
        if self.buffer.is_null() {
            return &[];
        }
        // SAFETY: the caller guarantees `buffer` is valid for `length` bytes,
        // i.e. `char_len()` code units.
        unsafe { core::slice::from_raw_parts(self.buffer, self.char_len()) }
    }

    /// Returns `true` if the string is empty (its byte length is zero).
    pub const fn is_empty(&self) -> bool {
        self.length == 0
    }

    /// Returns the length in characters (16-bit code units), not bytes.
    pub const fn char_len(&self) -> usize {
        (self.length / 2) as usize
    }
}
67
68/// In-place wide string buffer for stack allocation.
69///
70/// This avoids heap allocation for common path operations.
71#[repr(C)]
72pub struct WideStringBuffer<const N: usize> {
73    buffer: [u16; N],
74    len: usize,
75}
76
77impl<const N: usize> WideStringBuffer<N> {
78    /// Creates an empty buffer.
79    pub const fn new() -> Self {
80        Self {
81            buffer: [0; N],
82            len: 0,
83        }
84    }
85
86    /// Creates from a UTF-8 string.
87    pub fn from_utf8(s: &str) -> FsResult<Self> {
88        let mut result = Self::new();
89        result.push_utf8(s)?;
90        Ok(result)
91    }
92
93    /// Appends a UTF-8 string.
94    pub fn push_utf8(&mut self, s: &str) -> FsResult<()> {
95        for c in s.encode_utf16() {
96            if self.len >= N {
97                return Err(FsError::StringTooLong);
98            }
99            self.buffer[self.len] = c;
100            self.len += 1;
101        }
102        Ok(())
103    }
104
105    /// Appends a null terminator if there's room.
106    pub fn push_null(&mut self) -> FsResult<()> {
107        if self.len >= N {
108            return Err(FsError::StringTooLong);
109        }
110        self.buffer[self.len] = 0;
111        self.len += 1;
112        Ok(())
113    }
114
115    /// Returns the buffer as a slice.
116    pub fn as_slice(&self) -> &[u16] {
117        &self.buffer[..self.len]
118    }
119
120    /// Returns the buffer as a null-terminated slice (includes the null).
121    pub fn as_slice_with_null(&mut self) -> FsResult<&[u16]> {
122        self.push_null()?;
123        Ok(&self.buffer[..self.len])
124    }
125
126    /// Returns as a UNICODE_STRING.
127    pub fn as_unicode_string(&self) -> UnicodeString {
128        UnicodeString {
129            length: (self.len * 2) as u16,
130            maximum_length: (N * 2) as u16,
131            buffer: self.buffer.as_ptr() as *mut u16,
132        }
133    }
134
135    /// Returns the length in characters.
136    pub const fn len(&self) -> usize {
137        self.len
138    }
139
140    /// Returns `true` if empty.
141    pub const fn is_empty(&self) -> bool {
142        self.len == 0
143    }
144
145    /// Clears the buffer.
146    pub fn clear(&mut self) {
147        self.len = 0;
148    }
149
150    /// Returns capacity in characters.
151    pub const fn capacity(&self) -> usize {
152        N
153    }
154}
155
156impl<const N: usize> Default for WideStringBuffer<N> {
157    /// Implements default.
158    fn default() -> Self {
159        Self::new()
160    }
161}
162
/// Heap-allocated wide string.
///
/// Only available with the `alloc` feature. A value built by
/// [`WindowsString::from_utf8`] always stores a trailing null terminator; a
/// value built by [`WindowsString::new`] stores nothing at all.
#[cfg(feature = "alloc")]
pub struct WindowsString {
    buffer: alloc::vec::Vec<u16>,
}

#[cfg(feature = "alloc")]
impl WindowsString {
    /// Creates a new `WindowsString` with an empty backing buffer.
    pub fn new() -> Self {
        let buffer = alloc::vec::Vec::new();
        Self { buffer }
    }

    /// Creates a null-terminated wide string from UTF-8 text.
    ///
    /// # Errors
    ///
    /// Returns `FsError::StringTooLong` when the encoded text plus its
    /// terminator exceeds `MAX_PATH_LENGTH` code units.
    pub fn from_utf8(utf8: &str) -> FsResult<Self> {
        let terminator = core::iter::once(0u16);
        let buffer: alloc::vec::Vec<u16> = utf8.encode_utf16().chain(terminator).collect();

        if buffer.len() <= MAX_PATH_LENGTH {
            Ok(Self { buffer })
        } else {
            Err(FsError::StringTooLong)
        }
    }

    /// Creates a UNICODE_STRING pointing to this buffer.
    ///
    /// `length` excludes the null terminator, while `maximum_length` covers
    /// the whole backing buffer; both are byte counts.
    pub fn as_unicode_string(&self) -> UnicodeString {
        let used_bytes = self.len() * 2;
        let total_bytes = self.buffer.len() * 2;

        UnicodeString {
            length: used_bytes as u16,
            maximum_length: total_bytes as u16,
            buffer: self.buffer.as_ptr() as *mut u16,
        }
    }

    /// Returns the whole backing buffer, including the null terminator when
    /// one is stored.
    pub fn as_slice(&self) -> &[u16] {
        self.buffer.as_slice()
    }

    /// Returns the length in characters (excluding null terminator).
    pub fn len(&self) -> usize {
        // An empty backing buffer has no terminator to discount.
        self.buffer.len().saturating_sub(1)
    }

    /// Returns `true` if the string holds no characters besides an optional
    /// terminator.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}

#[cfg(feature = "alloc")]
impl Default for WindowsString {
    /// Returns an empty string, equivalent to [`WindowsString::new`].
    fn default() -> Self {
        Self::new()
    }
}
237
/// Compares two wide strings for equality, ignoring ASCII letter case.
///
/// Only `A`-`Z` are folded; every other code unit must match exactly. For
/// proper Unicode case folding, use Windows APIs like
/// `RtlCompareUnicodeString`.
pub fn ascii_iequals(a: &[u16], b: &[u16]) -> bool {
    // Maps `A`-`Z` (0x41-0x5A) onto `a`-`z`; anything else passes through.
    fn fold(unit: u16) -> u16 {
        match unit {
            0x41..=0x5A => unit + 0x20,
            _ => unit,
        }
    }

    a.len() == b.len() && a.iter().zip(b).all(|(&lhs, &rhs)| fold(lhs) == fold(rhs))
}
263
/// Checks that a wide string contains only valid filename characters.
///
/// Returns `false` as soon as a NUL code unit or one of the reserved
/// characters `\ / : * ? " < > |` is found. An empty string is accepted.
pub fn is_valid_filename(s: &[u16]) -> bool {
    // The reserved punctuation, kept as ASCII bytes and widened on comparison.
    const RESERVED: &[u8] = b"\\/:*?\"<>|";

    s.iter().all(|&unit| {
        unit != 0 && !RESERVED.iter().any(|&byte| u16::from(byte) == unit)
    })
}
287
#[cfg(test)]
mod tests {
    use super::*;

    /// Widens an ASCII byte string into UTF-16 code units.
    fn wide<const N: usize>(ascii: [u8; N]) -> [u16; N] {
        ascii.map(u16::from)
    }

    /// Building from UTF-8 and appending more text both extend the length by
    /// the number of code units written.
    #[test]
    fn test_wide_buffer_utf8() {
        let mut buf = WideStringBuffer::<64>::from_utf8("Hello").unwrap();
        assert_eq!(buf.len(), 5);

        buf.push_utf8(" World").unwrap();
        assert_eq!(buf.len(), 11);
    }

    /// A string longer than the buffer capacity is rejected.
    #[test]
    fn test_wide_buffer_overflow() {
        assert!(WideStringBuffer::<3>::from_utf8("Hello").is_err());
    }

    /// Strings differing only in ASCII letter case compare equal.
    #[test]
    fn test_ascii_iequals() {
        let mixed = wide(*b"Hello");
        let inverted = wide(*b"hELLO");
        assert!(ascii_iequals(&mixed, &inverted));
    }

    /// Plain names pass validation; a reserved character fails it.
    #[test]
    fn test_is_valid_filename() {
        let plain = wide(*b"test");
        assert!(is_valid_filename(&plain));

        let with_star = wide(*b"t*");
        assert!(!is_valid_filename(&with_star));
    }
}