swc_ecma_lexer/common/lexer/
search.rs

1//! Utilities inspired by OXC lexer for fast byte-wise searching over source
2//! text.
3
/// Number of bytes examined per batch by the scan loop that `byte_search!`
/// expands to; the loop checks for a hit after each batch of this size.
pub const SEARCH_BATCH_SIZE: usize = 32;
6
/// A 256-entry "does this byte match?" lookup table, indexed by byte value.
///
/// Construction goes through [`SafeByteMatchTable::new`], which refuses
/// patterns that could leave a scan stopped in the middle of a UTF-8
/// character. The struct is declared with 64-byte alignment.
#[repr(C, align(64))]
pub struct SafeByteMatchTable([bool; 256]);

impl SafeByteMatchTable {
    /// Wraps `bytes` (one flag per byte value) after validating it.
    ///
    /// A pattern is accepted when at least one of these holds:
    ///
    /// * every byte in `0xC0..=0xF7` (UTF-8 lead bytes) is flagged, or
    /// * no byte in `0x80..=0xBF` (UTF-8 continuation bytes) is flagged.
    ///
    /// Under either condition, a search that begins on a character boundary
    /// and stops at the first flagged byte also stops on a character boundary.
    ///
    /// # Panics
    ///
    /// Panics if neither condition holds. Being a `const fn`, the check can
    /// run during constant evaluation, where the panic is a build error.
    pub const fn new(bytes: [bool; 256]) -> Self {
        let mut every_lead_matches = true;
        let mut no_continuation_matches = true;

        // `for` loops are not available in `const fn`, hence the manual counter.
        let mut value = 0usize;
        loop {
            if value == 256 {
                break;
            }
            match (bytes[value], value) {
                // A flagged continuation byte breaks the second condition.
                (true, 0x80..=0xbf) => no_continuation_matches = false,
                // An unflagged lead byte breaks the first condition.
                (false, 0xc0..=0xf7) => every_lead_matches = false,
                _ => {}
            }
            value += 1;
        }

        assert!(
            every_lead_matches || no_continuation_matches,
            "Cannot create SafeByteMatchTable with an unsafe pattern"
        );
        Self(bytes)
    }

    /// Does nothing; `byte_search!` calls this on its `table` argument before
    /// scanning, so the argument must be a value exposing this method.
    #[inline]
    pub const fn use_table(&self) {}

    /// Returns the flag stored for byte `b`.
    #[inline]
    pub const fn matches(&self, b: u8) -> bool {
        let Self(flags) = self;
        flags[b as usize]
    }
}
45
46// ------------------------- Macros -------------------------
47
/// Produces a `SafeByteMatchTable` constant from a per-byte predicate.
///
/// The argument uses closure-like syntax, `|name| <bool expression>`. The
/// expression is handed to `seq_macro::seq!` over the range `0u8..=255`, and
/// the resulting 256 flags are passed to `SafeByteMatchTable::new` inside a
/// `const` item, so the table's safety assertion is checked when that
/// constant is evaluated.
#[macro_export]
macro_rules! safe_byte_match_table {
    (|$value:ident| $predicate:expr) => {{
        use $crate::common::lexer::search::SafeByteMatchTable;
        // Presumably needed because substituting literal byte values into the
        // predicate can yield comparisons such as `x == x`.
        #[allow(clippy::eq_op, clippy::allow_attributes)]
        const TABLE: SafeByteMatchTable = seq_macro::seq!($value in 0u8..=255 {
            SafeByteMatchTable::new([#($predicate,)*])
        });
        TABLE
    }};
}
59
/// Advances a lexer's input to the next byte accepted by a match table and
/// evaluates to that byte.
///
/// # Arguments
///
/// * `lexer` - identifier of a value providing `input()` / `input_mut()`;
///   the input must in turn provide `as_str()` and `bump_bytes(n)`.
/// * `table` - identifier of a value providing `use_table()` and
///   `matches(u8) -> bool`.
/// * `continue_if: (byte, pos) expr` - optional. Evaluated for every table
///   hit with `byte` bound to the matched byte and `pos` to its index within
///   the input remaining at the start of that pass. When it is `true`, the
///   input is advanced one byte past the hit and the scan restarts.
/// * `handle_eof` - expression evaluated when the remaining input is empty or
///   holds no accepted byte (in the latter case the remainder is consumed
///   first). The expansion only leaves its loop through a match, so this
///   expression is expected to diverge (e.g. `return ...`).
///
/// On a final match the input is advanced so that the matched byte is the
/// next unread byte.
///
/// NOTE(review): on `continue_if` the cursor moves exactly one byte past the
/// hit. If the table can match a multi-byte lead byte, confirm that
/// `bump_bytes` / `as_str` tolerate a cursor inside a character.
#[macro_export]
macro_rules! byte_search {
    // Short form: without `continue_if`, the first table hit ends the search.
    (
        lexer: $lexer:ident,
        table: $table:ident,
        handle_eof: $eof_handler:expr $(,)?
    ) => {
        // `$crate::` keeps the recursive call resolvable when this exported
        // macro is reached by path instead of textual scope.
        $crate::byte_search! {
            lexer: $lexer,
            table: $table,
            continue_if: (_byte, _pos) false,
            handle_eof: $eof_handler,
        }
    };

    // Full form with `continue_if` support.
    (
        lexer: $lexer:ident,
        table: $table:ident,
        continue_if: ($byte:ident, $pos:ident) $should_continue:expr,
        handle_eof: $eof_handler:expr $(,)?
    ) => {{
        $table.use_table();
        loop {
            // The shared borrow of the input lives only inside this block, so
            // the `input_mut()` calls below are free to borrow mutably.
            let hit: Option<(usize, u8)> = {
                let remaining = $lexer.input().as_str();
                if remaining.is_empty() {
                    $eof_handler
                }

                // Index of the current batch's first byte within `remaining`.
                let mut base = 0usize;
                let mut hit = None;
                for batch in remaining
                    .as_bytes()
                    .chunks($crate::common::lexer::search::SEARCH_BATCH_SIZE)
                {
                    if let Some(offset) = batch.iter().position(|&b| $table.matches(b)) {
                        hit = Some((base + offset, batch[offset]));
                        break;
                    }
                    base += batch.len();
                }
                hit
            };

            match hit {
                Some(($pos, $byte)) => {
                    if $should_continue {
                        // Rejected by the caller: step over the byte and rescan.
                        $lexer.input_mut().bump_bytes($pos + 1);
                        continue;
                    }
                    // Accepted: leave the matched byte as the next unread byte.
                    $lexer.input_mut().bump_bytes($pos);
                    break $byte;
                }
                None => {
                    // Nothing matched: consume the remainder, then run the handler.
                    let rest = $lexer.input().as_str().len();
                    $lexer.input_mut().bump_bytes(rest);
                    $eof_handler
                }
            }
        }
    }};
}