swc_ecma_parser/lexer/table.rs

//! Lookup table for byte handlers.
//!
//! The idea is taken from ratel.
//!
//! <https://github.com/ratel-rust/ratel-core/blob/e55a1310ba69a3f5ce2a9a6eef643feced02ac08/ratel/src/lexer/mod.rs#L665>
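//!
//! Each input byte indexes into `BYTE_HANDLERS`; the selected handler lexes the
//! token that starts at that byte.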

use either::Either;
use swc_common::input::Input;

use super::{pos_span, LexResult, Lexer};
use crate::{
    error::SyntaxError,
    lexer::{
        char_ext::CharExt,
        token::{Token, TokenValue},
    },
};

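/// A byte handler lexes the token that starts at the lexer's current position.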
pub(super) type ByteHandler = fn(&mut Lexer<'_>) -> LexResult<Token>;

/// Lookup table mapping any incoming byte to a handler function defined below.
pub(super) static BYTE_HANDLERS: [ByteHandler; 256] = [
    //   0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F   //
    ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, // 0
    ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, // 1
    ERR, EXL, QOT, HSH, IDN, PRC, AMP, QOT, PNO, PNC, ATR, PLS, COM, MIN, PRD, SLH, // 2
    ZER, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, COL, SEM, LSS, EQL, MOR, QST, // 3
    AT_, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, // 4
    IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, BTO, IDN, BTC, CRT, IDN, // 5
    TPL, L_A, L_B, L_C, L_D, L_E, L_F, L_G, L_H, L_I, L_J, L_K, L_L, L_M, L_N, L_O, // 6
    L_P, L_Q, L_R, L_S, L_T, L_U, L_V, L_W, L_X, L_Y, L_Z, BEO, PIP, BEC, TLD, ERR, // 7
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 8
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 9
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // A
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // B
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // C
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // D
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // E
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // F
];

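/// Bytes that cannot start any token; consumes the character and reports
/// `SyntaxError::UnexpectedChar`.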
const ERR: ByteHandler = |lexer| {
    let c = unsafe {
        // Safety: Byte handler is only called for non-last characters
        lexer.input.cur().unwrap_unchecked()
    };

    let start = lexer.cur_pos();
    unsafe {
        // Safety: Byte handler is only called for non-last characters
        lexer.input.bump();
    }
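    // Produce an `UnexpectedChar` error for the consumed character; `?` propagates it.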
    lexer.error_span(pos_span(start), SyntaxError::UnexpectedChar { c })?
};

/// An identifier that we already know cannot be a keyword or a known identifier.
const IDN: ByteHandler = |lexer| lexer.read_ident_unknown();

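// Handlers `L_A`..`L_Z` cover the bytes `a`..`z`: each matches the keywords that
// start with that letter via `read_keyword_with`; letters that start no keyword
// (`h`, `j`, `q`, `x`, `z`) reuse `IDN` directly.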
const L_A: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "abstract" => Some(Token::Abstract),
        "as" => Some(Token::As),
        "await" => Some(Token::Await),
        "async" => Some(Token::Async),
        "assert" => Some(Token::Assert),
        "asserts" => Some(Token::Asserts),
        "any" => Some(Token::Any),
        "accessor" => Some(Token::Accessor),
        _ => None,
    })
};

const L_B: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "break" => Some(Token::Break),
        "boolean" => Some(Token::Boolean),
        "bigint" => Some(Token::Bigint),
        _ => None,
    })
};

const L_C: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "case" => Some(Token::Case),
        "catch" => Some(Token::Catch),
        "class" => Some(Token::Class),
        "const" => Some(Token::Const),
        "continue" => Some(Token::Continue),
        _ => None,
    })
};

const L_D: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "debugger" => Some(Token::Debugger),
        "default" => Some(Token::Default),
        "delete" => Some(Token::Delete),
        "do" => Some(Token::Do),
        "declare" => Some(Token::Declare),
        _ => None,
    })
};

const L_E: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "else" => Some(Token::Else),
        "enum" => Some(Token::Enum),
        "export" => Some(Token::Export),
        "extends" => Some(Token::Extends),
        _ => None,
    })
};

const L_F: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "false" => Some(Token::False),
        "finally" => Some(Token::Finally),
        "for" => Some(Token::For),
        "function" => Some(Token::Function),
        "from" => Some(Token::From),
        _ => None,
    })
};

const L_G: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "global" => Some(Token::Global),
        "get" => Some(Token::Get),
        _ => None,
    })
};

const L_H: ByteHandler = IDN;

const L_I: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "if" => Some(Token::If),
        "import" => Some(Token::Import),
        "in" => Some(Token::In),
        "instanceof" => Some(Token::InstanceOf),
        "is" => Some(Token::Is),
        "infer" => Some(Token::Infer),
        "interface" => Some(Token::Interface),
        "implements" => Some(Token::Implements),
        "intrinsic" => Some(Token::Intrinsic),
        _ => None,
    })
};

const L_J: ByteHandler = IDN;

const L_K: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "keyof" => Some(Token::Keyof),
        _ => None,
    })
};

const L_L: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "let" => Some(Token::Let),
        _ => None,
    })
};

const L_M: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "meta" => Some(Token::Meta),
        _ => None,
    })
};

const L_N: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "new" => Some(Token::New),
        "null" => Some(Token::Null),
        "number" => Some(Token::Number),
        "never" => Some(Token::Never),
        "namespace" => Some(Token::Namespace),
        _ => None,
    })
};

const L_O: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "of" => Some(Token::Of),
        "object" => Some(Token::Object),
        "out" => Some(Token::Out),
        "override" => Some(Token::Override),
        _ => None,
    })
};

const L_P: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "public" => Some(Token::Public),
        "package" => Some(Token::Package),
        "protected" => Some(Token::Protected),
        "private" => Some(Token::Private),
        _ => None,
    })
};

const L_Q: ByteHandler = IDN;

const L_R: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "return" => Some(Token::Return),
        "readonly" => Some(Token::Readonly),
        "require" => Some(Token::Require),
        _ => None,
    })
};

const L_S: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "super" => Some(Token::Super),
        "static" => Some(Token::Static),
        "switch" => Some(Token::Switch),
        "symbol" => Some(Token::Symbol),
        "set" => Some(Token::Set),
        "string" => Some(Token::String),
        "satisfies" => Some(Token::Satisfies),
        _ => None,
    })
};

const L_T: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "this" => Some(Token::This),
        "throw" => Some(Token::Throw),
        "true" => Some(Token::True),
        "typeof" => Some(Token::TypeOf),
        "try" => Some(Token::Try),
        "type" => Some(Token::Type),
        "target" => Some(Token::Target),
        _ => None,
    })
};

const L_U: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "using" => Some(Token::Using),
        "unique" => Some(Token::Unique),
        "undefined" => Some(Token::Undefined),
        "unknown" => Some(Token::Unknown),
        _ => None,
    })
};

const L_V: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "var" => Some(Token::Var),
        "void" => Some(Token::Void),
        _ => None,
    })
};

const L_W: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "while" => Some(Token::While),
        "with" => Some(Token::With),
        _ => None,
    })
};

const L_X: ByteHandler = IDN;

const L_Y: ByteHandler = |lexer| {
    lexer.read_keyword_with(&|s| match s {
        "yield" => Some(Token::Yield),
        _ => None,
    })
};

const L_Z: ByteHandler = IDN;

/// `0`
const ZER: ByteHandler = |lexer| lexer.read_token_zero();

/// Numbers
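///
/// Covers the digits `1`-`9` (`0` goes to `ZER`) and produces `Token::Num` or
/// `Token::BigInt` depending on which variant `read_number` returns.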
const DIG: ByteHandler = |lexer| {
    debug_assert!(lexer.cur().is_some_and(|cur| cur != '0'));
    lexer.read_number::<false, false>().map(|v| match v {
        Either::Left((value, raw)) => {
            lexer.state.set_token_value(TokenValue::Num { value, raw });
            Token::Num
        }
        Either::Right((value, raw)) => {
            lexer
                .state
                .set_token_value(TokenValue::BigInt { value, raw });
            Token::BigInt
        }
    })
};

/// String literals with `'` or `"`
const QOT: ByteHandler = |lexer| lexer.read_str_lit();

/// Unicode
const UNI: ByteHandler = |lexer| {
    let c = unsafe {
        // Safety: Byte handler is only called for non-last characters
        lexer.input.cur().unwrap_unchecked()
    };

    // Identifier or keyword. '\uXXXX' sequences are allowed in
    // identifiers, so '\' also dispatches to that.
    if c == '\\' || c.is_ident_start() {
        return lexer.read_ident_unknown();
    }

    let start = lexer.cur_pos();
    unsafe {
        // Safety: Byte handler is only called for non-last characters
        lexer.input.bump();
    }
    lexer.error_span(pos_span(start), SyntaxError::UnexpectedChar { c })?
};

/// `:`
const COL: ByteHandler = |lexer| lexer.read_token_colon();

/// `%`
const PRC: ByteHandler = |lexer| lexer.read_token_mul_mod::<false>();

/// `*`
const ATR: ByteHandler = |lexer| lexer.read_token_mul_mod::<true>();

/// `?`
const QST: ByteHandler = |lexer| lexer.read_token_question_mark();

/// `&`
const AMP: ByteHandler = |lexer| lexer.read_token_logical::<b'&'>();

/// `|`
const PIP: ByteHandler = |lexer| lexer.read_token_logical::<b'|'>();

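/// Defines a handler that consumes exactly one byte and returns the given token.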
macro_rules! single_char {
    ($name:ident, $c:literal, $token:ident) => {
        const $name: ByteHandler = |lexer| {
            lexer.input.bump_bytes(1);
            Ok(Token::$token)
        };
    };
}

single_char!(SEM, b';', Semi);
single_char!(COM, b',', Comma);

/// `` ` ``
const TPL: ByteHandler = |lexer| lexer.read_token_back_quote();

single_char!(TLD, b'~', Tilde);
single_char!(AT_, b'@', At);

single_char!(PNO, b'(', LParen);
single_char!(PNC, b')', RParen);

single_char!(BTO, b'[', LBracket);
single_char!(BTC, b']', RBracket);

single_char!(BEO, b'{', LBrace);
single_char!(BEC, b'}', RBrace);

/// `^`
const CRT: ByteHandler = |lexer| {
    // Bitwise xor
    lexer.input.bump_bytes(1);
    Ok(if lexer.input.cur_as_ascii() == Some(b'=') {
        lexer.input.bump_bytes(1);
        Token::BitXorEq
    } else {
        Token::Caret
    })
};

/// `+`
const PLS: ByteHandler = |lexer| lexer.read_token_plus_minus::<b'+'>();

/// `-`
const MIN: ByteHandler = |lexer| lexer.read_token_plus_minus::<b'-'>();

/// `!`
const EXL: ByteHandler = |lexer| lexer.read_token_bang_or_eq::<b'!'>();

/// `=`
const EQL: ByteHandler = |lexer| lexer.read_token_bang_or_eq::<b'='>();

/// `.`
const PRD: ByteHandler = |lexer| lexer.read_token_dot();

/// `<`
const LSS: ByteHandler = |lexer| lexer.read_token_lt_gt::<b'<'>();

/// `>`
const MOR: ByteHandler = |lexer| lexer.read_token_lt_gt::<b'>'>();

/// `/`
const SLH: ByteHandler = |lexer| lexer.read_slash();

/// `#`
const HSH: ByteHandler = |lexer| lexer.read_token_number_sign();