swc_ecma_lexer/lexer/
table.rs

1//! Lookup table for byte handlers.
2//!
3//! Idea is taken from ratel.
4//!
5//! https://github.com/ratel-rust/ratel-core/blob/e55a1310ba69a3f5ce2a9a6eef643feced02ac08/ratel/src/lexer/mod.rs#L665
6
7use either::Either;
8use swc_common::input::Input;
9use swc_ecma_ast::AssignOp;
10
11use super::{LexResult, Lexer, LexerTrait};
12use crate::{
13    common::lexer::{char::CharExt, pos_span},
14    error::SyntaxError,
15    token::{BinOpToken, IdentLike, Keyword, KnownIdent, Token, Word},
16};
17
18pub(super) type ByteHandler = Option<for<'aa> fn(&mut Lexer<'aa>) -> LexResult<Token>>;
19
20/// Lookup table mapping any incoming byte to a handler function defined below.
21pub(super) static BYTE_HANDLERS: [ByteHandler; 256] = [
22    //   0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F   //
23    EOF, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 0
24    ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 1
25    ___, EXL, QOT, HSH, IDN, PRC, AMP, QOT, PNO, PNC, ATR, PLS, COM, MIN, PRD, SLH, // 2
26    ZER, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, COL, SEM, LSS, EQL, MOR, QST, // 3
27    AT_, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, // 4
28    IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, IDN, BTO, IDN, BTC, CRT, IDN, // 5
29    TPL, L_A, L_B, L_C, L_D, L_E, L_F, L_G, L_H, L_I, L_J, L_K, L_L, L_M, L_N, L_O, // 6
30    L_P, L_Q, L_R, L_S, L_T, L_U, L_V, L_W, L_X, L_Y, L_Z, BEO, PIP, BEC, TLD, ERR, // 7
31    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 8
32    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 9
33    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // A
34    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // B
35    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // C
36    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // D
37    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // E
38    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // F
39];
40
41const ___: ByteHandler = None;
42
43const EOF: ByteHandler = Some(|lexer| {
44    lexer.input.bump_bytes(1);
45
46    Ok(Token::Eof)
47});
48
49const ERR: ByteHandler = Some(|lexer| {
50    let c = unsafe {
51        // Safety: Byte handler is only called for non-last chracters
52        lexer.input.cur().unwrap_unchecked()
53    };
54
55    let start = lexer.cur_pos();
56    unsafe {
57        // Safety: Byte handler is only called for non-last chracters
58        lexer.input.bump();
59    }
60    lexer.error_span(pos_span(start), SyntaxError::UnexpectedChar { c })?
61});
62
63/// Identifier and we know that this cannot be a keyword or known ident.
64const IDN: ByteHandler = Some(|lexer| lexer.read_ident_unknown());
65
66const L_A: ByteHandler = Some(|lexer| {
67    lexer.read_keyword_with(&|s| match s {
68        "abstract" => Some(Token::Word(Word::Ident(IdentLike::Known(
69            KnownIdent::Abstract,
70        )))),
71        "as" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::As)))),
72        "await" => Some(Token::Word(Word::Keyword(Keyword::Await))),
73        "async" => Some(Token::Word(Word::Ident(IdentLike::Known(
74            KnownIdent::Async,
75        )))),
76        "assert" => Some(Token::Word(Word::Ident(IdentLike::Known(
77            KnownIdent::Assert,
78        )))),
79        "asserts" => Some(Token::Word(Word::Ident(IdentLike::Known(
80            KnownIdent::Asserts,
81        )))),
82        "any" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Any)))),
83        "accessor" => Some(Token::Word(Word::Ident(IdentLike::Known(
84            KnownIdent::Accessor,
85        )))),
86        _ => None,
87    })
88});
89
90const L_B: ByteHandler = Some(|lexer| {
91    lexer.read_keyword_with(&|s| match s {
92        "break" => Some(Token::Word(Word::Keyword(Keyword::Break))),
93        "boolean" => Some(Token::Word(Word::Ident(IdentLike::Known(
94            KnownIdent::Boolean,
95        )))),
96        "bigint" => Some(Token::Word(Word::Ident(IdentLike::Known(
97            KnownIdent::Bigint,
98        )))),
99        _ => None,
100    })
101});
102
103const L_C: ByteHandler = Some(|lexer| {
104    lexer.read_keyword_with(&|s| match s {
105        "case" => Some(Token::Word(Word::Keyword(Keyword::Case))),
106        "catch" => Some(Token::Word(Word::Keyword(Keyword::Catch))),
107        "class" => Some(Token::Word(Word::Keyword(Keyword::Class))),
108        "const" => Some(Token::Word(Word::Keyword(Keyword::Const))),
109        "continue" => Some(Token::Word(Word::Keyword(Keyword::Continue))),
110        _ => None,
111    })
112});
113
114const L_D: ByteHandler = Some(|lexer| {
115    lexer.read_keyword_with(&|s| match s {
116        "debugger" => Some(Token::Word(Word::Keyword(Keyword::Debugger))),
117        "default" => Some(Token::Word(Word::Keyword(Keyword::Default_))),
118        "delete" => Some(Token::Word(Word::Keyword(Keyword::Delete))),
119        "do" => Some(Token::Word(Word::Keyword(Keyword::Do))),
120        "declare" => Some(Token::Word(Word::Ident(IdentLike::Known(
121            KnownIdent::Declare,
122        )))),
123        _ => None,
124    })
125});
126
127const L_E: ByteHandler = Some(|lexer| {
128    lexer.read_keyword_with(&|s| match s {
129        "else" => Some(Token::Word(Word::Keyword(Keyword::Else))),
130        "enum" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Enum)))),
131        "export" => Some(Token::Word(Word::Keyword(Keyword::Export))),
132        "extends" => Some(Token::Word(Word::Keyword(Keyword::Extends))),
133        _ => None,
134    })
135});
136
137const L_F: ByteHandler = Some(|lexer| {
138    lexer.read_keyword_with(&|s| match s {
139        "false" => Some(Token::Word(Word::False)),
140        "finally" => Some(Token::Word(Word::Keyword(Keyword::Finally))),
141        "for" => Some(Token::Word(Word::Keyword(Keyword::For))),
142        "function" => Some(Token::Word(Word::Keyword(Keyword::Function))),
143        "from" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::From)))),
144        _ => None,
145    })
146});
147
148const L_G: ByteHandler = Some(|lexer| {
149    lexer.read_keyword_with(&|s| match s {
150        "global" => Some(Token::Word(Word::Ident(IdentLike::Known(
151            KnownIdent::Global,
152        )))),
153        "get" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Get)))),
154        _ => None,
155    })
156});
157
158const L_H: ByteHandler = IDN;
159
160const L_I: ByteHandler = Some(|lexer| {
161    lexer.read_keyword_with(&|s| match s {
162        "if" => Some(Token::Word(Word::Keyword(Keyword::If))),
163        "import" => Some(Token::Word(Word::Keyword(Keyword::Import))),
164        "in" => Some(Token::Word(Word::Keyword(Keyword::In))),
165        "instanceof" => Some(Token::Word(Word::Keyword(Keyword::InstanceOf))),
166        "is" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Is)))),
167        "infer" => Some(Token::Word(Word::Ident(IdentLike::Known(
168            KnownIdent::Infer,
169        )))),
170        "interface" => Some(Token::Word(Word::Ident(IdentLike::Known(
171            KnownIdent::Interface,
172        )))),
173        "implements" => Some(Token::Word(Word::Ident(IdentLike::Known(
174            KnownIdent::Implements,
175        )))),
176        "intrinsic" => Some(Token::Word(Word::Ident(IdentLike::Known(
177            KnownIdent::Intrinsic,
178        )))),
179        _ => None,
180    })
181});
182
183const L_J: ByteHandler = IDN;
184
185const L_K: ByteHandler = Some(|lexer| {
186    lexer.read_keyword_with(&|s| match s {
187        "keyof" => Some(Token::Word(Word::Ident(IdentLike::Known(
188            KnownIdent::Keyof,
189        )))),
190        _ => None,
191    })
192});
193
194const L_L: ByteHandler = Some(|lexer| {
195    lexer.read_keyword_with(&|s| match s {
196        "let" => Some(Token::Word(Word::Keyword(Keyword::Let))),
197        _ => None,
198    })
199});
200
201const L_M: ByteHandler = Some(|lexer| {
202    lexer.read_keyword_with(&|s| match s {
203        "meta" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Meta)))),
204        _ => None,
205    })
206});
207
208const L_N: ByteHandler = Some(|lexer| {
209    lexer.read_keyword_with(&|s| match s {
210        "new" => Some(Token::Word(Word::Keyword(Keyword::New))),
211        "null" => Some(Token::Word(Word::Null)),
212        "number" => Some(Token::Word(Word::Ident(IdentLike::Known(
213            KnownIdent::Number,
214        )))),
215        "never" => Some(Token::Word(Word::Ident(IdentLike::Known(
216            KnownIdent::Never,
217        )))),
218        "namespace" => Some(Token::Word(Word::Ident(IdentLike::Known(
219            KnownIdent::Namespace,
220        )))),
221        _ => None,
222    })
223});
224
225const L_O: ByteHandler = Some(|lexer| {
226    lexer.read_keyword_with(&|s| match s {
227        "of" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Of)))),
228        "object" => Some(Token::Word(Word::Ident(IdentLike::Known(
229            KnownIdent::Object,
230        )))),
231        _ => None,
232    })
233});
234
235const L_P: ByteHandler = Some(|lexer| {
236    lexer.read_keyword_with(&|s| match s {
237        "public" => Some(Token::Word(Word::Ident(IdentLike::Known(
238            KnownIdent::Public,
239        )))),
240        "package" => Some(Token::Word(Word::Ident(IdentLike::Known(
241            KnownIdent::Package,
242        )))),
243        "protected" => Some(Token::Word(Word::Ident(IdentLike::Known(
244            KnownIdent::Protected,
245        )))),
246        "private" => Some(Token::Word(Word::Ident(IdentLike::Known(
247            KnownIdent::Private,
248        )))),
249        _ => None,
250    })
251});
252
253const L_Q: ByteHandler = IDN;
254
255const L_R: ByteHandler = Some(|lexer| {
256    lexer.read_keyword_with(&|s| match s {
257        "return" => Some(Token::Word(Word::Keyword(Keyword::Return))),
258        "readonly" => Some(Token::Word(Word::Ident(IdentLike::Known(
259            KnownIdent::Readonly,
260        )))),
261        "require" => Some(Token::Word(Word::Ident(IdentLike::Known(
262            KnownIdent::Require,
263        )))),
264        _ => None,
265    })
266});
267
268const L_S: ByteHandler = Some(|lexer| {
269    lexer.read_keyword_with(&|s| match s {
270        "super" => Some(Token::Word(Word::Keyword(Keyword::Super))),
271        "static" => Some(Token::Word(Word::Ident(IdentLike::Known(
272            KnownIdent::Static,
273        )))),
274        "switch" => Some(Token::Word(Word::Keyword(Keyword::Switch))),
275        "symbol" => Some(Token::Word(Word::Ident(IdentLike::Known(
276            KnownIdent::Symbol,
277        )))),
278        "set" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Set)))),
279        "string" => Some(Token::Word(Word::Ident(IdentLike::Known(
280            KnownIdent::String,
281        )))),
282        "satisfies" => Some(Token::Word(Word::Ident(IdentLike::Known(
283            KnownIdent::Satisfies,
284        )))),
285        _ => None,
286    })
287});
288
289const L_T: ByteHandler = Some(|lexer| {
290    lexer.read_keyword_with(&|s| match s {
291        "this" => Some(Token::Word(Word::Keyword(Keyword::This))),
292        "throw" => Some(Token::Word(Word::Keyword(Keyword::Throw))),
293        "true" => Some(Token::Word(Word::True)),
294        "typeof" => Some(Token::Word(Word::Keyword(Keyword::TypeOf))),
295        "try" => Some(Token::Word(Word::Keyword(Keyword::Try))),
296        "type" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Type)))),
297        "target" => Some(Token::Word(Word::Ident(IdentLike::Known(
298            KnownIdent::Target,
299        )))),
300        _ => None,
301    })
302});
303
304const L_U: ByteHandler = Some(|lexer| {
305    lexer.read_keyword_with(&|s| match s {
306        "using" => Some(Token::Word(Word::Ident(IdentLike::Known(
307            KnownIdent::Using,
308        )))),
309        "unique" => Some(Token::Word(Word::Ident(IdentLike::Known(
310            KnownIdent::Unique,
311        )))),
312        "undefined" => Some(Token::Word(Word::Ident(IdentLike::Known(
313            KnownIdent::Undefined,
314        )))),
315        "unknown" => Some(Token::Word(Word::Ident(IdentLike::Known(
316            KnownIdent::Unknown,
317        )))),
318        _ => None,
319    })
320});
321
322const L_V: ByteHandler = Some(|lexer| {
323    lexer.read_keyword_with(&|s| match s {
324        "var" => Some(Token::Word(Word::Keyword(Keyword::Var))),
325        "void" => Some(Token::Word(Word::Keyword(Keyword::Void))),
326        _ => None,
327    })
328});
329
330const L_W: ByteHandler = Some(|lexer| {
331    lexer.read_keyword_with(&|s| match s {
332        "while" => Some(Token::Word(Word::Keyword(Keyword::While))),
333        "with" => Some(Token::Word(Word::Keyword(Keyword::With))),
334        _ => None,
335    })
336});
337
338const L_X: ByteHandler = IDN;
339
340const L_Y: ByteHandler = Some(|lexer| {
341    lexer.read_keyword_with(&|s| match s {
342        "yield" => Some(Token::Word(Word::Keyword(Keyword::Yield))),
343        _ => None,
344    })
345});
346
347const L_Z: ByteHandler = IDN;
348
349/// `0`
350const ZER: ByteHandler = Some(|lexer| lexer.read_token_zero());
351
352/// Numbers
353const DIG: ByteHandler = Some(|lexer| {
354    debug_assert!(lexer.cur().is_some_and(|cur| cur != '0'));
355    lexer.read_number::<false, false>().map(|v| match v {
356        Either::Left((value, raw)) => Token::Num { value, raw },
357        Either::Right((value, raw)) => Token::BigInt { value, raw },
358    })
359});
360
361/// String literals with `'` or `"`
362const QOT: ByteHandler = Some(|lexer| lexer.read_str_lit());
363
364/// Unicode
365const UNI: ByteHandler = Some(|lexer| {
366    let c = unsafe {
367        // Safety: Byte handler is only called for non-last chracters
368        lexer.input.cur().unwrap_unchecked()
369    };
370
371    // Identifier or keyword. '\uXXXX' sequences are allowed in
372    // identifiers, so '\' also dispatches to that.
373    if c == '\\' || c.is_ident_start() {
374        return lexer.read_ident_unknown();
375    }
376
377    let start = lexer.cur_pos();
378    unsafe {
379        // Safety: Byte handler is only called for non-last chracters
380        lexer.input.bump();
381    }
382    lexer.error_span(pos_span(start), SyntaxError::UnexpectedChar { c })?
383});
384
385/// `:`
386const COL: ByteHandler = Some(|lexer| lexer.read_token_colon());
387
388/// `%`
389const PRC: ByteHandler = Some(|lexer| lexer.read_token_mul_mod(false));
390
391/// `*`
392const ATR: ByteHandler = Some(|lexer| lexer.read_token_mul_mod(true));
393
394/// `?`
395const QST: ByteHandler = Some(|lexer| lexer.read_token_question_mark());
396
397/// `&`
398const AMP: ByteHandler = Some(|lexer| lexer.read_token_logical::<b'&'>());
399
400/// `|`
401const PIP: ByteHandler = Some(|lexer| lexer.read_token_logical::<b'|'>());
402
/// Defines a handler constant for a token that is exactly one byte long.
///
/// `single_char!(NAME, b'x', Variant)` expands to a `ByteHandler` named
/// `NAME` that consumes one byte and returns `Token::Variant`. The byte
/// literal is not used by the expansion; it only records at the call site
/// which character the handler is meant for.
macro_rules! single_char {
    ($handler:ident, $byte:literal, $variant:ident) => {
        const $handler: ByteHandler = Some(|lx| {
            lx.input.bump_bytes(1);
            Ok(Token::$variant)
        });
    };
}
411
412single_char!(SEM, b';', Semi);
413single_char!(COM, b',', Comma);
414single_char!(TPL, b'`', BackQuote);
415single_char!(TLD, b'~', Tilde);
416single_char!(AT_, b'@', At);
417
418single_char!(PNO, b'(', LParen);
419single_char!(PNC, b')', RParen);
420
421single_char!(BTO, b'[', LBracket);
422single_char!(BTC, b']', RBracket);
423
424single_char!(BEO, b'{', LBrace);
425single_char!(BEC, b'}', RBrace);
426
427/// `^`
428const CRT: ByteHandler = Some(|lexer| {
429    // Bitwise xor
430    lexer.input.bump_bytes(1);
431    Ok(if lexer.input.cur_as_ascii() == Some(b'=') {
432        lexer.input.bump_bytes(1);
433        Token::AssignOp(AssignOp::BitXorAssign)
434    } else {
435        Token::BinOp(BinOpToken::BitXor)
436    })
437});
438
439/// `+`
440const PLS: ByteHandler = Some(|lexer| lexer.read_token_plus_minus::<b'+'>());
441
442/// `-`
443const MIN: ByteHandler = Some(|lexer| lexer.read_token_plus_minus::<b'-'>());
444
445/// `!`
446const EXL: ByteHandler = Some(|lexer| lexer.read_token_bang_or_eq::<b'!'>());
447
448/// `=`
449const EQL: ByteHandler = Some(|lexer| lexer.read_token_bang_or_eq::<b'='>());
450
451/// `.`
452const PRD: ByteHandler = Some(|lexer| lexer.read_token_dot());
453
454/// `<`
455const LSS: ByteHandler = Some(|lexer| lexer.read_token_lt_gt::<b'<'>());
456
457/// `>`
458const MOR: ByteHandler = Some(|lexer| lexer.read_token_lt_gt::<b'>'>());
459
460/// `/`
461const SLH: ByteHandler = Some(|lexer| lexer.read_slash());
462
463/// `#`
464const HSH: ByteHandler = Some(|lexer| lexer.read_token_number_sign());