swc_ecma_parser/lexer/
mod.rs

1//! ECMAScript lexer.
2
3use std::{cell::RefCell, char, iter::FusedIterator, rc::Rc};
4
5use swc_atoms::AtomStoreCell;
6use swc_common::{
7    comments::Comments,
8    input::{Input, StringInput},
9    BytePos,
10};
11use swc_ecma_ast::EsVersion;
12use swc_ecma_lexer::{
13    common::{
14        lexer::{
15            char::CharExt, comments_buffer::CommentsBuffer, fixed_len_span, pos_span, LexResult,
16            Lexer as LexerTrait,
17        },
18        syntax::SyntaxFlags,
19    },
20    lexer::TokenFlags,
21};
22
23use self::table::{ByteHandler, BYTE_HANDLERS};
24use crate::{
25    error::{Error, SyntaxError},
26    input::Tokens,
27    Context, Syntax,
28};
29
30#[cfg(feature = "unstable")]
31pub(crate) mod capturing;
32mod state;
33mod table;
34pub(crate) mod token;
35
36pub(crate) use token::{NextTokenAndSpan, Token, TokenAndSpan, TokenValue};
37
38#[derive(Clone)]
39pub struct Lexer<'a> {
40    comments: Option<&'a dyn Comments>,
41    /// [Some] if comment comment parsing is enabled. Otherwise [None]
42    comments_buffer: Option<CommentsBuffer>,
43
44    pub ctx: Context,
45    input: StringInput<'a>,
46    start_pos: BytePos,
47
48    state: self::state::State,
49    token_flags: TokenFlags,
50    pub(crate) syntax: SyntaxFlags,
51    pub(crate) target: EsVersion,
52
53    errors: Rc<RefCell<Vec<Error>>>,
54    module_errors: Rc<RefCell<Vec<Error>>>,
55
56    atoms: Rc<AtomStoreCell>,
57}
58
59impl FusedIterator for Lexer<'_> {}
60
61impl<'a> swc_ecma_lexer::common::lexer::Lexer<'a, TokenAndSpan> for Lexer<'a> {
62    type State = self::state::State;
63    type Token = self::Token;
64
65    #[inline(always)]
66    fn input(&self) -> &StringInput<'a> {
67        &self.input
68    }
69
70    #[inline(always)]
71    fn input_mut(&mut self) -> &mut StringInput<'a> {
72        &mut self.input
73    }
74
75    #[inline(always)]
76    fn push_error(&self, error: Error) {
77        self.errors.borrow_mut().push(error);
78    }
79
80    #[inline(always)]
81    fn state(&self) -> &Self::State {
82        &self.state
83    }
84
85    #[inline(always)]
86    fn state_mut(&mut self) -> &mut Self::State {
87        &mut self.state
88    }
89
90    #[inline(always)]
91    fn comments(&self) -> Option<&'a dyn swc_common::comments::Comments> {
92        self.comments
93    }
94
95    #[inline(always)]
96    fn comments_buffer(
97        &self,
98    ) -> Option<&swc_ecma_lexer::common::lexer::comments_buffer::CommentsBuffer> {
99        self.comments_buffer.as_ref()
100    }
101
102    #[inline(always)]
103    fn comments_buffer_mut(
104        &mut self,
105    ) -> Option<&mut swc_ecma_lexer::common::lexer::comments_buffer::CommentsBuffer> {
106        self.comments_buffer.as_mut()
107    }
108
109    #[inline(always)]
110    unsafe fn input_slice(&mut self, start: BytePos, end: BytePos) -> &'a str {
111        self.input.slice(start, end)
112    }
113
114    #[inline(always)]
115    fn input_uncons_while(&mut self, f: impl FnMut(char) -> bool) -> &'a str {
116        self.input_mut().uncons_while(f)
117    }
118
119    #[inline(always)]
120    fn atom<'b>(&self, s: impl Into<std::borrow::Cow<'b, str>>) -> swc_atoms::Atom {
121        self.atoms.atom(s)
122    }
123}
124
125impl<'a> Lexer<'a> {
126    pub fn new(
127        syntax: Syntax,
128        target: EsVersion,
129        input: StringInput<'a>,
130        comments: Option<&'a dyn Comments>,
131    ) -> Self {
132        let start_pos = input.last_pos();
133
134        Lexer {
135            comments,
136            comments_buffer: comments.is_some().then(CommentsBuffer::new),
137            ctx: Default::default(),
138            input,
139            start_pos,
140            state: self::state::State::new(start_pos),
141            syntax: syntax.into_flags(),
142            target,
143            errors: Default::default(),
144            module_errors: Default::default(),
145            atoms: Default::default(),
146            token_flags: TokenFlags::empty(),
147        }
148    }
149
150    /// babel: `getTokenFromCode`
151    fn read_token(&mut self) -> LexResult<Option<Token>> {
152        self.token_flags = TokenFlags::empty();
153        let byte = match self.input.as_str().as_bytes().first() {
154            Some(&v) => v,
155            None => return Ok(None),
156        };
157
158        let handler = unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) };
159
160        match handler {
161            Some(handler) => handler(self),
162            None => {
163                let start = self.cur_pos();
164                self.input.bump_bytes(1);
165                self.error_span(
166                    pos_span(start),
167                    SyntaxError::UnexpectedChar { c: byte as _ },
168                )
169            }
170        }
171    }
172
173    fn read_token_plus_minus<const C: u8>(&mut self) -> LexResult<Option<Token>> {
174        let start = self.cur_pos();
175
176        unsafe {
177            // Safety: cur() is Some(c), if this method is called.
178            self.input.bump();
179        }
180
181        // '++', '--'
182        Ok(Some(if self.input.cur() == Some(C as char) {
183            unsafe {
184                // Safety: cur() is Some(c)
185                self.input.bump();
186            }
187
188            // Handle -->
189            if self.state.had_line_break && C == b'-' && self.eat(b'>') {
190                self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
191                self.skip_line_comment(0);
192                self.skip_space::<true>();
193                return self.read_token();
194            }
195
196            if C == b'+' {
197                Token::PlusPlus
198            } else {
199                Token::MinusMinus
200            }
201        } else if self.input.eat_byte(b'=') {
202            if C == b'+' {
203                Token::PlusEq
204            } else {
205                Token::MinusEq
206            }
207        } else if C == b'+' {
208            Token::Plus
209        } else {
210            Token::Minus
211        }))
212    }
213
214    fn read_token_bang_or_eq<const C: u8>(&mut self) -> LexResult<Option<Token>> {
215        let start = self.cur_pos();
216        let had_line_break_before_last = self.had_line_break_before_last();
217
218        unsafe {
219            // Safety: cur() is Some(c) if this method is called.
220            self.input.bump();
221        }
222
223        Ok(Some(if self.input.eat_byte(b'=') {
224            // "=="
225
226            if self.input.eat_byte(b'=') {
227                if C == b'!' {
228                    Token::NotEqEq
229                } else {
230                    // =======
231                    //    ^
232                    if had_line_break_before_last && self.is_str("====") {
233                        self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
234                        self.skip_line_comment(4);
235                        self.skip_space::<true>();
236                        return self.read_token();
237                    }
238
239                    Token::EqEqEq
240                }
241            } else if C == b'!' {
242                Token::NotEq
243            } else {
244                Token::EqEq
245            }
246        } else if C == b'=' && self.input.eat_byte(b'>') {
247            // "=>"
248
249            Token::Arrow
250        } else if C == b'!' {
251            Token::Bang
252        } else {
253            Token::Eq
254        }))
255    }
256}
257
258impl Lexer<'_> {
259    #[inline(never)]
260    fn read_token_lt_gt<const C: u8>(&mut self) -> LexResult<Option<Token>> {
261        let had_line_break_before_last = self.had_line_break_before_last();
262        let start = self.cur_pos();
263        self.bump();
264
265        if self.syntax.typescript()
266            && self.ctx.contains(Context::InType)
267            && !self.ctx.contains(Context::ShouldNotLexLtOrGtAsType)
268        {
269            if C == b'<' {
270                return Ok(Some(Token::Lt));
271            } else if C == b'>' {
272                return Ok(Some(Token::Gt));
273            }
274        }
275
276        // XML style comment. `<!--`
277        if C == b'<' && self.is(b'!') && self.peek() == Some('-') && self.peek_ahead() == Some('-')
278        {
279            self.skip_line_comment(3);
280            self.skip_space::<true>();
281            self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
282
283            return self.read_token();
284        }
285
286        let mut op = if C == b'<' { Token::Lt } else { Token::Gt };
287
288        // '<<', '>>'
289        if self.cur() == Some(C as char) {
290            self.bump();
291            op = if C == b'<' {
292                Token::LShift
293            } else {
294                Token::RShift
295            };
296
297            //'>>>'
298            if C == b'>' && self.cur() == Some(C as char) {
299                self.bump();
300                op = Token::ZeroFillRShift;
301            }
302        }
303
304        let token = if self.eat(b'=') {
305            match op {
306                Token::Lt => Token::LtEq,
307                Token::Gt => Token::GtEq,
308                Token::LShift => Token::LShiftEq,
309                Token::RShift => Token::RShiftEq,
310                Token::ZeroFillRShift => Token::ZeroFillRShiftEq,
311                _ => unreachable!(),
312            }
313        } else {
314            op
315        };
316
317        // All conflict markers consist of the same character repeated seven times.
318        // If it is a <<<<<<< or >>>>>>> marker then it is also followed by a space.
319        // <<<<<<<
320        //   ^
321        // >>>>>>>
322        //    ^
323        if had_line_break_before_last
324            && match op {
325                Token::LShift if self.is_str("<<<<< ") => true,
326                Token::ZeroFillRShift if self.is_str(">>>> ") => true,
327                _ => false,
328            }
329        {
330            self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
331            self.skip_line_comment(5);
332            self.skip_space::<true>();
333            return self.read_token();
334        }
335
336        Ok(Some(token))
337    }
338
339    fn read_token_back_quote(&mut self) -> LexResult<Option<Token>> {
340        let start = self.cur_pos();
341        self.scan_template_token(start, true).map(Some)
342    }
343
344    fn scan_template_token(
345        &mut self,
346        start: BytePos,
347        started_with_backtick: bool,
348    ) -> LexResult<Token> {
349        debug_assert!(self.cur() == Some(if started_with_backtick { '`' } else { '}' }));
350        let mut cooked = Ok(String::with_capacity(8));
351        self.bump(); // `}` or `\``
352        let mut cooked_slice_start = self.cur_pos();
353        let raw_slice_start = cooked_slice_start;
354        let raw_atom = |this: &mut Self| {
355            let last_pos = this.cur_pos();
356            let s = unsafe { this.input.slice(raw_slice_start, last_pos) };
357            this.atoms.atom(s)
358        };
359        macro_rules! consume_cooked {
360            () => {{
361                if let Ok(cooked) = &mut cooked {
362                    let last_pos = self.cur_pos();
363                    cooked.push_str(unsafe {
364                        // Safety: Both of start and last_pos are valid position because we got them
365                        // from `self.input`
366                        self.input.slice(cooked_slice_start, last_pos)
367                    });
368                }
369            }};
370        }
371
372        while let Some(c) = self.cur() {
373            if c == '`' {
374                consume_cooked!();
375                let cooked = cooked.map(|cooked| self.atoms.atom(cooked));
376                let raw = raw_atom(self);
377                self.bump();
378                return Ok(if started_with_backtick {
379                    self.set_token_value(Some(TokenValue::Template { raw, cooked }));
380                    Token::NoSubstitutionTemplateLiteral
381                } else {
382                    self.set_token_value(Some(TokenValue::Template { raw, cooked }));
383                    Token::TemplateTail
384                });
385            } else if c == '$' && self.input.peek() == Some('{') {
386                consume_cooked!();
387                let cooked = cooked.map(|cooked| self.atoms.atom(cooked));
388                let raw = raw_atom(self);
389                self.input.bump_bytes(2);
390                return Ok(if started_with_backtick {
391                    self.set_token_value(Some(TokenValue::Template { raw, cooked }));
392                    Token::TemplateHead
393                } else {
394                    self.set_token_value(Some(TokenValue::Template { raw, cooked }));
395                    Token::TemplateMiddle
396                });
397            } else if c == '\\' {
398                consume_cooked!();
399
400                match self.read_escaped_char(true) {
401                    Ok(Some(chars)) => {
402                        if let Ok(ref mut cooked) = cooked {
403                            for c in chars {
404                                cooked.extend(c);
405                            }
406                        }
407                    }
408                    Ok(None) => {}
409                    Err(error) => {
410                        cooked = Err(error);
411                    }
412                }
413
414                cooked_slice_start = self.cur_pos();
415            } else if c.is_line_terminator() {
416                consume_cooked!();
417
418                let c = if c == '\r' && self.peek() == Some('\n') {
419                    self.bump(); // '\r'
420                    '\n'
421                } else {
422                    match c {
423                        '\n' => '\n',
424                        '\r' => '\n',
425                        '\u{2028}' => '\u{2028}',
426                        '\u{2029}' => '\u{2029}',
427                        _ => unreachable!(),
428                    }
429                };
430
431                self.bump();
432
433                if let Ok(ref mut cooked) = cooked {
434                    cooked.push(c);
435                }
436                cooked_slice_start = self.cur_pos();
437            } else {
438                self.bump();
439            }
440        }
441
442        self.error(start, SyntaxError::UnterminatedTpl)?
443    }
444}