swc_ecma_lexer/lexer/
mod.rs

1//! ECMAScript lexer.
2
3use std::{cell::RefCell, char, iter::FusedIterator, rc::Rc};
4
5use swc_atoms::{wtf8::Wtf8, AtomStoreCell};
6use swc_common::{
7    comments::Comments,
8    input::{Input, StringInput},
9    BytePos, Span,
10};
11use swc_ecma_ast::{AssignOp, EsVersion};
12
13pub use self::state::{TokenContext, TokenContexts, TokenFlags, TokenType};
14use self::table::{ByteHandler, BYTE_HANDLERS};
15use crate::{
16    common::{
17        lexer::{char::CharExt, fixed_len_span, pos_span, LexResult, Lexer as LexerTrait},
18        syntax::{Syntax, SyntaxFlags},
19    },
20    error::{Error, SyntaxError},
21    lexer::comments_buffer::CommentsBuffer,
22    tok,
23    token::{BinOpToken, Token, TokenAndSpan},
24    Context,
25};
26
27mod comments_buffer;
28mod jsx;
29mod number;
30mod state;
31mod table;
32#[cfg(test)]
33mod tests;
34
/// ECMAScript lexer over a [`StringInput`].
///
/// `Clone` is derived. `errors`, `module_errors` and `atoms` are held behind
/// [`Rc`], so a cloned lexer shares those three with the lexer it was cloned
/// from.
#[derive(Clone)]
pub struct Lexer<'a> {
    /// Comment store handed to [`Lexer::new`], if any.
    comments: Option<&'a dyn Comments>,
    /// [Some] if comment parsing is enabled. Otherwise [None]
    ///
    /// `Lexer::new` creates the buffer exactly when `comments` is `Some`.
    comments_buffer: Option<CommentsBuffer>,

    /// Context flags. `read_token_lt_gt` consults `Context::InType` and
    /// `Context::ShouldNotLexLtOrGtAsType` from here.
    pub ctx: Context,
    /// The source text being consumed.
    input: StringInput<'a>,
    /// `input.last_pos()` as observed when the lexer was constructed.
    start_pos: BytePos,

    /// Lexer state, created in `Lexer::new` from the syntax flags and
    /// `start_pos`.
    state: self::state::State,
    /// Syntax flags obtained from `Syntax::into_flags` in `Lexer::new`.
    pub(crate) syntax: SyntaxFlags,
    /// Target ECMAScript version passed to `Lexer::new`.
    pub(crate) target: EsVersion,

    /// Errors recorded through `push_error`.
    errors: Rc<RefCell<Vec<Error>>>,
    /// NOTE(review): presumably errors that only apply when the source is
    /// parsed as a module; the code that fills this list is not visible here.
    module_errors: Rc<RefCell<Vec<Error>>>,

    /// Atom store backing `atom` and `wtf8_atom`.
    atoms: Rc<AtomStoreCell>,
}
54
// Marker impl: `FusedIterator` promises that once the iterator has returned
// `None` it keeps returning `None`. The `Iterator` impl this relies on is not
// in this part of the file.
impl FusedIterator for Lexer<'_> {}
56
57impl<'a> crate::common::lexer::Lexer<'a, TokenAndSpan> for Lexer<'a> {
58    type CommentsBuffer = CommentsBuffer;
59    type State = self::state::State;
60    type Token = self::Token;
61
62    #[inline(always)]
63    fn input(&self) -> &StringInput<'a> {
64        &self.input
65    }
66
67    #[inline(always)]
68    fn input_mut(&mut self) -> &mut StringInput<'a> {
69        &mut self.input
70    }
71
72    #[inline(always)]
73    fn push_error(&mut self, error: crate::error::Error) {
74        self.errors.borrow_mut().push(error);
75    }
76
77    #[inline(always)]
78    fn state(&self) -> &Self::State {
79        &self.state
80    }
81
82    #[inline(always)]
83    fn state_mut(&mut self) -> &mut Self::State {
84        &mut self.state
85    }
86
87    #[inline(always)]
88    fn comments(&self) -> Option<&'a dyn swc_common::comments::Comments> {
89        self.comments
90    }
91
92    #[inline(always)]
93    fn comments_buffer(&self) -> Option<&Self::CommentsBuffer> {
94        self.comments_buffer.as_ref()
95    }
96
97    #[inline(always)]
98    fn comments_buffer_mut(&mut self) -> Option<&mut Self::CommentsBuffer> {
99        self.comments_buffer.as_mut()
100    }
101
102    #[inline(always)]
103    unsafe fn input_slice(&mut self, start: BytePos, end: BytePos) -> &'a str {
104        self.input.slice(start, end)
105    }
106
107    #[inline(always)]
108    fn input_uncons_while(&mut self, f: impl FnMut(char) -> bool) -> &'a str {
109        self.input_mut().uncons_while(f)
110    }
111
112    #[inline(always)]
113    fn atom<'b>(&self, s: impl Into<std::borrow::Cow<'b, str>>) -> swc_atoms::Atom {
114        self.atoms.atom(s)
115    }
116
117    #[inline(always)]
118    fn wtf8_atom<'b>(&self, s: impl Into<std::borrow::Cow<'b, Wtf8>>) -> swc_atoms::Wtf8Atom {
119        self.atoms.wtf8_atom(s)
120    }
121}
122
123impl<'a> Lexer<'a> {
124    pub fn new(
125        syntax: Syntax,
126        target: EsVersion,
127        input: StringInput<'a>,
128        comments: Option<&'a dyn Comments>,
129    ) -> Self {
130        let start_pos = input.last_pos();
131        let syntax_flags = syntax.into_flags();
132
133        Lexer {
134            comments,
135            comments_buffer: comments.is_some().then(CommentsBuffer::new),
136            ctx: Default::default(),
137            input,
138            start_pos,
139            state: self::state::State::new(syntax_flags, start_pos),
140            syntax: syntax_flags,
141            target,
142            errors: Default::default(),
143            module_errors: Default::default(),
144            atoms: Default::default(),
145        }
146    }
147
148    /// babel: `getTokenFromCode`
149    fn read_token(&mut self) -> LexResult<Token> {
150        let byte = match self.input.as_str().as_bytes().first() {
151            Some(&v) => v,
152            None => return Ok(Token::Eof),
153        };
154
155        let handler = unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) };
156
157        match handler {
158            Some(handler) => handler(self),
159            None => {
160                let start = self.cur_pos();
161                self.input.bump_bytes(1);
162                self.error_span(
163                    pos_span(start),
164                    SyntaxError::UnexpectedChar { c: byte as _ },
165                )
166            }
167        }
168    }
169
170    fn read_token_plus_minus<const C: u8>(&mut self) -> LexResult<Token> {
171        let start = self.cur_pos();
172
173        unsafe {
174            // Safety: cur() is Some(c), if this method is called.
175            self.input.bump();
176        }
177
178        // '++', '--'
179        Ok(if self.input.cur() == Some(C as char) {
180            unsafe {
181                // Safety: cur() is Some(c)
182                self.input.bump();
183            }
184
185            // Handle -->
186            if self.state.had_line_break && C == b'-' && self.eat(b'>') {
187                self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
188                self.skip_line_comment(0);
189                self.skip_space::<true>();
190                return self.read_token();
191            }
192
193            if C == b'+' {
194                Token::PlusPlus
195            } else {
196                Token::MinusMinus
197            }
198        } else if self.input.eat_byte(b'=') {
199            Token::AssignOp(if C == b'+' {
200                AssignOp::AddAssign
201            } else {
202                AssignOp::SubAssign
203            })
204        } else {
205            Token::BinOp(if C == b'+' {
206                BinOpToken::Add
207            } else {
208                BinOpToken::Sub
209            })
210        })
211    }
212
213    fn read_token_bang_or_eq<const C: u8>(&mut self) -> LexResult<Token> {
214        let start = self.cur_pos();
215        let had_line_break_before_last = self.had_line_break_before_last();
216
217        unsafe {
218            // Safety: cur() is Some(c) if this method is called.
219            self.input.bump();
220        }
221
222        Ok(if self.input.eat_byte(b'=') {
223            // "=="
224
225            if self.input.eat_byte(b'=') {
226                if C == b'!' {
227                    Token::BinOp(BinOpToken::NotEqEq)
228                } else {
229                    // =======
230                    //    ^
231                    if had_line_break_before_last && self.is_str("====") {
232                        self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
233                        self.skip_line_comment(4);
234                        self.skip_space::<true>();
235                        return self.read_token();
236                    }
237
238                    Token::BinOp(BinOpToken::EqEqEq)
239                }
240            } else if C == b'!' {
241                Token::BinOp(BinOpToken::NotEq)
242            } else {
243                Token::BinOp(BinOpToken::EqEq)
244            }
245        } else if C == b'=' && self.input.eat_byte(b'>') {
246            // "=>"
247
248            Token::Arrow
249        } else if C == b'!' {
250            Token::Bang
251        } else {
252            Token::AssignOp(AssignOp::Assign)
253        })
254    }
255}
256
257impl Lexer<'_> {
258    #[inline(never)]
259    fn read_token_lt_gt<const C: u8>(&mut self) -> LexResult<Token> {
260        let had_line_break_before_last = self.had_line_break_before_last();
261        let start = self.cur_pos();
262        self.bump();
263
264        if self.syntax.typescript()
265            && self.ctx.contains(Context::InType)
266            && !self.ctx.contains(Context::ShouldNotLexLtOrGtAsType)
267        {
268            if C == b'<' {
269                return Ok(tok!('<'));
270            } else if C == b'>' {
271                return Ok(tok!('>'));
272            }
273        }
274
275        // XML style comment. `<!--`
276        if C == b'<' && self.is(b'!') && self.peek() == Some('-') && self.peek_ahead() == Some('-')
277        {
278            self.skip_line_comment(3);
279            self.skip_space::<true>();
280            self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
281
282            return self.read_token();
283        }
284
285        let mut op = if C == b'<' {
286            BinOpToken::Lt
287        } else {
288            BinOpToken::Gt
289        };
290
291        // '<<', '>>'
292        if self.cur() == Some(C as char) {
293            self.bump();
294            op = if C == b'<' {
295                BinOpToken::LShift
296            } else {
297                BinOpToken::RShift
298            };
299
300            //'>>>'
301            if C == b'>' && self.cur() == Some(C as char) {
302                self.bump();
303                op = BinOpToken::ZeroFillRShift;
304            }
305        }
306
307        let token = if self.eat(b'=') {
308            match op {
309                BinOpToken::Lt => Token::BinOp(BinOpToken::LtEq),
310                BinOpToken::Gt => Token::BinOp(BinOpToken::GtEq),
311                BinOpToken::LShift => Token::AssignOp(AssignOp::LShiftAssign),
312                BinOpToken::RShift => Token::AssignOp(AssignOp::RShiftAssign),
313                BinOpToken::ZeroFillRShift => Token::AssignOp(AssignOp::ZeroFillRShiftAssign),
314                _ => unreachable!(),
315            }
316        } else {
317            Token::BinOp(op)
318        };
319
320        // All conflict markers consist of the same character repeated seven times.
321        // If it is a <<<<<<< or >>>>>>> marker then it is also followed by a space.
322        // <<<<<<<
323        //   ^
324        // >>>>>>>
325        //    ^
326        if had_line_break_before_last
327            && match op {
328                BinOpToken::LShift if self.is_str("<<<<< ") => true,
329                BinOpToken::ZeroFillRShift if self.is_str(">>>> ") => true,
330                _ => false,
331            }
332        {
333            self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
334            self.skip_line_comment(5);
335            self.skip_space::<true>();
336            return self.read_token();
337        }
338
339        Ok(token)
340    }
341}