swc_ecma_lexer/lexer/
mod.rs

1//! ECMAScript lexer.
2
3use std::{cell::RefCell, char, iter::FusedIterator, rc::Rc};
4
5use swc_atoms::AtomStoreCell;
6use swc_common::{
7    comments::Comments,
8    input::{Input, StringInput},
9    BytePos, Span,
10};
11use swc_ecma_ast::{AssignOp, EsVersion};
12
13pub use self::state::{TokenContext, TokenContexts, TokenFlags, TokenType};
14use self::table::{ByteHandler, BYTE_HANDLERS};
15use crate::{
16    common::{
17        lexer::{char::CharExt, fixed_len_span, pos_span, LexResult, Lexer as LexerTrait},
18        syntax::{Syntax, SyntaxFlags},
19    },
20    error::{Error, SyntaxError},
21    lexer::comments_buffer::CommentsBuffer,
22    tok,
23    token::{BinOpToken, Token, TokenAndSpan},
24    Context,
25};
26
27mod comments_buffer;
28mod jsx;
29mod number;
30mod state;
31mod table;
32#[cfg(test)]
33mod tests;
34
35#[derive(Clone)]
36pub struct Lexer<'a> {
37    comments: Option<&'a dyn Comments>,
38    /// [Some] if comment comment parsing is enabled. Otherwise [None]
39    comments_buffer: Option<CommentsBuffer>,
40
41    pub ctx: Context,
42    input: StringInput<'a>,
43    start_pos: BytePos,
44
45    state: self::state::State,
46    pub(crate) syntax: SyntaxFlags,
47    pub(crate) target: EsVersion,
48
49    errors: Rc<RefCell<Vec<Error>>>,
50    module_errors: Rc<RefCell<Vec<Error>>>,
51
52    atoms: Rc<AtomStoreCell>,
53}
54
55impl FusedIterator for Lexer<'_> {}
56
57impl<'a> crate::common::lexer::Lexer<'a, TokenAndSpan> for Lexer<'a> {
58    type CommentsBuffer = CommentsBuffer;
59    type State = self::state::State;
60    type Token = self::Token;
61
62    #[inline(always)]
63    fn input(&self) -> &StringInput<'a> {
64        &self.input
65    }
66
67    #[inline(always)]
68    fn input_mut(&mut self) -> &mut StringInput<'a> {
69        &mut self.input
70    }
71
72    #[inline(always)]
73    fn push_error(&mut self, error: crate::error::Error) {
74        self.errors.borrow_mut().push(error);
75    }
76
77    #[inline(always)]
78    fn state(&self) -> &Self::State {
79        &self.state
80    }
81
82    #[inline(always)]
83    fn state_mut(&mut self) -> &mut Self::State {
84        &mut self.state
85    }
86
87    #[inline(always)]
88    fn comments(&self) -> Option<&'a dyn swc_common::comments::Comments> {
89        self.comments
90    }
91
92    #[inline(always)]
93    fn comments_buffer(&self) -> Option<&Self::CommentsBuffer> {
94        self.comments_buffer.as_ref()
95    }
96
97    #[inline(always)]
98    fn comments_buffer_mut(&mut self) -> Option<&mut Self::CommentsBuffer> {
99        self.comments_buffer.as_mut()
100    }
101
102    #[inline(always)]
103    unsafe fn input_slice(&mut self, start: BytePos, end: BytePos) -> &'a str {
104        self.input.slice(start, end)
105    }
106
107    #[inline(always)]
108    fn input_uncons_while(&mut self, f: impl FnMut(char) -> bool) -> &'a str {
109        self.input_mut().uncons_while(f)
110    }
111
112    #[inline(always)]
113    fn atom<'b>(&self, s: impl Into<std::borrow::Cow<'b, str>>) -> swc_atoms::Atom {
114        self.atoms.atom(s)
115    }
116}
117
118impl<'a> Lexer<'a> {
119    pub fn new(
120        syntax: Syntax,
121        target: EsVersion,
122        input: StringInput<'a>,
123        comments: Option<&'a dyn Comments>,
124    ) -> Self {
125        let start_pos = input.last_pos();
126        let syntax_flags = syntax.into_flags();
127
128        Lexer {
129            comments,
130            comments_buffer: comments.is_some().then(CommentsBuffer::new),
131            ctx: Default::default(),
132            input,
133            start_pos,
134            state: self::state::State::new(syntax_flags, start_pos),
135            syntax: syntax_flags,
136            target,
137            errors: Default::default(),
138            module_errors: Default::default(),
139            atoms: Default::default(),
140        }
141    }
142
143    /// babel: `getTokenFromCode`
144    fn read_token(&mut self) -> LexResult<Token> {
145        let byte = match self.input.as_str().as_bytes().first() {
146            Some(&v) => v,
147            None => return Ok(Token::Eof),
148        };
149
150        let handler = unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) };
151
152        match handler {
153            Some(handler) => handler(self),
154            None => {
155                let start = self.cur_pos();
156                self.input.bump_bytes(1);
157                self.error_span(
158                    pos_span(start),
159                    SyntaxError::UnexpectedChar { c: byte as _ },
160                )
161            }
162        }
163    }
164
165    fn read_token_plus_minus<const C: u8>(&mut self) -> LexResult<Token> {
166        let start = self.cur_pos();
167
168        unsafe {
169            // Safety: cur() is Some(c), if this method is called.
170            self.input.bump();
171        }
172
173        // '++', '--'
174        Ok(if self.input.cur() == Some(C as char) {
175            unsafe {
176                // Safety: cur() is Some(c)
177                self.input.bump();
178            }
179
180            // Handle -->
181            if self.state.had_line_break && C == b'-' && self.eat(b'>') {
182                self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
183                self.skip_line_comment(0);
184                self.skip_space::<true>();
185                return self.read_token();
186            }
187
188            if C == b'+' {
189                Token::PlusPlus
190            } else {
191                Token::MinusMinus
192            }
193        } else if self.input.eat_byte(b'=') {
194            Token::AssignOp(if C == b'+' {
195                AssignOp::AddAssign
196            } else {
197                AssignOp::SubAssign
198            })
199        } else {
200            Token::BinOp(if C == b'+' {
201                BinOpToken::Add
202            } else {
203                BinOpToken::Sub
204            })
205        })
206    }
207
208    fn read_token_bang_or_eq<const C: u8>(&mut self) -> LexResult<Token> {
209        let start = self.cur_pos();
210        let had_line_break_before_last = self.had_line_break_before_last();
211
212        unsafe {
213            // Safety: cur() is Some(c) if this method is called.
214            self.input.bump();
215        }
216
217        Ok(if self.input.eat_byte(b'=') {
218            // "=="
219
220            if self.input.eat_byte(b'=') {
221                if C == b'!' {
222                    Token::BinOp(BinOpToken::NotEqEq)
223                } else {
224                    // =======
225                    //    ^
226                    if had_line_break_before_last && self.is_str("====") {
227                        self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
228                        self.skip_line_comment(4);
229                        self.skip_space::<true>();
230                        return self.read_token();
231                    }
232
233                    Token::BinOp(BinOpToken::EqEqEq)
234                }
235            } else if C == b'!' {
236                Token::BinOp(BinOpToken::NotEq)
237            } else {
238                Token::BinOp(BinOpToken::EqEq)
239            }
240        } else if C == b'=' && self.input.eat_byte(b'>') {
241            // "=>"
242
243            Token::Arrow
244        } else if C == b'!' {
245            Token::Bang
246        } else {
247            Token::AssignOp(AssignOp::Assign)
248        })
249    }
250}
251
252impl Lexer<'_> {
253    #[inline(never)]
254    fn read_token_lt_gt<const C: u8>(&mut self) -> LexResult<Token> {
255        let had_line_break_before_last = self.had_line_break_before_last();
256        let start = self.cur_pos();
257        self.bump();
258
259        if self.syntax.typescript()
260            && self.ctx.contains(Context::InType)
261            && !self.ctx.contains(Context::ShouldNotLexLtOrGtAsType)
262        {
263            if C == b'<' {
264                return Ok(tok!('<'));
265            } else if C == b'>' {
266                return Ok(tok!('>'));
267            }
268        }
269
270        // XML style comment. `<!--`
271        if C == b'<' && self.is(b'!') && self.peek() == Some('-') && self.peek_ahead() == Some('-')
272        {
273            self.skip_line_comment(3);
274            self.skip_space::<true>();
275            self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
276
277            return self.read_token();
278        }
279
280        let mut op = if C == b'<' {
281            BinOpToken::Lt
282        } else {
283            BinOpToken::Gt
284        };
285
286        // '<<', '>>'
287        if self.cur() == Some(C as char) {
288            self.bump();
289            op = if C == b'<' {
290                BinOpToken::LShift
291            } else {
292                BinOpToken::RShift
293            };
294
295            //'>>>'
296            if C == b'>' && self.cur() == Some(C as char) {
297                self.bump();
298                op = BinOpToken::ZeroFillRShift;
299            }
300        }
301
302        let token = if self.eat(b'=') {
303            match op {
304                BinOpToken::Lt => Token::BinOp(BinOpToken::LtEq),
305                BinOpToken::Gt => Token::BinOp(BinOpToken::GtEq),
306                BinOpToken::LShift => Token::AssignOp(AssignOp::LShiftAssign),
307                BinOpToken::RShift => Token::AssignOp(AssignOp::RShiftAssign),
308                BinOpToken::ZeroFillRShift => Token::AssignOp(AssignOp::ZeroFillRShiftAssign),
309                _ => unreachable!(),
310            }
311        } else {
312            Token::BinOp(op)
313        };
314
315        // All conflict markers consist of the same character repeated seven times.
316        // If it is a <<<<<<< or >>>>>>> marker then it is also followed by a space.
317        // <<<<<<<
318        //   ^
319        // >>>>>>>
320        //    ^
321        if had_line_break_before_last
322            && match op {
323                BinOpToken::LShift if self.is_str("<<<<< ") => true,
324                BinOpToken::ZeroFillRShift if self.is_str(">>>> ") => true,
325                _ => false,
326            }
327        {
328            self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
329            self.skip_line_comment(5);
330            self.skip_space::<true>();
331            return self.read_token();
332        }
333
334        Ok(token)
335    }
336}