swc_ecma_lexer/lexer/
mod.rs1use std::{cell::RefCell, char, iter::FusedIterator, rc::Rc};
4
5use swc_atoms::AtomStoreCell;
6use swc_common::{
7 comments::Comments,
8 input::{Input, StringInput},
9 BytePos, Span,
10};
11use swc_ecma_ast::{AssignOp, EsVersion};
12
13pub use self::state::{TokenContext, TokenContexts, TokenFlags, TokenType};
14use self::table::{ByteHandler, BYTE_HANDLERS};
15use crate::{
16 common::{
17 lexer::{char::CharExt, fixed_len_span, pos_span, LexResult, Lexer as LexerTrait},
18 syntax::{Syntax, SyntaxFlags},
19 },
20 error::{Error, SyntaxError},
21 lexer::comments_buffer::CommentsBuffer,
22 tok,
23 token::{BinOpToken, Token, TokenAndSpan},
24 Context,
25};
26
27mod comments_buffer;
28mod jsx;
29mod number;
30mod state;
31mod table;
32#[cfg(test)]
33mod tests;
34
35#[derive(Clone)]
36pub struct Lexer<'a> {
37 comments: Option<&'a dyn Comments>,
38 comments_buffer: Option<CommentsBuffer>,
40
41 pub ctx: Context,
42 input: StringInput<'a>,
43 start_pos: BytePos,
44
45 state: self::state::State,
46 pub(crate) syntax: SyntaxFlags,
47 pub(crate) target: EsVersion,
48
49 errors: Rc<RefCell<Vec<Error>>>,
50 module_errors: Rc<RefCell<Vec<Error>>>,
51
52 atoms: Rc<AtomStoreCell>,
53}
54
// Marker impl: promises that once the lexer's `Iterator::next` has returned
// `None` it keeps returning `None`. The `Iterator` impl itself is not in this
// part of the file.
impl FusedIterator for Lexer<'_> {}
56
57impl<'a> crate::common::lexer::Lexer<'a, TokenAndSpan> for Lexer<'a> {
58 type CommentsBuffer = CommentsBuffer;
59 type State = self::state::State;
60 type Token = self::Token;
61
62 #[inline(always)]
63 fn input(&self) -> &StringInput<'a> {
64 &self.input
65 }
66
67 #[inline(always)]
68 fn input_mut(&mut self) -> &mut StringInput<'a> {
69 &mut self.input
70 }
71
72 #[inline(always)]
73 fn push_error(&mut self, error: crate::error::Error) {
74 self.errors.borrow_mut().push(error);
75 }
76
77 #[inline(always)]
78 fn state(&self) -> &Self::State {
79 &self.state
80 }
81
82 #[inline(always)]
83 fn state_mut(&mut self) -> &mut Self::State {
84 &mut self.state
85 }
86
87 #[inline(always)]
88 fn comments(&self) -> Option<&'a dyn swc_common::comments::Comments> {
89 self.comments
90 }
91
92 #[inline(always)]
93 fn comments_buffer(&self) -> Option<&Self::CommentsBuffer> {
94 self.comments_buffer.as_ref()
95 }
96
97 #[inline(always)]
98 fn comments_buffer_mut(&mut self) -> Option<&mut Self::CommentsBuffer> {
99 self.comments_buffer.as_mut()
100 }
101
102 #[inline(always)]
103 unsafe fn input_slice(&mut self, start: BytePos, end: BytePos) -> &'a str {
104 self.input.slice(start, end)
105 }
106
107 #[inline(always)]
108 fn input_uncons_while(&mut self, f: impl FnMut(char) -> bool) -> &'a str {
109 self.input_mut().uncons_while(f)
110 }
111
112 #[inline(always)]
113 fn atom<'b>(&self, s: impl Into<std::borrow::Cow<'b, str>>) -> swc_atoms::Atom {
114 self.atoms.atom(s)
115 }
116}
117
118impl<'a> Lexer<'a> {
119 pub fn new(
120 syntax: Syntax,
121 target: EsVersion,
122 input: StringInput<'a>,
123 comments: Option<&'a dyn Comments>,
124 ) -> Self {
125 let start_pos = input.last_pos();
126 let syntax_flags = syntax.into_flags();
127
128 Lexer {
129 comments,
130 comments_buffer: comments.is_some().then(CommentsBuffer::new),
131 ctx: Default::default(),
132 input,
133 start_pos,
134 state: self::state::State::new(syntax_flags, start_pos),
135 syntax: syntax_flags,
136 target,
137 errors: Default::default(),
138 module_errors: Default::default(),
139 atoms: Default::default(),
140 }
141 }
142
143 fn read_token(&mut self) -> LexResult<Token> {
145 let byte = match self.input.as_str().as_bytes().first() {
146 Some(&v) => v,
147 None => return Ok(Token::Eof),
148 };
149
150 let handler = unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) };
151
152 match handler {
153 Some(handler) => handler(self),
154 None => {
155 let start = self.cur_pos();
156 self.input.bump_bytes(1);
157 self.error_span(
158 pos_span(start),
159 SyntaxError::UnexpectedChar { c: byte as _ },
160 )
161 }
162 }
163 }
164
165 fn read_token_plus_minus<const C: u8>(&mut self) -> LexResult<Token> {
166 let start = self.cur_pos();
167
168 unsafe {
169 self.input.bump();
171 }
172
173 Ok(if self.input.cur() == Some(C as char) {
175 unsafe {
176 self.input.bump();
178 }
179
180 if self.state.had_line_break && C == b'-' && self.eat(b'>') {
182 self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
183 self.skip_line_comment(0);
184 self.skip_space::<true>();
185 return self.read_token();
186 }
187
188 if C == b'+' {
189 Token::PlusPlus
190 } else {
191 Token::MinusMinus
192 }
193 } else if self.input.eat_byte(b'=') {
194 Token::AssignOp(if C == b'+' {
195 AssignOp::AddAssign
196 } else {
197 AssignOp::SubAssign
198 })
199 } else {
200 Token::BinOp(if C == b'+' {
201 BinOpToken::Add
202 } else {
203 BinOpToken::Sub
204 })
205 })
206 }
207
208 fn read_token_bang_or_eq<const C: u8>(&mut self) -> LexResult<Token> {
209 let start = self.cur_pos();
210 let had_line_break_before_last = self.had_line_break_before_last();
211
212 unsafe {
213 self.input.bump();
215 }
216
217 Ok(if self.input.eat_byte(b'=') {
218 if self.input.eat_byte(b'=') {
221 if C == b'!' {
222 Token::BinOp(BinOpToken::NotEqEq)
223 } else {
224 if had_line_break_before_last && self.is_str("====") {
227 self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
228 self.skip_line_comment(4);
229 self.skip_space::<true>();
230 return self.read_token();
231 }
232
233 Token::BinOp(BinOpToken::EqEqEq)
234 }
235 } else if C == b'!' {
236 Token::BinOp(BinOpToken::NotEq)
237 } else {
238 Token::BinOp(BinOpToken::EqEq)
239 }
240 } else if C == b'=' && self.input.eat_byte(b'>') {
241 Token::Arrow
244 } else if C == b'!' {
245 Token::Bang
246 } else {
247 Token::AssignOp(AssignOp::Assign)
248 })
249 }
250}
251
252impl Lexer<'_> {
253 #[inline(never)]
254 fn read_token_lt_gt<const C: u8>(&mut self) -> LexResult<Token> {
255 let had_line_break_before_last = self.had_line_break_before_last();
256 let start = self.cur_pos();
257 self.bump();
258
259 if self.syntax.typescript()
260 && self.ctx.contains(Context::InType)
261 && !self.ctx.contains(Context::ShouldNotLexLtOrGtAsType)
262 {
263 if C == b'<' {
264 return Ok(tok!('<'));
265 } else if C == b'>' {
266 return Ok(tok!('>'));
267 }
268 }
269
270 if C == b'<' && self.is(b'!') && self.peek() == Some('-') && self.peek_ahead() == Some('-')
272 {
273 self.skip_line_comment(3);
274 self.skip_space::<true>();
275 self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
276
277 return self.read_token();
278 }
279
280 let mut op = if C == b'<' {
281 BinOpToken::Lt
282 } else {
283 BinOpToken::Gt
284 };
285
286 if self.cur() == Some(C as char) {
288 self.bump();
289 op = if C == b'<' {
290 BinOpToken::LShift
291 } else {
292 BinOpToken::RShift
293 };
294
295 if C == b'>' && self.cur() == Some(C as char) {
297 self.bump();
298 op = BinOpToken::ZeroFillRShift;
299 }
300 }
301
302 let token = if self.eat(b'=') {
303 match op {
304 BinOpToken::Lt => Token::BinOp(BinOpToken::LtEq),
305 BinOpToken::Gt => Token::BinOp(BinOpToken::GtEq),
306 BinOpToken::LShift => Token::AssignOp(AssignOp::LShiftAssign),
307 BinOpToken::RShift => Token::AssignOp(AssignOp::RShiftAssign),
308 BinOpToken::ZeroFillRShift => Token::AssignOp(AssignOp::ZeroFillRShiftAssign),
309 _ => unreachable!(),
310 }
311 } else {
312 Token::BinOp(op)
313 };
314
315 if had_line_break_before_last
322 && match op {
323 BinOpToken::LShift if self.is_str("<<<<< ") => true,
324 BinOpToken::ZeroFillRShift if self.is_str(">>>> ") => true,
325 _ => false,
326 }
327 {
328 self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
329 self.skip_line_comment(5);
330 self.skip_space::<true>();
331 return self.read_token();
332 }
333
334 Ok(token)
335 }
336}