1use std::{cell::RefCell, char, iter::FusedIterator, rc::Rc};
4
5use swc_atoms::{wtf8::Wtf8, AtomStoreCell};
6use swc_common::{
7 comments::Comments,
8 input::{Input, StringInput},
9 BytePos, Span,
10};
11use swc_ecma_ast::{AssignOp, EsVersion};
12
13pub use self::state::{TokenContext, TokenContexts, TokenFlags, TokenType};
14use self::table::{ByteHandler, BYTE_HANDLERS};
15use crate::{
16 common::{
17 lexer::{char::CharExt, fixed_len_span, pos_span, LexResult, Lexer as LexerTrait},
18 syntax::{Syntax, SyntaxFlags},
19 },
20 error::{Error, SyntaxError},
21 lexer::comments_buffer::CommentsBuffer,
22 tok,
23 token::{BinOpToken, Token, TokenAndSpan},
24 Context,
25};
26
27mod comments_buffer;
28mod jsx;
29mod number;
30mod state;
31mod table;
32#[cfg(test)]
33mod tests;
34
/// ECMAScript lexer that reads tokens from a [`StringInput`].
///
/// `Clone` copies the cursor and the lexer state, but the clone keeps sharing
/// `errors`, `module_errors` and `atoms` with the original because those are
/// held behind `Rc`.
#[derive(Clone)]
pub struct Lexer<'a> {
    /// Optional `Comments` implementation handed to `Lexer::new`.
    comments: Option<&'a dyn Comments>,
    /// Created by `Lexer::new` exactly when `comments` is `Some`.
    comments_buffer: Option<CommentsBuffer>,

    /// Context flags; consulted, for example, to decide how `<` and `>` are
    /// lexed while `Context::InType` is set.
    pub ctx: Context,
    /// Source text cursor the lexer consumes from.
    input: StringInput<'a>,
    /// Value of `input.last_pos()` captured when the lexer was constructed.
    start_pos: BytePos,

    /// Mutable lexer state (e.g. the `had_line_break` flag).
    state: self::state::State,
    /// Flags derived from the `Syntax` passed to `Lexer::new`.
    pub(crate) syntax: SyntaxFlags,
    /// ECMAScript version passed to `Lexer::new`.
    pub(crate) target: EsVersion,

    /// Errors recorded through `push_error`.
    errors: Rc<RefCell<Vec<Error>>>,
    // NOTE(review): presumably errors that only matter when the source is a
    // module (see `emit_module_mode_error`); it is filled outside this section.
    module_errors: Rc<RefCell<Vec<Error>>>,

    /// Atom store backing `atom` and `wtf8_atom`.
    atoms: Rc<AtomStoreCell>,
}
54
// Marker impl: promises that once the lexer's `Iterator::next` has returned
// `None`, every later call returns `None` as well.
// NOTE(review): the `Iterator` impl itself is not in this section — confirm it
// upholds that guarantee.
impl FusedIterator for Lexer<'_> {}
56
57impl<'a> crate::common::lexer::Lexer<'a, TokenAndSpan> for Lexer<'a> {
58 type CommentsBuffer = CommentsBuffer;
59 type State = self::state::State;
60 type Token = self::Token;
61
62 #[inline(always)]
63 fn input(&self) -> &StringInput<'a> {
64 &self.input
65 }
66
67 #[inline(always)]
68 fn input_mut(&mut self) -> &mut StringInput<'a> {
69 &mut self.input
70 }
71
72 #[inline(always)]
73 fn push_error(&mut self, error: crate::error::Error) {
74 self.errors.borrow_mut().push(error);
75 }
76
77 #[inline(always)]
78 fn state(&self) -> &Self::State {
79 &self.state
80 }
81
82 #[inline(always)]
83 fn state_mut(&mut self) -> &mut Self::State {
84 &mut self.state
85 }
86
87 #[inline(always)]
88 fn comments(&self) -> Option<&'a dyn swc_common::comments::Comments> {
89 self.comments
90 }
91
92 #[inline(always)]
93 fn comments_buffer(&self) -> Option<&Self::CommentsBuffer> {
94 self.comments_buffer.as_ref()
95 }
96
97 #[inline(always)]
98 fn comments_buffer_mut(&mut self) -> Option<&mut Self::CommentsBuffer> {
99 self.comments_buffer.as_mut()
100 }
101
102 #[inline(always)]
103 unsafe fn input_slice(&mut self, start: BytePos, end: BytePos) -> &'a str {
104 self.input.slice(start, end)
105 }
106
107 #[inline(always)]
108 fn input_uncons_while(&mut self, f: impl FnMut(char) -> bool) -> &'a str {
109 self.input_mut().uncons_while(f)
110 }
111
112 #[inline(always)]
113 fn atom<'b>(&self, s: impl Into<std::borrow::Cow<'b, str>>) -> swc_atoms::Atom {
114 self.atoms.atom(s)
115 }
116
117 #[inline(always)]
118 fn wtf8_atom<'b>(&self, s: impl Into<std::borrow::Cow<'b, Wtf8>>) -> swc_atoms::Wtf8Atom {
119 self.atoms.wtf8_atom(s)
120 }
121}
122
123impl<'a> Lexer<'a> {
124 pub fn new(
125 syntax: Syntax,
126 target: EsVersion,
127 input: StringInput<'a>,
128 comments: Option<&'a dyn Comments>,
129 ) -> Self {
130 let start_pos = input.last_pos();
131 let syntax_flags = syntax.into_flags();
132
133 Lexer {
134 comments,
135 comments_buffer: comments.is_some().then(CommentsBuffer::new),
136 ctx: Default::default(),
137 input,
138 start_pos,
139 state: self::state::State::new(syntax_flags, start_pos),
140 syntax: syntax_flags,
141 target,
142 errors: Default::default(),
143 module_errors: Default::default(),
144 atoms: Default::default(),
145 }
146 }
147
148 fn read_token(&mut self) -> LexResult<Token> {
150 let byte = match self.input.as_str().as_bytes().first() {
151 Some(&v) => v,
152 None => return Ok(Token::Eof),
153 };
154
155 let handler = unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) };
156
157 match handler {
158 Some(handler) => handler(self),
159 None => {
160 let start = self.cur_pos();
161 self.input.bump_bytes(1);
162 self.error_span(
163 pos_span(start),
164 SyntaxError::UnexpectedChar { c: byte as _ },
165 )
166 }
167 }
168 }
169
170 fn read_token_plus_minus<const C: u8>(&mut self) -> LexResult<Token> {
171 let start = self.cur_pos();
172
173 unsafe {
174 self.input.bump();
176 }
177
178 Ok(if self.input.cur() == Some(C as char) {
180 unsafe {
181 self.input.bump();
183 }
184
185 if self.state.had_line_break && C == b'-' && self.eat(b'>') {
187 self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
188 self.skip_line_comment(0);
189 self.skip_space::<true>();
190 return self.read_token();
191 }
192
193 if C == b'+' {
194 Token::PlusPlus
195 } else {
196 Token::MinusMinus
197 }
198 } else if self.input.eat_byte(b'=') {
199 Token::AssignOp(if C == b'+' {
200 AssignOp::AddAssign
201 } else {
202 AssignOp::SubAssign
203 })
204 } else {
205 Token::BinOp(if C == b'+' {
206 BinOpToken::Add
207 } else {
208 BinOpToken::Sub
209 })
210 })
211 }
212
213 fn read_token_bang_or_eq<const C: u8>(&mut self) -> LexResult<Token> {
214 let start = self.cur_pos();
215 let had_line_break_before_last = self.had_line_break_before_last();
216
217 unsafe {
218 self.input.bump();
220 }
221
222 Ok(if self.input.eat_byte(b'=') {
223 if self.input.eat_byte(b'=') {
226 if C == b'!' {
227 Token::BinOp(BinOpToken::NotEqEq)
228 } else {
229 if had_line_break_before_last && self.is_str("====") {
232 self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
233 self.skip_line_comment(4);
234 self.skip_space::<true>();
235 return self.read_token();
236 }
237
238 Token::BinOp(BinOpToken::EqEqEq)
239 }
240 } else if C == b'!' {
241 Token::BinOp(BinOpToken::NotEq)
242 } else {
243 Token::BinOp(BinOpToken::EqEq)
244 }
245 } else if C == b'=' && self.input.eat_byte(b'>') {
246 Token::Arrow
249 } else if C == b'!' {
250 Token::Bang
251 } else {
252 Token::AssignOp(AssignOp::Assign)
253 })
254 }
255}
256
257impl Lexer<'_> {
258 #[inline(never)]
259 fn read_token_lt_gt<const C: u8>(&mut self) -> LexResult<Token> {
260 let had_line_break_before_last = self.had_line_break_before_last();
261 let start = self.cur_pos();
262 self.bump();
263
264 if self.syntax.typescript()
265 && self.ctx.contains(Context::InType)
266 && !self.ctx.contains(Context::ShouldNotLexLtOrGtAsType)
267 {
268 if C == b'<' {
269 return Ok(tok!('<'));
270 } else if C == b'>' {
271 return Ok(tok!('>'));
272 }
273 }
274
275 if C == b'<' && self.is(b'!') && self.peek() == Some('-') && self.peek_ahead() == Some('-')
277 {
278 self.skip_line_comment(3);
279 self.skip_space::<true>();
280 self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
281
282 return self.read_token();
283 }
284
285 let mut op = if C == b'<' {
286 BinOpToken::Lt
287 } else {
288 BinOpToken::Gt
289 };
290
291 if self.cur() == Some(C as char) {
293 self.bump();
294 op = if C == b'<' {
295 BinOpToken::LShift
296 } else {
297 BinOpToken::RShift
298 };
299
300 if C == b'>' && self.cur() == Some(C as char) {
302 self.bump();
303 op = BinOpToken::ZeroFillRShift;
304 }
305 }
306
307 let token = if self.eat(b'=') {
308 match op {
309 BinOpToken::Lt => Token::BinOp(BinOpToken::LtEq),
310 BinOpToken::Gt => Token::BinOp(BinOpToken::GtEq),
311 BinOpToken::LShift => Token::AssignOp(AssignOp::LShiftAssign),
312 BinOpToken::RShift => Token::AssignOp(AssignOp::RShiftAssign),
313 BinOpToken::ZeroFillRShift => Token::AssignOp(AssignOp::ZeroFillRShiftAssign),
314 _ => unreachable!(),
315 }
316 } else {
317 Token::BinOp(op)
318 };
319
320 if had_line_break_before_last
327 && match op {
328 BinOpToken::LShift if self.is_str("<<<<< ") => true,
329 BinOpToken::ZeroFillRShift if self.is_str(">>>> ") => true,
330 _ => false,
331 }
332 {
333 self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
334 self.skip_line_comment(5);
335 self.skip_space::<true>();
336 return self.read_token();
337 }
338
339 Ok(token)
340 }
341}