1use std::{cell::RefCell, char, iter::FusedIterator, rc::Rc};
4
5use swc_atoms::AtomStoreCell;
6use swc_common::{
7 comments::Comments,
8 input::{Input, StringInput},
9 BytePos,
10};
11use swc_ecma_ast::EsVersion;
12use swc_ecma_lexer::{
13 common::{
14 lexer::{
15 char::CharExt, comments_buffer::CommentsBuffer, fixed_len_span, pos_span, LexResult,
16 Lexer as LexerTrait,
17 },
18 syntax::SyntaxFlags,
19 },
20 lexer::TokenFlags,
21};
22
23use self::table::{ByteHandler, BYTE_HANDLERS};
24use crate::{
25 error::{Error, SyntaxError},
26 input::Tokens,
27 Context, Syntax,
28};
29
30#[cfg(feature = "unstable")]
31pub(crate) mod capturing;
32mod state;
33mod table;
34pub(crate) mod token;
35
36pub(crate) use token::{NextTokenAndSpan, Token, TokenAndSpan, TokenValue};
37
/// Hand-written ECMAScript/TypeScript lexer driven by a per-byte dispatch
/// table (see `table::BYTE_HANDLERS`).
///
/// Cloning is cheap-ish and intentional: the error sinks and the atom store
/// are behind `Rc`, so clones report errors into, and intern strings in, the
/// same shared storage.
#[derive(Clone)]
pub struct Lexer<'a> {
    /// Optional comment sink supplied by the caller; `None` disables
    /// comment collection.
    comments: Option<&'a dyn Comments>,
    /// Staging buffer for comments; only allocated when `comments` is `Some`
    /// (see `Lexer::new`).
    comments_buffer: Option<CommentsBuffer>,

    pub ctx: Context,
    input: StringInput<'a>,
    /// Position of `input` at construction time — the lexer may start
    /// mid-file, so this is not necessarily `BytePos(0)`.
    start_pos: BytePos,

    state: self::state::State,
    /// Flags describing the token currently being scanned; reset to empty at
    /// the start of every `read_token` call.
    token_flags: TokenFlags,
    pub(crate) syntax: SyntaxFlags,
    pub(crate) target: EsVersion,

    /// Recoverable lexing errors; `Rc`-shared so clones append to one list.
    errors: Rc<RefCell<Vec<Error>>>,
    /// Errors that apply only when the source is parsed as a module
    /// (populated via `emit_module_mode_error` — presumably surfaced only in
    /// module mode; confirm against the parser driver).
    module_errors: Rc<RefCell<Vec<Error>>>,

    /// Shared string-interning store used to build `Atom`s.
    atoms: Rc<AtomStoreCell>,
}
58
59impl FusedIterator for Lexer<'_> {}
60
/// Plumbing that exposes this lexer's internals to the shared lexer
/// machinery in `swc_ecma_lexer`. Every method is a trivial accessor or
/// delegation; all real logic lives in the trait's provided methods.
impl<'a> swc_ecma_lexer::common::lexer::Lexer<'a, TokenAndSpan> for Lexer<'a> {
    type State = self::state::State;
    type Token = self::Token;

    #[inline(always)]
    fn input(&self) -> &StringInput<'a> {
        &self.input
    }

    #[inline(always)]
    fn input_mut(&mut self) -> &mut StringInput<'a> {
        &mut self.input
    }

    /// Records a recoverable error without aborting lexing.
    #[inline(always)]
    fn push_error(&self, error: Error) {
        self.errors.borrow_mut().push(error);
    }

    #[inline(always)]
    fn state(&self) -> &Self::State {
        &self.state
    }

    #[inline(always)]
    fn state_mut(&mut self) -> &mut Self::State {
        &mut self.state
    }

    #[inline(always)]
    fn comments(&self) -> Option<&'a dyn swc_common::comments::Comments> {
        self.comments
    }

    #[inline(always)]
    fn comments_buffer(
        &self,
    ) -> Option<&swc_ecma_lexer::common::lexer::comments_buffer::CommentsBuffer> {
        self.comments_buffer.as_ref()
    }

    #[inline(always)]
    fn comments_buffer_mut(
        &mut self,
    ) -> Option<&mut swc_ecma_lexer::common::lexer::comments_buffer::CommentsBuffer> {
        self.comments_buffer.as_mut()
    }

    /// Delegates to `StringInput::slice`; the caller must uphold the range
    /// contract declared by the trait (`# Safety` lives on the trait fn).
    #[inline(always)]
    unsafe fn input_slice(&mut self, start: BytePos, end: BytePos) -> &'a str {
        self.input.slice(start, end)
    }

    #[inline(always)]
    fn input_uncons_while(&mut self, f: impl FnMut(char) -> bool) -> &'a str {
        self.input_mut().uncons_while(f)
    }

    /// Interns `s` in the shared atom store.
    #[inline(always)]
    fn atom<'b>(&self, s: impl Into<std::borrow::Cow<'b, str>>) -> swc_atoms::Atom {
        self.atoms.atom(s)
    }
}
124
125impl<'a> Lexer<'a> {
126 pub fn new(
127 syntax: Syntax,
128 target: EsVersion,
129 input: StringInput<'a>,
130 comments: Option<&'a dyn Comments>,
131 ) -> Self {
132 let start_pos = input.last_pos();
133
134 Lexer {
135 comments,
136 comments_buffer: comments.is_some().then(CommentsBuffer::new),
137 ctx: Default::default(),
138 input,
139 start_pos,
140 state: self::state::State::new(start_pos),
141 syntax: syntax.into_flags(),
142 target,
143 errors: Default::default(),
144 module_errors: Default::default(),
145 atoms: Default::default(),
146 token_flags: TokenFlags::empty(),
147 }
148 }
149
150 fn read_token(&mut self) -> LexResult<Option<Token>> {
152 self.token_flags = TokenFlags::empty();
153 let byte = match self.input.as_str().as_bytes().first() {
154 Some(&v) => v,
155 None => return Ok(None),
156 };
157
158 let handler = unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) };
159
160 match handler {
161 Some(handler) => handler(self),
162 None => {
163 let start = self.cur_pos();
164 self.input.bump_bytes(1);
165 self.error_span(
166 pos_span(start),
167 SyntaxError::UnexpectedChar { c: byte as _ },
168 )
169 }
170 }
171 }
172
173 fn read_token_plus_minus<const C: u8>(&mut self) -> LexResult<Option<Token>> {
174 let start = self.cur_pos();
175
176 unsafe {
177 self.input.bump();
179 }
180
181 Ok(Some(if self.input.cur() == Some(C as char) {
183 unsafe {
184 self.input.bump();
186 }
187
188 if self.state.had_line_break && C == b'-' && self.eat(b'>') {
190 self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
191 self.skip_line_comment(0);
192 self.skip_space::<true>();
193 return self.read_token();
194 }
195
196 if C == b'+' {
197 Token::PlusPlus
198 } else {
199 Token::MinusMinus
200 }
201 } else if self.input.eat_byte(b'=') {
202 if C == b'+' {
203 Token::PlusEq
204 } else {
205 Token::MinusEq
206 }
207 } else if C == b'+' {
208 Token::Plus
209 } else {
210 Token::Minus
211 }))
212 }
213
214 fn read_token_bang_or_eq<const C: u8>(&mut self) -> LexResult<Option<Token>> {
215 let start = self.cur_pos();
216 let had_line_break_before_last = self.had_line_break_before_last();
217
218 unsafe {
219 self.input.bump();
221 }
222
223 Ok(Some(if self.input.eat_byte(b'=') {
224 if self.input.eat_byte(b'=') {
227 if C == b'!' {
228 Token::NotEqEq
229 } else {
230 if had_line_break_before_last && self.is_str("====") {
233 self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
234 self.skip_line_comment(4);
235 self.skip_space::<true>();
236 return self.read_token();
237 }
238
239 Token::EqEqEq
240 }
241 } else if C == b'!' {
242 Token::NotEq
243 } else {
244 Token::EqEq
245 }
246 } else if C == b'=' && self.input.eat_byte(b'>') {
247 Token::Arrow
250 } else if C == b'!' {
251 Token::Bang
252 } else {
253 Token::Eq
254 }))
255 }
256}
257
impl Lexer<'_> {
    /// Lexes the `<`/`>` token family: `<`, `>`, `<<`, `>>`, `>>>` and their
    /// `=` compounds, plus the legacy `<!--` comment and merge-conflict
    /// markers.
    ///
    /// `C` is the byte this handler was dispatched on (`b'<'` or `b'>'`).
    #[inline(never)]
    fn read_token_lt_gt<const C: u8>(&mut self) -> LexResult<Option<Token>> {
        let had_line_break_before_last = self.had_line_break_before_last();
        let start = self.cur_pos();
        self.bump();

        // In TypeScript type position, `<` / `>` stay single-character
        // tokens (generic argument brackets), never shift operators.
        if self.syntax.typescript()
            && self.ctx.contains(Context::InType)
            && !self.ctx.contains(Context::ShouldNotLexLtOrGtAsType)
        {
            if C == b'<' {
                return Ok(Some(Token::Lt));
            } else if C == b'>' {
                return Ok(Some(Token::Gt));
            }
        }

        // `<!--`: legacy (Annex B) HTML open-comment, treated as a line
        // comment; an error in module code.
        if C == b'<' && self.is(b'!') && self.peek() == Some('-') && self.peek_ahead() == Some('-')
        {
            self.skip_line_comment(3);
            self.skip_space::<true>();
            self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);

            return self.read_token();
        }

        let mut op = if C == b'<' { Token::Lt } else { Token::Gt };

        // Widen to `<<` / `>>`, and `>>` further to `>>>`.
        if self.cur() == Some(C as char) {
            self.bump();
            op = if C == b'<' {
                Token::LShift
            } else {
                Token::RShift
            };

            if C == b'>' && self.cur() == Some(C as char) {
                self.bump();
                op = Token::ZeroFillRShift;
            }
        }

        // A trailing `=` turns any of the above into its assignment form.
        let token = if self.eat(b'=') {
            match op {
                Token::Lt => Token::LtEq,
                Token::Gt => Token::GtEq,
                Token::LShift => Token::LShiftEq,
                Token::RShift => Token::RShiftEq,
                Token::ZeroFillRShift => Token::ZeroFillRShiftEq,
                _ => unreachable!(),
            }
        } else {
            op
        };

        // `<<<<<<< ` / `>>>>>>> ` at line start is a 7-char merge-conflict
        // marker (2 or 3 chars already consumed above, hence the shorter
        // `is_str` probes): report TS1185, skip the line, and resume.
        if had_line_break_before_last
            && match op {
                Token::LShift if self.is_str("<<<<< ") => true,
                Token::ZeroFillRShift if self.is_str(">>>> ") => true,
                _ => false,
            }
        {
            self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
            self.skip_line_comment(5);
            self.skip_space::<true>();
            return self.read_token();
        }

        Ok(Some(token))
    }

    /// Lexes a template literal beginning at a backtick.
    fn read_token_back_quote(&mut self) -> LexResult<Option<Token>> {
        let start = self.cur_pos();
        self.scan_template_token(start, true).map(Some)
    }

    /// Scans one template-literal segment, starting either at the opening
    /// backtick (`started_with_backtick == true`) or at the `}` closing a
    /// `${…}` substitution.
    ///
    /// Yields `NoSubstitutionTemplateLiteral`, `TemplateHead`,
    /// `TemplateMiddle`, or `TemplateTail`, storing both the raw source text
    /// and the cooked (escape-expanded, newline-normalized) text in the
    /// token value. An invalid escape poisons `cooked` with the error while
    /// `raw` is still kept verbatim (tagged templates tolerate this).
    fn scan_template_token(
        &mut self,
        start: BytePos,
        started_with_backtick: bool,
    ) -> LexResult<Token> {
        debug_assert!(self.cur() == Some(if started_with_backtick { '`' } else { '}' }));
        let mut cooked = Ok(String::with_capacity(8));
        // Skip the opening backtick / closing brace of the substitution.
        self.bump();
        let mut cooked_slice_start = self.cur_pos();
        let raw_slice_start = cooked_slice_start;
        // Interns the verbatim source from the segment start to the current
        // position.
        let raw_atom = |this: &mut Self| {
            let last_pos = this.cur_pos();
            let s = unsafe { this.input.slice(raw_slice_start, last_pos) };
            this.atoms.atom(s)
        };
        // Flushes the escape-free run scanned since `cooked_slice_start`
        // into the cooked buffer (no-op once `cooked` is poisoned).
        macro_rules! consume_cooked {
            () => {{
                if let Ok(cooked) = &mut cooked {
                    let last_pos = self.cur_pos();
                    cooked.push_str(unsafe {
                        self.input.slice(cooked_slice_start, last_pos)
                    });
                }
            }};
        }

        while let Some(c) = self.cur() {
            if c == '`' {
                // Closing backtick: the whole template ends here.
                consume_cooked!();
                let cooked = cooked.map(|cooked| self.atoms.atom(cooked));
                let raw = raw_atom(self);
                self.bump();
                return Ok(if started_with_backtick {
                    self.set_token_value(Some(TokenValue::Template { raw, cooked }));
                    Token::NoSubstitutionTemplateLiteral
                } else {
                    self.set_token_value(Some(TokenValue::Template { raw, cooked }));
                    Token::TemplateTail
                });
            } else if c == '$' && self.input.peek() == Some('{') {
                // `${` opens a substitution: this segment ends here.
                consume_cooked!();
                let cooked = cooked.map(|cooked| self.atoms.atom(cooked));
                let raw = raw_atom(self);
                self.input.bump_bytes(2);
                return Ok(if started_with_backtick {
                    self.set_token_value(Some(TokenValue::Template { raw, cooked }));
                    Token::TemplateHead
                } else {
                    self.set_token_value(Some(TokenValue::Template { raw, cooked }));
                    Token::TemplateMiddle
                });
            } else if c == '\\' {
                // Escape sequence: flush the plain run, then expand the
                // escape into `cooked` (an invalid escape poisons `cooked`
                // but scanning continues so `raw` stays complete).
                consume_cooked!();

                match self.read_escaped_char(true) {
                    Ok(Some(chars)) => {
                        if let Ok(ref mut cooked) = cooked {
                            for c in chars {
                                cooked.extend(c);
                            }
                        }
                    }
                    Ok(None) => {}
                    Err(error) => {
                        cooked = Err(error);
                    }
                }

                cooked_slice_start = self.cur_pos();
            } else if c.is_line_terminator() {
                // Cooked text normalizes line endings: `\r` and `\r\n` both
                // cook to `\n`; U+2028/U+2029 pass through unchanged.
                consume_cooked!();

                let c = if c == '\r' && self.peek() == Some('\n') {
                    // Consume the `\r`; the `\n` is eaten by the bump below.
                    self.bump();
                    '\n'
                } else {
                    match c {
                        '\n' => '\n',
                        '\r' => '\n',
                        '\u{2028}' => '\u{2028}',
                        '\u{2029}' => '\u{2029}',
                        _ => unreachable!(),
                    }
                };

                self.bump();

                if let Ok(ref mut cooked) = cooked {
                    cooked.push(c);
                }
                cooked_slice_start = self.cur_pos();
            } else {
                self.bump();
            }
        }

        // Input ended before a closing backtick or `${`.
        self.error(start, SyntaxError::UnterminatedTpl)?
    }
}