1use std::mem::take;
2
3use swc_atoms::wtf8::CodePoint;
4use swc_common::BytePos;
5use swc_ecma_ast::EsVersion;
6
7use super::{Context, Input, Lexer};
8use crate::{
9 error::{Error, SyntaxError},
10 input::Tokens,
11 lexer::{
12 char_ext::CharExt,
13 comments_buffer::{BufferedCommentKind, CommentsBufferCheckpoint},
14 token::{Token, TokenAndSpan, TokenValue},
15 LexResult,
16 },
17 syntax::SyntaxFlags,
18};
19
bitflags::bitflags! {
    /// Per-token bit flags recorded while lexing the current token.
    #[derive(Debug, Default, Clone, Copy)]
    pub struct TokenFlags: u8 {
        /// Set when the token contained a `\u` escape (see
        /// `scan_identifier_parts`).
        const UNICODE = 1 << 0;
    }
}
26
/// Mutable lexer state that persists between tokens and is snapshotted by
/// [`LexerCheckpoint`] for backtracking.
#[derive(Clone)]
pub struct State {
    /// Whether a line break was seen before the current token.
    pub had_line_break: bool,
    /// True until the first token has been scanned; used to handle the
    /// shebang and to treat the start of input as preceded by a line break.
    is_first: bool,
    /// When set, the next token is scanned as a regular expression starting
    /// at this position (requested by the parser via `set_next_regexp`).
    pub next_regexp: Option<BytePos>,
    /// Start position of the current token.
    pub start: BytePos,
    /// High position of the most recently produced token.
    pub prev_hi: BytePos,

    /// Payload (word/string/error/...) attached to the current token, if any.
    pub(super) token_value: Option<TokenValue>,
    /// Kind of the most recently produced token.
    token_type: Option<Token>,
}
43
/// Snapshot of everything needed to rewind the lexer: token state, parsing
/// context, input position, and the comments-buffer position.
pub struct LexerCheckpoint {
    comments_buffer: CommentsBufferCheckpoint,
    state: State,
    ctx: Context,
    input_last_pos: BytePos,
}
50
impl crate::input::Tokens for Lexer<'_> {
    type Checkpoint = LexerCheckpoint;

    /// Captures the current lexer position so the parser can backtrack.
    fn checkpoint_save(&self) -> LexerCheckpoint {
        LexerCheckpoint {
            state: self.state.clone(),
            ctx: self.ctx,
            input_last_pos: self.input.last_pos(),
            comments_buffer: self
                .comments_buffer
                .as_ref()
                .map(|cb| cb.checkpoint_save())
                .unwrap_or_default(),
        }
    }

    /// Rewinds the lexer to a checkpoint previously produced by
    /// `checkpoint_save`.
    fn checkpoint_load(&mut self, checkpoint: LexerCheckpoint) {
        self.state = checkpoint.state;
        self.ctx = checkpoint.ctx;
        // SAFETY: `input_last_pos` was obtained from this same input in
        // `checkpoint_save`, so it is a valid position to reset to.
        unsafe { self.input.reset_to(checkpoint.input_last_pos) };
        if let Some(comments_buffer) = self.comments_buffer.as_mut() {
            comments_buffer.checkpoint_load(checkpoint.comments_buffer);
        }
    }

    #[inline]
    fn set_ctx(&mut self, ctx: Context) {
        // Switching into module mode promotes buffered script-vs-module
        // errors into real errors.
        if ctx.contains(Context::Module) && !self.module_errors.is_empty() {
            self.errors.append(&mut self.module_errors);
        }
        self.ctx = ctx
    }

    #[inline]
    fn ctx(&self) -> Context {
        self.ctx
    }

    #[inline]
    fn ctx_mut(&mut self) -> &mut Context {
        &mut self.ctx
    }

    #[inline]
    fn syntax(&self) -> SyntaxFlags {
        self.syntax
    }

    #[inline]
    fn target(&self) -> EsVersion {
        self.target
    }

    #[inline]
    fn start_pos(&self) -> BytePos {
        self.start_pos
    }

    /// No-op: this lexer does not track "expression allowed" state.
    #[inline]
    fn set_expr_allowed(&mut self, _: bool) {}

    /// Requests that the next token be scanned as a regexp starting at
    /// `start` (or clears the request when `None`).
    #[inline]
    fn set_next_regexp(&mut self, start: Option<BytePos>) {
        self.state.next_regexp = start;
    }

    fn add_error(&mut self, error: Error) {
        self.errors.push(error);
    }

    /// Records an error that is only fatal in module mode; in script mode it
    /// is buffered until/unless the file is later switched to module mode.
    fn add_module_mode_error(&mut self, error: Error) {
        if self.ctx.contains(Context::Module) {
            self.add_error(error);
            return;
        }
        self.module_errors.push(error);
    }

    #[inline]
    fn take_errors(&mut self) -> Vec<Error> {
        take(&mut self.errors)
    }

    #[inline]
    fn take_script_module_errors(&mut self) -> Vec<Error> {
        take(&mut self.module_errors)
    }

    #[inline]
    fn end_pos(&self) -> BytePos {
        self.input.end_pos()
    }

    #[inline]
    fn update_token_flags(&mut self, f: impl FnOnce(&mut TokenFlags)) {
        f(&mut self.token_flags)
    }

    #[inline]
    fn token_flags(&self) -> TokenFlags {
        self.token_flags
    }

    fn clone_token_value(&self) -> Option<TokenValue> {
        self.state.token_value.clone()
    }

    fn get_token_value(&self) -> Option<&TokenValue> {
        self.state.token_value.as_ref()
    }

    fn set_token_value(&mut self, token_value: Option<TokenValue>) {
        self.state.token_value = token_value;
    }

    fn take_token_value(&mut self) -> Option<TokenValue> {
        self.state.token_value.take()
    }

    /// Re-scans a JSX child token after resetting the input to `reset`.
    fn rescan_jsx_token(&mut self, allow_multiline_jsx_text: bool, reset: BytePos) -> TokenAndSpan {
        // SAFETY: `reset` is a position previously observed on this input —
        // NOTE(review): relies on the caller passing a valid position.
        unsafe {
            self.input.reset_to(reset);
        }
        Tokens::scan_jsx_token(self, allow_multiline_jsx_text)
    }

    /// Re-scans a JSX opening-element terminal token (`>` / `/`) after
    /// resetting the input to `reset`.
    fn rescan_jsx_open_el_terminal_token(&mut self, reset: BytePos) -> TokenAndSpan {
        // SAFETY: `reset` is a position previously observed on this input —
        // NOTE(review): relies on the caller passing a valid position.
        unsafe {
            self.input.reset_to(reset);
        }
        Tokens::scan_jsx_open_el_terminal_token(self)
    }

    /// Scans one JSX child token, converting lexing errors into
    /// `Token::Error` (with the error stashed as the token value) and
    /// flushing pending comments / updating token bookkeeping on success.
    fn scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) -> TokenAndSpan {
        let start = self.cur_pos();
        // Calls the *inherent* `scan_jsx_token` (inherent methods win over
        // trait methods in resolution), which returns a `Result`.
        let res = match self.scan_jsx_token(allow_multiline_jsx_text) {
            Ok(res) => Ok(res),
            Err(error) => {
                self.state.set_token_value(TokenValue::Error(error));
                Err(Token::Error)
            }
        };
        let token = match res {
            Ok(t) => t,
            Err(e) => e,
        };
        let span = self.span(start);
        if token != Token::Eof {
            if let Some(comments) = self.comments_buffer.as_mut() {
                comments.pending_to_comment(BufferedCommentKind::Leading, start);
            }

            self.state.set_token_type(token);
            self.state.prev_hi = self.last_pos();
        }
        TokenAndSpan {
            token,
            had_line_break: self.had_line_break_before_last(),
            span,
        }
    }

    /// Scans the token that terminates a JSX opening element's attribute
    /// list (`>` or `/`), with the same error/bookkeeping handling as
    /// `scan_jsx_token`.
    fn scan_jsx_open_el_terminal_token(&mut self) -> TokenAndSpan {
        self.skip_space::<true>();
        let start = self.input.cur_pos();
        let res = match self.scan_jsx_attrs_terminal_token() {
            Ok(res) => Ok(res),
            Err(error) => {
                self.state.set_token_value(TokenValue::Error(error));
                Err(Token::Error)
            }
        };
        let token = match res {
            Ok(t) => t,
            Err(e) => e,
        };
        let span = self.span(start);
        if token != Token::Eof {
            if let Some(comments) = self.comments_buffer.as_mut() {
                comments.pending_to_comment(BufferedCommentKind::Leading, start);
            }

            self.state.set_token_type(token);
            self.state.prev_hi = self.last_pos();
        }
        TokenAndSpan {
            token,
            had_line_break: self.had_line_break_before_last(),
            span,
        }
    }

    /// Extends the identifier/keyword token just scanned into a full JSX
    /// name, which additionally allows `-` (e.g. `data-foo`). The resulting
    /// word is stored as the token value.
    fn scan_jsx_identifier(&mut self, start: BytePos) -> TokenAndSpan {
        let token = self.state.token_type.unwrap();
        debug_assert!(token.is_word());
        let mut v = String::with_capacity(16);
        while let Some(ch) = self.input().cur() {
            if ch == '-' {
                v.push(ch);
                self.bump();
            } else {
                let old_pos = self.cur_pos();
                v.push_str(&self.scan_identifier_parts());
                // No progress means no more identifier parts — stop.
                if self.cur_pos() == old_pos {
                    break;
                }
            }
        }
        // Rebuild the full name: the text of the already-scanned word token
        // plus whatever suffix was just consumed.
        let v = if !v.is_empty() {
            let v = if token.is_known_ident() || token.is_keyword() {
                format!("{}{}", token.to_string(None), v)
            } else if let Some(TokenValue::Word(value)) = self.state.token_value.take() {
                format!("{value}{v}")
            } else {
                format!("{}{}", token.to_string(None), v)
            };
            self.atom(v)
        } else if token.is_known_ident() || token.is_keyword() {
            self.atom(token.to_string(None))
        } else if let Some(TokenValue::Word(value)) = self.state.token_value.take() {
            value
        } else {
            unreachable!(
                "`token_value` should be a word, but got: {:?}",
                self.state.token_value
            )
        };
        self.state.set_token_value(TokenValue::Word(v));
        TokenAndSpan {
            token: Token::JSXName,
            had_line_break: self.had_line_break_before_last(),
            span: self.span(start),
        }
    }

    /// Scans a JSX attribute value: a `'`/`"` string via `read_jsx_str`,
    /// or any other token via the regular lexer.
    fn scan_jsx_attribute_value(&mut self) -> TokenAndSpan {
        let Some(cur) = self.cur() else {
            let start = self.cur_pos();
            return TokenAndSpan {
                token: Token::Eof,
                had_line_break: self.had_line_break_before_last(),
                span: self.span(start),
            };
        };
        let start = self.cur_pos();

        match cur {
            '\'' | '"' => {
                let token = self.read_jsx_str(cur);
                let token = match token {
                    Ok(token) => token,
                    Err(e) => {
                        self.state.set_token_value(TokenValue::Error(e));
                        return TokenAndSpan {
                            token: Token::Error,
                            had_line_break: self.had_line_break_before_last(),
                            span: self.span(start),
                        };
                    }
                };
                debug_assert!(self
                    .get_token_value()
                    .is_some_and(|t| matches!(t, TokenValue::Str { .. })));
                debug_assert!(token == Token::Str);
                TokenAndSpan {
                    token,
                    had_line_break: self.had_line_break_before_last(),
                    span: self.span(start),
                }
            }
            // `{expr}`, identifiers, etc. are handled by the normal lexer.
            _ => self.next().unwrap_or_else(|| TokenAndSpan {
                token: Token::Eof,
                had_line_break: self.had_line_break_before_last(),
                span: self.span(start),
            }),
        }
    }

    /// Re-scans a template token from `start`. `start_with_back_tick`
    /// indicates whether the token begins at the opening backtick; otherwise
    /// the reported span starts one byte later (presumably skipping the `}`
    /// that closes an interpolation — TODO confirm).
    fn rescan_template_token(
        &mut self,
        start: BytePos,
        start_with_back_tick: bool,
    ) -> TokenAndSpan {
        // SAFETY: `start` is a position previously observed on this input —
        // NOTE(review): relies on the caller passing a valid position.
        unsafe { self.input.reset_to(start) };
        let res = self.scan_template_token(start, start_with_back_tick);
        let token = match res.map_err(|e| {
            self.state.set_token_value(TokenValue::Error(e));
            Token::Error
        }) {
            Ok(t) => t,
            Err(e) => e,
        };
        let span = if start_with_back_tick {
            self.span(start)
        } else {
            self.span(start + BytePos(1))
        };

        if token != Token::Eof {
            if let Some(comments) = self.comments_buffer.as_mut() {
                comments.pending_to_comment(BufferedCommentKind::Leading, start);
            }

            self.state.set_token_type(token);
            self.state.prev_hi = self.last_pos();
        }
        TokenAndSpan {
            token,
            had_line_break: self.had_line_break_before_last(),
            span,
        }
    }
}
368
369impl Lexer<'_> {
370 fn next_token(&mut self, start: &mut BytePos) -> Result<Token, Error> {
371 if let Some(next_regexp) = self.state.next_regexp {
372 *start = next_regexp;
373 return self.read_regexp(next_regexp);
374 }
375
376 if self.state.is_first {
377 if let Some(shebang) = self.read_shebang()? {
378 self.state.set_token_value(TokenValue::Word(shebang));
379 return Ok(Token::Shebang);
380 }
381 }
382
383 self.state.had_line_break = self.state.is_first;
384 self.state.is_first = false;
385
386 self.skip_space::<true>();
387 *start = self.input.cur_pos();
388
389 if self.input.last_pos() == self.input.end_pos() {
390 self.consume_pending_comments();
392 return Ok(Token::Eof);
393 }
394
395 self.state.start = *start;
402
403 self.read_token()
404 }
405
406 fn scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) -> Result<Token, Error> {
407 debug_assert!(self.syntax.jsx());
408
409 if self.input_mut().as_str().is_empty() {
410 return Ok(Token::Eof);
411 };
412
413 if self.input.eat_byte(b'<') {
414 return Ok(if self.input.eat_byte(b'/') {
415 Token::LessSlash
416 } else {
417 Token::Lt
418 });
419 } else if self.input.eat_byte(b'{') {
420 return Ok(Token::LBrace);
421 }
422
423 let start = self.input.cur_pos();
424 let mut first_non_whitespace = 0;
425 let mut chunk_start = start;
426 let mut value = String::new();
427
428 while let Some(ch) = self.input_mut().cur() {
429 if ch == '{' {
430 break;
431 } else if ch == '<' {
432 break;
434 }
435
436 if ch == '>' {
437 self.emit_error(
438 self.input().cur_pos(),
439 SyntaxError::UnexpectedTokenWithSuggestions {
440 candidate_list: vec!["`{'>'}`", "`>`"],
441 },
442 );
443 } else if ch == '}' {
444 self.emit_error(
445 self.input().cur_pos(),
446 SyntaxError::UnexpectedTokenWithSuggestions {
447 candidate_list: vec!["`{'}'}`", "`}`"],
448 },
449 );
450 }
451
452 if first_non_whitespace == 0 && ch.is_line_terminator() {
453 first_non_whitespace = -1;
454 } else if !allow_multiline_jsx_text
455 && ch.is_line_terminator()
456 && first_non_whitespace > 0
457 {
458 break;
459 } else if ch.is_whitespace() {
460 first_non_whitespace = self.cur_pos().0 as i32;
461 }
462
463 if ch == '&' {
464 let s = unsafe {
465 self.input_slice_to_cur(chunk_start)
467 };
468 value.push_str(s);
469
470 if let Ok(jsx_entity) = self.read_jsx_entity() {
471 value.push(jsx_entity.0);
472
473 chunk_start = self.input.cur_pos();
474 }
475 } else {
476 self.bump();
477 }
478 }
479
480 let raw = unsafe {
481 self.input_slice_to_cur(start)
483 };
484 let value = if value.is_empty() {
485 self.atom(raw)
486 } else {
487 let s = unsafe {
488 self.input_slice_to_cur(chunk_start)
490 };
491 value.push_str(s);
492 self.atom(value)
493 };
494
495 let raw: swc_atoms::Atom = self.atom(raw);
496
497 self.state.set_token_value(TokenValue::Str {
498 raw,
499 value: value.into(),
500 });
501
502 self.state.start = start;
503
504 Ok(Token::JSXText)
505 }
506
507 fn scan_jsx_attrs_terminal_token(&mut self) -> LexResult<Token> {
508 if self.input_mut().as_str().is_empty() {
509 Ok(Token::Eof)
510 } else if self.input.eat_byte(b'>') {
511 Ok(Token::Gt)
512 } else if self.input.eat_byte(b'/') {
513 Ok(Token::Slash)
514 } else {
515 self.read_token()
516 }
517 }
518
519 fn scan_identifier_parts(&mut self) -> String {
520 let mut v = String::with_capacity(16);
521 while let Some(ch) = self.input().cur() {
522 if ch.is_ident_part() {
523 v.push(ch);
524 self.input_mut().bump_bytes(ch.len_utf8());
525 } else if ch == '\\' {
526 self.bump(); if !self.is(b'u') {
528 self.emit_error(self.cur_pos(), SyntaxError::InvalidUnicodeEscape);
529 continue;
530 }
531 self.bump(); let Ok(value) = self.read_unicode_escape() else {
533 self.emit_error(self.cur_pos(), SyntaxError::InvalidUnicodeEscape);
534 break;
535 };
536 if let Some(c) = CodePoint::from(value).to_char() {
537 v.push(c);
538 } else {
539 self.emit_error(self.cur_pos(), SyntaxError::InvalidUnicodeEscape);
540 }
541 self.token_flags |= TokenFlags::UNICODE;
542 } else {
543 break;
544 }
545 }
546 v
547 }
548}
549
550impl Iterator for Lexer<'_> {
551 type Item = TokenAndSpan;
552
553 fn next(&mut self) -> Option<Self::Item> {
554 let mut start = self.cur_pos();
555
556 let token = match self.next_token(&mut start) {
557 Ok(res) => res,
558 Err(error) => {
559 self.state.set_token_value(TokenValue::Error(error));
560 Token::Error
561 }
562 };
563
564 let span = self.span(start);
565 if token != Token::Eof {
566 if let Some(comments) = self.comments_buffer.as_mut() {
567 comments.pending_to_comment(BufferedCommentKind::Leading, start);
568 }
569
570 self.state.set_token_type(token);
571 self.state.prev_hi = self.last_pos();
572 Some(TokenAndSpan {
574 token,
575 had_line_break: self.had_line_break_before_last(),
576 span,
577 })
578 } else {
579 None
580 }
581 }
582}
583
584impl State {
585 pub fn new(start_pos: BytePos) -> Self {
586 State {
587 had_line_break: false,
588 is_first: true,
589 next_regexp: None,
590 start: BytePos(0),
591 prev_hi: start_pos,
592 token_value: None,
593 token_type: None,
594 }
595 }
596
597 pub(crate) fn set_token_value(&mut self, token_value: TokenValue) {
598 self.token_value = Some(token_value);
599 }
600}
601
602impl State {
603 #[inline(always)]
604 pub fn had_line_break(&self) -> bool {
605 self.had_line_break
606 }
607
608 #[inline(always)]
609 pub fn mark_had_line_break(&mut self) {
610 self.had_line_break = true;
611 }
612
613 #[inline(always)]
614 pub fn set_token_type(&mut self, token_type: Token) {
615 self.token_type = Some(token_type);
616 }
617
618 #[inline(always)]
619 pub fn token_type(&self) -> Option<Token> {
620 self.token_type
621 }
622
623 #[inline(always)]
624 pub fn prev_hi(&self) -> BytePos {
625 self.prev_hi
626 }
627
628 #[inline(always)]
629 pub fn start(&self) -> BytePos {
630 self.start
631 }
632
633 pub fn can_have_trailing_line_comment(&self) -> bool {
634 let Some(t) = self.token_type() else {
635 return true;
636 };
637 !t.is_bin_op()
638 }
639
640 pub fn can_have_trailing_comment(&self) -> bool {
641 self.token_type().is_some_and(|t| {
642 !t.is_keyword()
643 && (t == Token::Semi
644 || t == Token::LBrace
645 || t.is_other_and_can_have_trailing_comment())
646 })
647 }
648}