use std::mem::take;

use swc_common::BytePos;
use swc_ecma_ast::EsVersion;
use swc_ecma_lexer::{
    common::{
        lexer::{
            char::CharExt,
            comments_buffer::{BufferedCommentKind, CommentsBufferTrait},
            state::State as StateTrait,
            LexResult,
        },
        syntax::SyntaxFlags,
    },
    error::SyntaxError,
    TokenContexts,
};

use super::{Context, Input, Lexer, LexerTrait};
use crate::{
    error::Error,
    input::Tokens,
    lexer::{
        comments_buffer::CommentsBufferCheckpoint,
        token::{Token, TokenAndSpan, TokenValue},
    },
};

#[derive(Clone)]
/// Mutable scanning state of the lexer, saved and restored wholesale when the
/// parser backtracks (see [`LexerCheckpoint`]).
pub struct State {
    pub had_line_break: bool,
    /// Whether a line break preceded the token before the current one.
    pub had_line_break_before_last: bool,
    /// True until the first token has been scanned; shebangs are only valid
    /// here.
    is_first: bool,
    /// Set by the parser when the next token must be re-read as a regex
    /// literal starting at this position.
    pub next_regexp: Option<BytePos>,
    pub start: BytePos,
    pub prev_hi: BytePos,

    pub(super) token_value: Option<TokenValue>,
    token_type: Option<Token>,
}

pub struct LexerCheckpoint {
    comments_buffer: CommentsBufferCheckpoint,
    state: State,
    ctx: Context,
    input_last_pos: BytePos,
}

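// A checkpoint captures everything speculative lexing can mutate: the scanner
// state, the context flags, the input cursor, and the buffered comments. A
// hypothetical backtracking caller would pair the two methods below, e.g.
// (sketch only):
//
//     let cp = lexer.checkpoint_save();
//     // ... lex speculatively; on failure:
//     lexer.checkpoint_load(cp);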
impl<'a> swc_ecma_lexer::common::input::Tokens<TokenAndSpan> for Lexer<'a> {
    type Checkpoint = LexerCheckpoint;

    fn checkpoint_save(&self) -> Self::Checkpoint {
        Self::Checkpoint {
            state: self.state.clone(),
            ctx: self.ctx,
            input_last_pos: self.input.last_pos(),
            comments_buffer: self
                .comments_buffer
                .as_ref()
                .map(|cb| cb.checkpoint_save())
                .unwrap_or_default(),
        }
    }

    fn checkpoint_load(&mut self, checkpoint: Self::Checkpoint) {
        self.state = checkpoint.state;
        self.ctx = checkpoint.ctx;
        unsafe { self.input.reset_to(checkpoint.input_last_pos) };
        if let Some(comments_buffer) = self.comments_buffer.as_mut() {
            comments_buffer.checkpoint_load(checkpoint.comments_buffer);
        }
    }

    #[inline]
    fn set_ctx(&mut self, ctx: Context) {
        if ctx.contains(Context::Module) && !self.module_errors.is_empty() {
            self.errors.append(&mut self.module_errors);
        }
        self.ctx = ctx;
    }

    #[inline]
    fn ctx(&self) -> Context {
        self.ctx
    }

    #[inline]
    fn ctx_mut(&mut self) -> &mut Context {
        &mut self.ctx
    }

    #[inline]
    fn syntax(&self) -> SyntaxFlags {
        self.syntax
    }

    #[inline]
    fn target(&self) -> EsVersion {
        self.target
    }

    #[inline]
    fn start_pos(&self) -> BytePos {
        self.start_pos
    }

    #[inline]
    fn set_expr_allowed(&mut self, _: bool) {}

    #[inline]
    fn set_next_regexp(&mut self, start: Option<BytePos>) {
        self.state.next_regexp = start;
    }

    #[inline]
    fn token_context(&self) -> &TokenContexts {
        unreachable!();
    }

    #[inline]
    fn token_context_mut(&mut self) -> &mut TokenContexts {
        unreachable!();
    }

    #[inline]
    fn set_token_context(&mut self, _: TokenContexts) {
        unreachable!();
    }

    fn add_error(&mut self, error: Error) {
        self.errors.push(error);
    }

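    // Some constructs are only errors in module code (e.g. legacy HTML-style
    // comments). Until the caller knows whether it is lexing a module, such
    // errors are buffered in `module_errors`; `set_ctx` above promotes them
    // to real errors once `Context::Module` is set.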
    fn add_module_mode_error(&mut self, error: Error) {
        if self.ctx.contains(Context::Module) {
            self.add_error(error);
            return;
        }
        self.module_errors.push(error);
    }

    #[inline]
    fn take_errors(&mut self) -> Vec<Error> {
        take(&mut self.errors)
    }

    #[inline]
    fn take_script_module_errors(&mut self) -> Vec<Error> {
        take(&mut self.module_errors)
    }

    #[inline]
    fn end_pos(&self) -> BytePos {
        self.input.end_pos()
    }

    #[inline]
    fn update_token_flags(&mut self, f: impl FnOnce(&mut swc_ecma_lexer::lexer::TokenFlags)) {
        f(&mut self.token_flags)
    }

    #[inline]
    fn token_flags(&self) -> swc_ecma_lexer::lexer::TokenFlags {
        self.token_flags
    }
}

impl crate::input::Tokens for Lexer<'_> {
    fn clone_token_value(&self) -> Option<TokenValue> {
        self.state.token_value.clone()
    }

    fn get_token_value(&self) -> Option<&TokenValue> {
        self.state.token_value.as_ref()
    }

    fn set_token_value(&mut self, token_value: Option<TokenValue>) {
        self.state.token_value = token_value;
    }

    fn take_token_value(&mut self) -> Option<TokenValue> {
        self.state.token_value.take()
    }

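    // JSX is not lexically self-delimiting: the same bytes lex differently
    // depending on whether the parser is inside a JSX element. The `rescan_*`
    // methods rewind the input to a known position and re-lex it under JSX
    // rules once the parser has decided that is what the bytes mean.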
    fn rescan_jsx_token(&mut self, allow_multiline_jsx_text: bool, reset: BytePos) -> TokenAndSpan {
        unsafe {
            self.input.reset_to(reset);
        }
        Tokens::scan_jsx_token(self, allow_multiline_jsx_text)
    }

    fn rescan_jsx_open_el_terminal_token(&mut self, reset: BytePos) -> TokenAndSpan {
        unsafe {
            self.input.reset_to(reset);
        }
        Tokens::scan_jsx_open_el_terminal_token(self)
    }

    fn scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) -> TokenAndSpan {
        let start = self.cur_pos();
        let token = match self.scan_jsx_token(allow_multiline_jsx_text) {
            Ok(token) => token,
            Err(error) => {
                self.state.set_token_value(TokenValue::Error(error));
                Token::Error
            }
        };
        let span = self.span(start);
        if token != Token::Eof {
            if let Some(comments) = self.comments_buffer.as_mut() {
                comments.pending_to_comment(BufferedCommentKind::Leading, start);
            }

            self.state.set_token_type(token);
            self.state.prev_hi = self.last_pos();
            self.state.had_line_break_before_last = self.had_line_break_before_last();
        }
        TokenAndSpan {
            token,
            had_line_break: self.had_line_break_before_last(),
            span,
        }
    }

    fn scan_jsx_open_el_terminal_token(&mut self) -> TokenAndSpan {
        self.skip_space::<true>();
        let start = self.input.cur_pos();
        let token = match self.scan_jsx_attrs_terminal_token() {
            Ok(token) => token,
            Err(error) => {
                self.state.set_token_value(TokenValue::Error(error));
                Token::Error
            }
        };
        let span = self.span(start);
        if token != Token::Eof {
            if let Some(comments) = self.comments_buffer.as_mut() {
                comments.pending_to_comment(BufferedCommentKind::Leading, start);
            }

            self.state.set_token_type(token);
            self.state.prev_hi = self.last_pos();
            self.state.had_line_break_before_last = self.had_line_break_before_last();
        }
        TokenAndSpan {
            token,
            had_line_break: self.had_line_break_before_last(),
            span,
        }
    }

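    // JSX names may contain `-` (e.g. `data-testid`), which ordinary
    // identifier lexing stops at. Starting from the word token that was just
    // lexed, keep appending `-` and identifier parts to build a single
    // `Token::JSXName`.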
    fn scan_jsx_identifier(&mut self, start: BytePos) -> TokenAndSpan {
        let token = self.state.token_type.unwrap();
        debug_assert!(token.is_word());
        let mut v = String::with_capacity(16);
        while let Some(ch) = self.input().cur() {
            if ch == '-' {
                v.push(ch);
                self.bump();
            } else {
                let old_pos = self.cur_pos();
                v.push_str(&self.scan_identifier_parts());
                if self.cur_pos() == old_pos {
                    break;
                }
            }
        }
        let v = if !v.is_empty() {
            let v = if token.is_known_ident() {
                format!("{}{}", token.to_string(None), v)
            } else if let Some(TokenValue::Word(value)) = self.state.token_value.take() {
                format!("{value}{v}")
            } else {
                format!("{}{}", token.to_string(None), v)
            };
            self.atom(v)
        } else if token.is_known_ident() || token.is_keyword() {
            self.atom(token.to_string(None))
        } else if let Some(TokenValue::Word(value)) = self.state.token_value.take() {
            value
        } else {
            unreachable!(
                "`token_value` should be a word, but got: {:?}",
                self.state.token_value
            )
        };
        self.state.set_token_value(TokenValue::Word(v));
        TokenAndSpan {
            token: Token::JSXName,
            had_line_break: self.had_line_break_before_last(),
            span: self.span(start),
        }
    }

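    // A JSX attribute value is either a quoted string, lexed without normal
    // escape processing (`read_jsx_str`), or anything else (typically `{`
    // opening an expression container), which falls through to ordinary
    // tokenization via `self.next()`.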
    fn scan_jsx_attribute_value(&mut self) -> TokenAndSpan {
        let Some(cur) = self.cur() else {
            let start = self.cur_pos();
            return TokenAndSpan {
                token: Token::Eof,
                had_line_break: self.had_line_break_before_last(),
                span: self.span(start),
            };
        };
        let start = self.cur_pos();

        match cur {
            '\'' | '"' => {
                let token = match self.read_jsx_str(cur) {
                    Ok(token) => token,
                    Err(e) => {
                        self.state.set_token_value(TokenValue::Error(e));
                        return TokenAndSpan {
                            token: Token::Error,
                            had_line_break: self.had_line_break_before_last(),
                            span: self.span(start),
                        };
                    }
                };
                debug_assert!(self
                    .get_token_value()
                    .is_some_and(|t| matches!(t, TokenValue::Str { .. })));
                debug_assert!(token == Token::Str);
                TokenAndSpan {
                    token,
                    had_line_break: self.had_line_break_before_last(),
                    span: self.span(start),
                }
            }
            _ => self.next().unwrap_or_else(|| TokenAndSpan {
                token: Token::Eof,
                had_line_break: self.had_line_break_before_last(),
                span: self.span(start),
            }),
        }
    }

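    // When the parser reaches `}` inside a template literal, that `}` was
    // lexed as a plain punctuator; rewind to `start` and re-lex it as
    // template text. `start_with_back_tick` distinguishes the template head
    // (span starts at the backtick) from a continuation (span skips the `}`).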
    fn rescan_template_token(
        &mut self,
        start: BytePos,
        start_with_back_tick: bool,
    ) -> TokenAndSpan {
        unsafe { self.input.reset_to(start) };
        let token = self
            .scan_template_token(start, start_with_back_tick)
            .unwrap_or_else(|e| {
                self.state.set_token_value(TokenValue::Error(e));
                Token::Error
            });
        let span = if start_with_back_tick {
            self.span(start)
        } else {
            self.span(start + BytePos(1))
        };

        if token != Token::Eof {
            if let Some(comments) = self.comments_buffer.as_mut() {
                comments.pending_to_comment(BufferedCommentKind::Leading, start);
            }

            self.state.set_token_type(token);
            self.state.prev_hi = self.last_pos();
            self.state.had_line_break_before_last = self.had_line_break_before_last();
        }
        TokenAndSpan {
            token,
            had_line_break: self.had_line_break_before_last(),
            span,
        }
    }
}

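// The inherent methods below do the actual scanning; the trait impls above
// wrap them with span bookkeeping and error-to-`Token::Error` conversion.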
impl Lexer<'_> {
    fn next_token(&mut self, start: &mut BytePos) -> Result<Token, Error> {
        if let Some(next_regexp) = self.state.next_regexp {
            *start = next_regexp;
            return self.read_regexp(next_regexp);
        }

        if self.state.is_first {
            if let Some(shebang) = self.read_shebang()? {
                self.state.set_token_value(TokenValue::Word(shebang));
                return Ok(Token::Shebang);
            }
        }

        self.state.had_line_break = self.state.is_first;
        self.state.is_first = false;

        self.skip_space::<true>();
        *start = self.input.cur_pos();

        if self.input.last_pos() == self.input.end_pos() {
            self.consume_pending_comments();
            return Ok(Token::Eof);
        }

        self.state.start = *start;

        self.read_token()
    }

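    // Scans JSX text up to the next `<` or `{`. `first_non_whitespace`
    // follows the same convention as TypeScript's scanner: 0 while only
    // whitespace has been seen, -1 once a leading line break is seen,
    // otherwise the position of the first non-whitespace character; a line
    // break after real content ends the token when multiline JSX text is not
    // allowed.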
    fn scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) -> Result<Token, Error> {
        debug_assert!(self.syntax.jsx());

        if self.input_mut().as_str().is_empty() {
            return Ok(Token::Eof);
        }

        if self.input.eat_byte(b'<') {
            return Ok(if self.input.eat_byte(b'/') {
                Token::LessSlash
            } else {
                Token::Lt
            });
        } else if self.input.eat_byte(b'{') {
            return Ok(Token::LBrace);
        }

        let start = self.input.cur_pos();
        let mut first_non_whitespace = 0;
        let mut chunk_start = start;
        let mut value = String::new();

        while let Some(ch) = self.input_mut().cur() {
            if ch == '{' {
                break;
            } else if ch == '<' {
                break;
            }

            if ch == '>' {
                self.emit_error(
                    self.input().cur_pos(),
                    SyntaxError::UnexpectedTokenWithSuggestions {
                        candidate_list: vec!["`{'>'}`", "`&gt;`"],
                    },
                );
            } else if ch == '}' {
                self.emit_error(
                    self.input().cur_pos(),
                    SyntaxError::UnexpectedTokenWithSuggestions {
                        candidate_list: vec!["`{'}'}`", "`&rbrace;`"],
                    },
                );
            }

            if first_non_whitespace == 0 && ch.is_line_terminator() {
                first_non_whitespace = -1;
            } else if !allow_multiline_jsx_text
                && ch.is_line_terminator()
                && first_non_whitespace > 0
            {
                break;
            } else if !ch.is_whitespace() {
                first_non_whitespace = self.cur_pos().0 as i32;
            }

            if ch == '&' {
                let cur_pos = self.input().cur_pos();

                let s = unsafe {
                    // Safety: both positions are valid byte offsets into the input.
                    self.input_slice(chunk_start, cur_pos)
                };
                value.push_str(s);

                if let Ok(jsx_entity) = self.read_jsx_entity() {
                    value.push(jsx_entity.0);

                    chunk_start = self.input.cur_pos();
                }
            } else {
                self.bump();
            }
        }

        let end = self.input().cur_pos();
        let raw = unsafe {
            // Safety: both positions are valid byte offsets into the input.
            self.input_slice(start, end)
        };
        let value = if value.is_empty() {
            self.atom(raw)
        } else {
            let s = unsafe {
                // Safety: both positions are valid byte offsets into the input.
                self.input_slice(chunk_start, end)
            };
            value.push_str(s);
            self.atom(value)
        };

        let raw: swc_atoms::Atom = self.atom(raw);

        self.state.set_token_value(TokenValue::Str { raw, value });

        self.state.start = start;

        Ok(Token::JSXText)
    }

    fn scan_jsx_attrs_terminal_token(&mut self) -> LexResult<Token> {
        if self.input_mut().as_str().is_empty() {
            Ok(Token::Eof)
        } else if self.input.eat_byte(b'>') {
            Ok(Token::Gt)
        } else if self.input.eat_byte(b'/') {
            Ok(Token::Slash)
        } else {
            self.read_token()
        }
    }

    fn scan_identifier_parts(&mut self) -> String {
        let mut v = String::with_capacity(16);
        while let Some(ch) = self.input().cur() {
            if ch.is_ident_part() {
                v.push(ch);
                self.input_mut().bump_bytes(ch.len_utf8());
            } else if ch == '\\' {
                self.bump(); // consume '\\'
                if !self.is(b'u') {
                    self.emit_error(self.cur_pos(), SyntaxError::InvalidUnicodeEscape);
                    continue;
                }
                self.bump(); // consume 'u'
                let Ok(chars) = self.read_unicode_escape() else {
                    self.emit_error(self.cur_pos(), SyntaxError::InvalidUnicodeEscape);
                    break;
                };
                for c in chars {
                    v.extend(c);
                }
                self.token_flags |= swc_ecma_lexer::lexer::TokenFlags::UNICODE;
            } else {
                break;
            }
        }
        v
    }
}

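// Iteration is the lexer's primary interface: each `next` call yields the
// next token with its span; lexing errors surface as `Token::Error` with the
// error stashed in the state as `TokenValue::Error`, and end of input yields
// `None`.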
impl Iterator for Lexer<'_> {
    type Item = TokenAndSpan;

    fn next(&mut self) -> Option<Self::Item> {
        let mut start = self.cur_pos();

        let token = match self.next_token(&mut start) {
            Ok(res) => res,
            Err(error) => {
                self.state.set_token_value(TokenValue::Error(error));
                Token::Error
            }
        };

        let span = self.span(start);
        if token != Token::Eof {
            if let Some(comments) = self.comments_buffer.as_mut() {
                comments.pending_to_comment(BufferedCommentKind::Leading, start);
            }

            self.state.set_token_type(token);
            self.state.prev_hi = self.last_pos();
            self.state.had_line_break_before_last = self.had_line_break_before_last();
            Some(TokenAndSpan {
                token,
                had_line_break: self.had_line_break_before_last(),
                span,
            })
        } else {
            None
        }
    }
}

impl State {
    pub fn new(start_pos: BytePos) -> Self {
        State {
            had_line_break: false,
            had_line_break_before_last: false,
            is_first: true,
            next_regexp: None,
            start: BytePos(0),
            prev_hi: start_pos,
            token_value: None,
            token_type: None,
        }
    }

    pub(crate) fn set_token_value(&mut self, token_value: TokenValue) {
        self.token_value = Some(token_value);
    }
}

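// This lexer does not maintain token contexts (the legacy lexer's stack for
// disambiguating `/` as regex vs. division and `}` inside templates), so the
// context-related methods of the shared `State` trait are deliberately
// unreachable.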
impl swc_ecma_lexer::common::lexer::state::State for State {
    type TokenKind = Token;
    type TokenType = Token;

    #[inline(always)]
    fn is_expr_allowed(&self) -> bool {
        unreachable!("is_expr_allowed should not be called in Parser/State")
    }

    #[inline(always)]
    fn set_is_expr_allowed(&mut self, _: bool) {}

    #[inline(always)]
    fn set_next_regexp(&mut self, start: Option<BytePos>) {
        self.next_regexp = start;
    }

    #[inline(always)]
    fn had_line_break(&self) -> bool {
        self.had_line_break
    }

    #[inline(always)]
    fn mark_had_line_break(&mut self) {
        self.had_line_break = true;
    }

    #[inline(always)]
    fn had_line_break_before_last(&self) -> bool {
        self.had_line_break_before_last
    }

    #[inline(always)]
    fn token_contexts(&self) -> &swc_ecma_lexer::TokenContexts {
        unreachable!();
    }

    #[inline(always)]
    fn mut_token_contexts(&mut self) -> &mut swc_ecma_lexer::TokenContexts {
        unreachable!();
    }

    #[inline(always)]
    fn set_token_type(&mut self, token_type: Self::TokenType) {
        self.token_type = Some(token_type);
    }

    #[inline(always)]
    fn token_type(&self) -> Option<Self::TokenType> {
        self.token_type
    }

    #[inline(always)]
    fn syntax(&self) -> SyntaxFlags {
        unreachable!("syntax is not stored in State, but in Lexer")
    }

    #[inline(always)]
    fn prev_hi(&self) -> BytePos {
        self.prev_hi
    }

    #[inline(always)]
    fn start(&self) -> BytePos {
        self.start
    }
}