use std::mem::take;

use swc_common::BytePos;
use swc_ecma_ast::EsVersion;
use swc_ecma_lexer::{
    common::{
        lexer::{
            char::CharExt,
            comments_buffer::{BufferedComment, BufferedCommentKind},
            state::State as StateTrait,
            LexResult,
        },
        syntax::SyntaxFlags,
    },
    error::SyntaxError,
    TokenContexts,
};

use super::{Context, Input, Lexer, LexerTrait};
use crate::{
    error::Error,
    input::Tokens,
    lexer::token::{Token, TokenAndSpan, TokenValue},
};

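/// Mutable state carried by the lexer between tokens: line-break flags,
/// position bookkeeping for spans, and the value and type of the most
/// recently produced token.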
#[derive(Clone)]
pub struct State {
    pub had_line_break: bool,
    pub had_line_break_before_last: bool,
    is_first: bool,
    pub next_regexp: Option<BytePos>,
    pub start: BytePos,
    pub prev_hi: BytePos,

    pub(super) token_value: Option<TokenValue>,
    token_type: Option<Token>,
}

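// Adapter for the token interface shared with `swc_ecma_lexer`. This lexer
// does not track `TokenContexts`, so the context-related hooks are
// unreachable.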
impl swc_ecma_lexer::common::input::Tokens<TokenAndSpan> for Lexer<'_> {
    #[inline]
    fn set_ctx(&mut self, ctx: Context) {
        if ctx.contains(Context::Module) && !self.module_errors.borrow().is_empty() {
            let mut module_errors = self.module_errors.borrow_mut();
            self.errors.borrow_mut().append(&mut *module_errors);
        }
        self.ctx = ctx;
    }

    #[inline]
    fn ctx(&self) -> Context {
        self.ctx
    }

    #[inline]
    fn ctx_mut(&mut self) -> &mut Context {
        &mut self.ctx
    }

    #[inline]
    fn syntax(&self) -> SyntaxFlags {
        self.syntax
    }

    #[inline]
    fn target(&self) -> EsVersion {
        self.target
    }

    #[inline]
    fn start_pos(&self) -> BytePos {
        self.start_pos
    }

    #[inline]
    fn set_expr_allowed(&mut self, _: bool) {}

    #[inline]
    fn set_next_regexp(&mut self, start: Option<BytePos>) {
        self.state.next_regexp = start;
    }

    #[inline]
    fn token_context(&self) -> &TokenContexts {
        unreachable!();
    }

    #[inline]
    fn token_context_mut(&mut self) -> &mut TokenContexts {
        unreachable!();
    }

    #[inline]
    fn set_token_context(&mut self, _: TokenContexts) {
        unreachable!();
    }

    fn add_error(&self, error: Error) {
        self.errors.borrow_mut().push(error);
    }

    fn add_module_mode_error(&self, error: Error) {
        if self.ctx.contains(Context::Module) {
            self.add_error(error);
            return;
        }
        self.module_errors.borrow_mut().push(error);
    }

    #[inline]
    fn take_errors(&mut self) -> Vec<Error> {
        take(&mut self.errors.borrow_mut())
    }

    #[inline]
    fn take_script_module_errors(&mut self) -> Vec<Error> {
        take(&mut self.module_errors.borrow_mut())
    }

    #[inline]
    fn end_pos(&self) -> BytePos {
        self.input.end_pos()
    }

    #[inline]
    fn update_token_flags(&mut self, f: impl FnOnce(&mut swc_ecma_lexer::lexer::TokenFlags)) {
        f(&mut self.token_flags)
    }

    #[inline]
    fn token_flags(&self) -> swc_ecma_lexer::lexer::TokenFlags {
        self.token_flags
    }
}

impl crate::input::Tokens for Lexer<'_> {
    fn clone_token_value(&self) -> Option<TokenValue> {
        self.state.token_value.clone()
    }

    fn get_token_value(&self) -> Option<&TokenValue> {
        self.state.token_value.as_ref()
    }

    fn set_token_value(&mut self, token_value: Option<TokenValue>) {
        self.state.token_value = token_value;
    }

    fn take_token_value(&mut self) -> Option<TokenValue> {
        self.state.token_value.take()
    }

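    /// Rewinds the input to `reset` and scans a JSX token from there.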
    fn rescan_jsx_token(
        &mut self,
        allow_multiline_jsx_text: bool,
        reset: BytePos,
    ) -> Option<TokenAndSpan> {
        unsafe {
            self.input.reset_to(reset);
        }
        Tokens::scan_jsx_token(self, allow_multiline_jsx_text)
    }

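    /// Rewinds the input to `reset` and re-scans the terminal token of a
    /// JSX opening element.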
    fn rescan_jsx_open_el_terminal_token(&mut self, reset: BytePos) -> Option<TokenAndSpan> {
        unsafe {
            self.input.reset_to(reset);
        }
        Tokens::scan_jsx_open_el_terminal_token(self)
    }

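    /// Scans a JSX child token and finalizes it: pending leading comments
    /// are attached at the token start, and the token type and line-break
    /// bookkeeping are recorded on the lexer state.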
    fn scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) -> Option<TokenAndSpan> {
        let start = self.cur_pos();
        let res = match self.scan_jsx_token(allow_multiline_jsx_text) {
            Ok(res) => Ok(res),
            Err(error) => {
                self.state.set_token_value(TokenValue::Error(error));
                Err(Token::Error)
            }
        };
        let token = match res.map_err(Some) {
            Ok(t) => t,
            Err(e) => e,
        };
        let span = self.span(start);
        if let Some(token) = token {
            if let Some(comments) = self.comments_buffer.as_mut() {
                for comment in comments.take_pending_leading() {
                    comments.push(BufferedComment {
                        kind: BufferedCommentKind::Leading,
                        pos: start,
                        comment,
                    });
                }
            }

            self.state.set_token_type(token);
            self.state.prev_hi = self.last_pos();
            self.state.had_line_break_before_last = self.had_line_break_before_last();
        }
        token.map(|token| TokenAndSpan {
            token,
            had_line_break: self.had_line_break_before_last(),
            span,
        })
    }

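    /// Skips whitespace, then scans the token that closes a JSX opening
    /// element (`>` or `/`), with the same finalization as `scan_jsx_token`.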
    fn scan_jsx_open_el_terminal_token(&mut self) -> Option<TokenAndSpan> {
        self.skip_space::<true>();
        let start = self.input.cur_pos();
        let res = match self.scan_jsx_attrs_terminal_token() {
            Ok(res) => Ok(res),
            Err(error) => {
                self.state.set_token_value(TokenValue::Error(error));
                Err(Token::Error)
            }
        };
        let token = match res.map_err(Some) {
            Ok(t) => t,
            Err(e) => e,
        };
        let span = self.span(start);
        if let Some(token) = token {
            if let Some(comments) = self.comments_buffer.as_mut() {
                for comment in comments.take_pending_leading() {
                    comments.push(BufferedComment {
                        kind: BufferedCommentKind::Leading,
                        pos: start,
                        comment,
                    });
                }
            }

            self.state.set_token_type(token);
            self.state.prev_hi = self.last_pos();
            self.state.had_line_break_before_last = self.had_line_break_before_last();
        }
        token.map(|token| TokenAndSpan {
            token,
            had_line_break: self.had_line_break_before_last(),
            span,
        })
    }

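    /// Extends the previously scanned word token into a JSX identifier,
    /// which additionally allows `-`, and stores the result as the value of
    /// a `JSXName` token.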
    fn scan_jsx_identifier(&mut self, start: BytePos) -> TokenAndSpan {
        let token = self.state.token_type.unwrap();
        debug_assert!(token.is_word());
        let mut v = String::with_capacity(16);
        while let Some(ch) = self.input().cur() {
            if ch == '-' {
                v.push(ch);
                self.bump();
            } else {
                let old_pos = self.cur_pos();
                v.push_str(&self.scan_identifier_parts());
                if self.cur_pos() == old_pos {
                    break;
                }
            }
        }
        let v = if !v.is_empty() {
            let v = if token.is_known_ident() {
                format!("{}{}", token.to_string(None), v)
            } else if let Some(TokenValue::Word(value)) = self.state.token_value.take() {
                format!("{value}{v}")
            } else {
                format!("{}{}", token.to_string(None), v)
            };
            self.atom(v)
        } else if token.is_known_ident() || token.is_keyword() {
            self.atom(token.to_string(None))
        } else if let Some(TokenValue::Word(value)) = self.state.token_value.take() {
            value
        } else {
            self.atom(token.to_string(None))
        };
        self.state.set_token_value(TokenValue::Word(v));
        TokenAndSpan {
            token: Token::JSXName,
            had_line_break: self.had_line_break_before_last(),
            span: self.span(start),
        }
    }

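    /// Scans a JSX attribute value. Quoted values are read as JSX strings;
    /// anything else (e.g. a `{...}` container) falls through to the regular
    /// tokenizer.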
    fn scan_jsx_attribute_value(&mut self) -> Option<TokenAndSpan> {
        let Some(cur) = self.cur() else {
            return self.next();
        };
        let start = self.cur_pos();

        match cur {
            '\'' | '"' => {
                let token = self.read_jsx_str(cur).ok()?;
                debug_assert!(self
                    .get_token_value()
                    .is_some_and(|t| matches!(t, TokenValue::Str { .. })));
                debug_assert!(token == Token::Str);
                Some(TokenAndSpan {
                    token,
                    had_line_break: self.had_line_break_before_last(),
                    span: self.span(start),
                })
            }
            _ => self.next(),
        }
    }

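    /// Rewinds to `start` and re-scans a template token. When the token does
    /// not begin with a backtick, the reported span starts one byte after
    /// `start`.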
    fn rescan_template_token(
        &mut self,
        start: BytePos,
        start_with_back_tick: bool,
    ) -> Option<TokenAndSpan> {
        unsafe { self.input.reset_to(start) };
        let res = self
            .scan_template_token(start, start_with_back_tick)
            .map(Some);
        let token = match res
            .map_err(|e| {
                self.state.set_token_value(TokenValue::Error(e));
                Token::Error
            })
            .map_err(Some)
        {
            Ok(t) => t,
            Err(e) => e,
        };
        let span = if start_with_back_tick {
            self.span(start)
        } else {
            self.span(start + BytePos(1))
        };
        if let Some(token) = token {
            if let Some(comments) = self.comments_buffer.as_mut() {
                for comment in comments.take_pending_leading() {
                    comments.push(BufferedComment {
                        kind: BufferedCommentKind::Leading,
                        pos: start,
                        comment,
                    });
                }
            }

            self.state.set_token_type(token);
            self.state.prev_hi = self.last_pos();
            self.state.had_line_break_before_last = self.had_line_break_before_last();
        }
        token.map(|token| TokenAndSpan {
            token,
            had_line_break: self.had_line_break_before_last(),
            span,
        })
    }
}

impl Lexer<'_> {
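    /// Scans the next token, handling the pending-regexp and shebang special
    /// cases, then skipping whitespace and recording the token start
    /// position in `start` and `self.state.start`.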
    fn next_token(&mut self, start: &mut BytePos) -> Result<Option<Token>, Error> {
        if let Some(start) = self.state.next_regexp {
            return Ok(Some(self.read_regexp(start)?));
        }

        if self.state.is_first {
            if let Some(shebang) = self.read_shebang()? {
                self.state.set_token_value(TokenValue::Word(shebang));
                return Ok(Some(Token::Shebang));
            }
        }

        self.state.had_line_break = self.state.is_first;
        self.state.is_first = false;

        self.skip_space::<true>();
        *start = self.input.cur_pos();

        if self.input.last_pos() == self.input.end_pos() {
            self.consume_pending_comments();
            return Ok(None);
        }

        self.state.start = *start;

        self.read_token()
    }

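    /// Scans JSX text. `<`, `</`, and `{` yield their own tokens; anything
    /// else accumulates into a `JSXText` token, decoding `&...;` entities
    /// and reporting stray `>` and `}` characters.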
    fn scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) -> Result<Option<Token>, Error> {
        debug_assert!(self.syntax.jsx());

        if self.input_mut().as_str().is_empty() {
            return Ok(None);
        }

        if self.input.eat_byte(b'<') {
            return Ok(Some(if self.input.eat_byte(b'/') {
                Token::LessSlash
            } else {
                Token::Lt
            }));
        } else if self.input.eat_byte(b'{') {
            return Ok(Some(Token::LBrace));
        }

        let start = self.input.cur_pos();
        let mut first_non_whitespace = 0;
        let mut chunk_start = start;
        let mut value = String::new();

        while let Some(ch) = self.input_mut().cur() {
            if ch == '{' || ch == '<' {
                break;
            }

            if ch == '>' {
                self.emit_error(
                    self.input().cur_pos(),
                    SyntaxError::UnexpectedTokenWithSuggestions {
                        candidate_list: vec!["`{'>'}`", "`&gt;`"],
                    },
                );
            } else if ch == '}' {
                self.emit_error(
                    self.input().cur_pos(),
                    SyntaxError::UnexpectedTokenWithSuggestions {
                        candidate_list: vec!["`{'}'}`", "`&rbrace;`"],
                    },
                );
            }

            if first_non_whitespace == 0 && ch.is_line_terminator() {
                first_non_whitespace = -1;
            } else if !allow_multiline_jsx_text
                && ch.is_line_terminator()
                && first_non_whitespace > 0
            {
                break;
            } else if !ch.is_whitespace() {
                first_non_whitespace = self.cur_pos().0 as i32;
            }

            if ch == '&' {
                let cur_pos = self.input().cur_pos();

                let s = unsafe {
                    // Safety: both positions lie within the current input.
                    self.input_slice(chunk_start, cur_pos)
                };
                value.push_str(s);

                if let Ok(jsx_entity) = self.read_jsx_entity() {
                    value.push(jsx_entity.0);

                    chunk_start = self.input.cur_pos();
                }
            } else {
                self.bump();
            }
        }

        let end = self.input().cur_pos();
        let raw = unsafe {
            // Safety: both positions lie within the current input.
            self.input_slice(start, end)
        };
        let value = if value.is_empty() {
            self.atom(raw)
        } else {
            let s = unsafe {
                // Safety: both positions lie within the current input.
                self.input_slice(chunk_start, end)
            };
            value.push_str(s);
            self.atom(value)
        };

        let raw: swc_atoms::Atom = self.atom(raw);

        self.state.set_token_value(TokenValue::Str { raw, value });

        self.state.start = start;

        Ok(Some(Token::JSXText))
    }

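    /// Scans the token that terminates a JSX attribute list: `>` or `/`, or
    /// whatever the regular tokenizer produces otherwise.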
    fn scan_jsx_attrs_terminal_token(&mut self) -> LexResult<Option<Token>> {
        if self.input_mut().as_str().is_empty() {
            Ok(None)
        } else if self.input.eat_byte(b'>') {
            Ok(Some(Token::Gt))
        } else if self.input.eat_byte(b'/') {
            Ok(Some(Token::Slash))
        } else {
            self.read_token()
        }
    }

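    /// Collects identifier-continuation characters, decoding `\u` escapes
    /// and recording the UNICODE token flag when one is seen.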
    fn scan_identifier_parts(&mut self) -> String {
        let mut v = String::with_capacity(16);
        while let Some(ch) = self.input().cur() {
            if ch.is_ident_part() {
                v.push(ch);
                self.input_mut().bump_bytes(ch.len_utf8());
            } else if ch == '\\' {
                self.bump(); // eat `\`
                if !self.is(b'u') {
                    self.emit_error(self.cur_pos(), SyntaxError::InvalidUnicodeEscape);
                    continue;
                }
                self.bump(); // eat `u`
                let Ok(chars) = self.read_unicode_escape() else {
                    self.emit_error(self.cur_pos(), SyntaxError::InvalidUnicodeEscape);
                    break;
                };
                for c in chars {
                    v.extend(c);
                }
                self.token_flags |= swc_ecma_lexer::lexer::TokenFlags::UNICODE;
            } else {
                break;
            }
        }
        v
    }
}

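// Tokens are produced by iterating the lexer. Errors are surfaced as
// `Token::Error`, with the `Error` itself stashed in the token value.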
impl Iterator for Lexer<'_> {
    type Item = TokenAndSpan;

    fn next(&mut self) -> Option<Self::Item> {
        let mut start = self.cur_pos();

        let res = match self.next_token(&mut start) {
            Ok(res) => Ok(res),
            Err(error) => {
                self.state.set_token_value(TokenValue::Error(error));
                Err(Token::Error)
            }
        };
        let token = match res.map_err(Some) {
            Ok(t) => t,
            Err(e) => e,
        };

        let span = self.span(start);
        if let Some(token) = token {
            if let Some(comments) = self.comments_buffer.as_mut() {
                for comment in comments.take_pending_leading() {
                    comments.push(BufferedComment {
                        kind: BufferedCommentKind::Leading,
                        pos: start,
                        comment,
                    });
                }
            }

            self.state.set_token_type(token);
            self.state.prev_hi = self.last_pos();
            self.state.had_line_break_before_last = self.had_line_break_before_last();
        }

        token.map(|token| TokenAndSpan {
            token,
            had_line_break: self.had_line_break_before_last(),
            span,
        })
    }
}

impl State {
    pub fn new(start_pos: BytePos) -> Self {
        State {
            had_line_break: false,
            had_line_break_before_last: false,
            is_first: true,
            next_regexp: None,
            start: BytePos(0),
            prev_hi: start_pos,
            token_value: None,
            token_type: None,
        }
    }

    pub(crate) fn set_token_value(&mut self, token_value: TokenValue) {
        self.token_value = Some(token_value);
    }
}

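// Hooks required by the shared `State` trait. Expression-allowed tracking
// and token contexts are not used by this lexer, so those methods are
// no-ops or unreachable.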
impl swc_ecma_lexer::common::lexer::state::State for State {
    type TokenKind = Token;
    type TokenType = Token;

    #[inline(always)]
    fn is_expr_allowed(&self) -> bool {
        unreachable!("is_expr_allowed should not be called in Parser/State")
    }

    #[inline(always)]
    fn set_is_expr_allowed(&mut self, _: bool) {}

    #[inline(always)]
    fn set_next_regexp(&mut self, start: Option<BytePos>) {
        self.next_regexp = start;
    }

    #[inline(always)]
    fn had_line_break(&self) -> bool {
        self.had_line_break
    }

    #[inline(always)]
    fn mark_had_line_break(&mut self) {
        self.had_line_break = true;
    }

    #[inline(always)]
    fn had_line_break_before_last(&self) -> bool {
        self.had_line_break_before_last
    }

    #[inline(always)]
    fn token_contexts(&self) -> &swc_ecma_lexer::TokenContexts {
        unreachable!();
    }

    #[inline(always)]
    fn mut_token_contexts(&mut self) -> &mut swc_ecma_lexer::TokenContexts {
        unreachable!();
    }

    #[inline(always)]
    fn set_token_type(&mut self, token_type: Self::TokenType) {
        self.token_type = Some(token_type);
    }

    #[inline(always)]
    fn token_type(&self) -> Option<Self::TokenType> {
        self.token_type
    }

    #[inline(always)]
    fn syntax(&self) -> SyntaxFlags {
        unreachable!("syntax is not stored in State, but in Lexer")
    }

    #[inline(always)]
    fn prev_hi(&self) -> BytePos {
        self.prev_hi
    }

    #[inline(always)]
    fn start(&self) -> BytePos {
        self.start
    }
}