1use std::mem::take;
2
3use smallvec::{smallvec, SmallVec};
4use swc_common::{BytePos, Span};
5use swc_ecma_ast::EsVersion;
6use tracing::trace;
7
8use super::{
9 comments_buffer::{BufferedComment, BufferedCommentKind},
10 Context, Input, Lexer,
11};
12use crate::{
13 error::{Error, SyntaxError},
14 input::Tokens,
15 lexer::util::CharExt,
16 token::{BinOpToken, Keyword, Token, TokenAndSpan, TokenKind, WordKind},
17 Syntax, *,
18};
19
#[derive(Clone)]
pub(super) struct State {
    /// Whether an expression may start at the current position, i.e. a `/`
    /// begins a regular expression rather than a division operator.
    pub is_expr_allowed: bool,
    /// When set, the next token must be re-lexed as a regex starting here.
    pub next_regexp: Option<BytePos>,
    /// Whether a line break was seen before the current token.
    pub had_line_break: bool,
    /// Value of `had_line_break` observed before the last emitted token.
    pub had_line_break_before_last: bool,
    /// True until the first token is produced; gates shebang handling.
    is_first: bool,
    /// Start position of the token currently being lexed.
    pub start: BytePos,
    // Line tracking; initialized to line 1 / offset 0 in `State::new`.
    pub cur_line: usize,
    pub line_start: BytePos,
    /// End position of the previously emitted token.
    pub prev_hi: BytePos,
    /// Start position of the template literal currently being lexed, if any.
    pub tpl_start: BytePos,

    /// Stack of syntactic contexts used to disambiguate `/`, `{`, `<`, ...
    context: TokenContexts,
    syntax: Syntax,

    /// Coarse classification of the last token, or `None` before any token.
    token_type: Option<TokenType>,
}
44
/// Coarse classification of the previously lexed token, kept in `State` so
/// the lexer can decide how following characters should be interpreted
/// (e.g. whether `/` starts a regex or is a division operator).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum TokenType {
    Template,
    Dot,
    Colon,
    LBrace,
    RParen,
    Semi,
    BinOp(BinOpToken),
    Keyword(Keyword),
    JSXName,
    JSXText,
    JSXTagStart,
    JSXTagEnd,
    Arrow,
    /// Any other token kind, with the two properties the lexer needs
    /// precomputed (see `From<TokenKind>` below).
    Other {
        before_expr: bool,
        can_have_trailing_comment: bool,
    },
}
65impl TokenType {
66 #[inline]
67 const fn before_expr(self) -> bool {
68 match self {
69 TokenType::JSXName
70 | TokenType::JSXTagStart
71 | TokenType::JSXTagEnd
72 | TokenType::Template
73 | TokenType::Dot
74 | TokenType::RParen => false,
75
76 TokenType::JSXText
77 | TokenType::Colon
78 | TokenType::LBrace
79 | TokenType::Semi
80 | TokenType::Arrow => true,
81
82 TokenType::BinOp(b) => b.before_expr(),
83 TokenType::Keyword(k) => k.before_expr(),
84 TokenType::Other { before_expr, .. } => before_expr,
85 }
86 }
87}
88
89impl From<TokenKind> for TokenType {
90 #[inline]
91 fn from(t: TokenKind) -> Self {
92 match t {
93 TokenKind::Template => TokenType::Template,
94 TokenKind::Dot => TokenType::Dot,
95 TokenKind::Colon => TokenType::Colon,
96 TokenKind::LBrace => TokenType::LBrace,
97 TokenKind::RParen => TokenType::RParen,
98 TokenKind::Semi => TokenType::Semi,
99 TokenKind::JSXTagEnd => TokenType::JSXTagEnd,
100 TokenKind::JSXTagStart => TokenType::JSXTagStart,
101 TokenKind::JSXText => TokenType::JSXText,
102 TokenKind::JSXName => TokenType::JSXName,
103 TokenKind::BinOp(op) => TokenType::BinOp(op),
104 TokenKind::Arrow => TokenType::Arrow,
105
106 TokenKind::Word(WordKind::Keyword(k)) => TokenType::Keyword(k),
107 _ => TokenType::Other {
108 before_expr: t.before_expr(),
109 can_have_trailing_comment: matches!(
110 t,
111 TokenKind::Num
112 | TokenKind::Str
113 | TokenKind::Word(WordKind::Ident(..))
114 | TokenKind::DollarLBrace
115 | TokenKind::Regex
116 | TokenKind::BigInt
117 | TokenKind::JSXText
118 | TokenKind::RBrace
119 ),
120 },
121 }
122 }
123}
124
impl Tokens for Lexer<'_> {
    #[inline]
    fn set_ctx(&mut self, ctx: Context) {
        // Switching into module mode promotes buffered module-only errors
        // into real errors (see `add_module_mode_error`).
        if ctx.contains(Context::Module) && !self.module_errors.borrow().is_empty() {
            let mut module_errors = self.module_errors.borrow_mut();
            self.errors.borrow_mut().append(&mut *module_errors);
        }
        self.ctx = ctx
    }

    #[inline]
    fn ctx(&self) -> Context {
        self.ctx
    }

    #[inline]
    fn syntax(&self) -> Syntax {
        self.syntax
    }

    #[inline]
    fn target(&self) -> EsVersion {
        self.target
    }

    #[inline]
    fn start_pos(&self) -> BytePos {
        self.start_pos
    }

    #[inline]
    fn set_expr_allowed(&mut self, allow: bool) {
        self.set_expr_allowed(allow)
    }

    #[inline]
    fn set_next_regexp(&mut self, start: Option<BytePos>) {
        self.state.next_regexp = start;
    }

    #[inline]
    fn token_context(&self) -> &TokenContexts {
        &self.state.context
    }

    #[inline]
    fn token_context_mut(&mut self) -> &mut TokenContexts {
        &mut self.state.context
    }

    #[inline]
    fn set_token_context(&mut self, c: TokenContexts) {
        self.state.context = c;
    }

    fn add_error(&self, error: Error) {
        self.errors.borrow_mut().push(error);
    }

    /// Records an error that is only a real error in module mode; it is
    /// buffered until the mode is known (see `set_ctx`).
    fn add_module_mode_error(&self, error: Error) {
        if self.ctx.contains(Context::Module) {
            self.add_error(error);
            return;
        }
        self.module_errors.borrow_mut().push(error);
    }

    fn take_errors(&mut self) -> Vec<Error> {
        take(&mut self.errors.borrow_mut())
    }

    fn take_script_module_errors(&mut self) -> Vec<Error> {
        take(&mut self.module_errors.borrow_mut())
    }

    fn end_pos(&self) -> BytePos {
        self.input.end_pos()
    }
}
204
impl Lexer<'_> {
    /// Flushes pending and buffered comments into the user-provided comment
    /// sink. Cold path: only invoked when the end of input is reached.
    #[cold]
    #[inline(never)]
    fn consume_pending_comments(&mut self) {
        if let Some(comments) = self.comments.as_mut() {
            let comments_buffer = self.comments_buffer.as_mut().unwrap();
            let last = self.state.prev_hi;

            // Decide whether pending comments attach as leading (nothing was
            // lexed after `start_pos`) or trailing the previous token.
            for c in comments_buffer.take_pending_leading() {
                if last == self.start_pos {
                    comments_buffer.push(BufferedComment {
                        kind: BufferedCommentKind::Leading,
                        pos: last,
                        comment: c,
                    });
                } else {
                    comments_buffer.push(BufferedComment {
                        kind: BufferedCommentKind::Trailing,
                        pos: last,
                        comment: c,
                    });
                }
            }

            // Move everything buffered so far into the comment sink.
            for comment in comments_buffer.take_comments() {
                match comment.kind {
                    BufferedCommentKind::Leading => {
                        comments.add_leading(comment.pos, comment.comment);
                    }
                    BufferedCommentKind::Trailing => {
                        comments.add_trailing(comment.pos, comment.comment);
                    }
                }
            }
        }
    }

    /// Lexes the next token, dispatching to the regex / JSX / template
    /// sub-lexers depending on the current state and context.
    ///
    /// `start` is advanced past skipped whitespace so it points at the real
    /// token start. Returns `Ok(None)` at end of input.
    fn next_token(&mut self, start: &mut BytePos) -> Result<Option<Token>, Error> {
        // The parser may have requested that the next token be lexed as a
        // regular expression (via `set_next_regexp`).
        if let Some(start) = self.state.next_regexp {
            return Ok(Some(self.read_regexp(start)?));
        }

        if self.state.is_first {
            // A shebang (`#!...`) is only valid before the first token.
            if let Some(shebang) = self.read_shebang()? {
                return Ok(Some(Token::Shebang(shebang)));
            }
        }

        self.state.had_line_break = self.state.is_first;
        self.state.is_first = false;

        // Skip whitespace unless the current context is whitespace-sensitive
        // (templates, JSX text — see `TokenContext::preserve_space`).
        if self.state.can_skip_space() {
            self.skip_space::<true>();
            *start = self.input.cur_pos();
        };

        match self.input.cur() {
            Some(..) => {}
            None => {
                // End of input: flush any buffered comments before stopping.
                self.consume_pending_comments();

                return Ok(None);
            }
        };

        self.state.start = *start;

        if self.syntax.jsx()
            && !self.ctx.contains(Context::InPropertyName)
            && !self.ctx.contains(Context::InType)
        {
            // Inside JSX children: everything is JSX text / tags.
            if self.state.context.current() == Some(TokenContext::JSXExpr) {
                return self.read_jsx_token();
            }

            let c = self.cur();
            if let Some(c) = c {
                if self.state.context.current() == Some(TokenContext::JSXOpeningTag)
                    || self.state.context.current() == Some(TokenContext::JSXClosingTag)
                {
                    if c.is_ident_start() {
                        return self.read_jsx_word().map(Some);
                    }

                    if c == '>' {
                        // Safety-wise: `cur()` just confirmed a character is
                        // present at the current position.
                        unsafe {
                            self.input.bump();
                        }
                        return Ok(Some(Token::JSXTagEnd));
                    }

                    // Attribute string values only occur in opening tags.
                    if (c == '\'' || c == '"')
                        && self.state.context.current() == Some(TokenContext::JSXOpeningTag)
                    {
                        return self.read_jsx_str(c).map(Some);
                    }
                }

                // `<` in expression position starts a JSX tag (but not
                // `<!`, which would be an HTML-comment-like sequence).
                if c == '<' && self.state.is_expr_allowed && self.input.peek() != Some('!') {
                    let had_line_break_before_last = self.had_line_break_before_last();
                    let cur_pos = self.input.cur_pos();

                    unsafe {
                        self.input.bump();
                    }

                    // Recover from a git merge-conflict marker: report
                    // TS1185, skip the marker line and continue lexing.
                    if had_line_break_before_last && self.is_str("<<<<<< ") {
                        let span = Span::new(cur_pos, cur_pos + BytePos(7));

                        self.emit_error_span(span, SyntaxError::TS1185);
                        self.skip_line_comment(6);
                        self.skip_space::<true>();
                        return self.read_token();
                    }

                    return Ok(Some(Token::JSXTagStart));
                }
            }
        }

        // Inside a template literal: lex the next quasi / `${` chunk.
        if let Some(TokenContext::Tpl) = self.state.context.current() {
            let start = self.state.tpl_start;
            return self.read_tmpl_token(start).map(Some);
        }

        self.read_token()
    }
}
352
impl Iterator for Lexer<'_> {
    type Item = TokenAndSpan;

    /// Produces the next token with its span and line-break flag, updating
    /// the context-tracking state and attaching buffered comments.
    fn next(&mut self) -> Option<Self::Item> {
        let mut start = self.cur_pos();

        let res = self.next_token(&mut start);

        // A lexing error becomes an error token so iteration can continue;
        // `Ok(None)` (end of input) stays `None`.
        let token = match res.map_err(Token::Error).map_err(Some) {
            Ok(t) => t,
            Err(e) => e,
        };

        let span = self.span(start);
        if let Some(ref token) = token {
            // Comments buffered while scanning this token become its
            // leading comments.
            if let Some(comments) = self.comments_buffer.as_mut() {
                for comment in comments.take_pending_leading() {
                    comments.push(BufferedComment {
                        kind: BufferedCommentKind::Leading,
                        pos: start,
                        comment,
                    });
                }
            }

            // Record the token in the state machine *before* snapshotting
            // the positions/line-break info for the next iteration.
            self.state.update(start, token.kind());
            self.state.prev_hi = self.last_pos();
            self.state.had_line_break_before_last = self.had_line_break_before_last();
        }

        token.map(|token| {
            TokenAndSpan {
                token,
                had_line_break: self.had_line_break_before_last(),
                span,
            }
        })
    }
}
393
394impl State {
395 pub fn new(syntax: Syntax, start_pos: BytePos) -> Self {
396 let context = TokenContexts(smallvec![TokenContext::BraceStmt]);
397
398 State {
399 is_expr_allowed: true,
400 next_regexp: None,
401 had_line_break: false,
402 had_line_break_before_last: false,
403 is_first: true,
404 start: BytePos(0),
405 cur_line: 1,
406 line_start: BytePos(0),
407 prev_hi: start_pos,
408 tpl_start: BytePos::DUMMY,
409 context,
410 syntax,
411 token_type: None,
412 }
413 }
414}
415
impl State {
    /// Whitespace may be skipped unless the current context preserves it
    /// (template literals, JSX text).
    pub fn can_skip_space(&self) -> bool {
        !self
            .context
            .current()
            .map(|t| t.preserve_space())
            .unwrap_or(false)
    }

    /// A line comment after a binary operator belongs to the following
    /// token, not to the operator.
    pub fn can_have_trailing_line_comment(&self) -> bool {
        match self.token_type {
            Some(TokenType::BinOp(..)) => false,
            _ => true,
        }
    }

    /// Whether a comment following the last token should be attached to it
    /// as a trailing comment.
    pub fn can_have_trailing_comment(&self) -> bool {
        match self.token_type {
            Some(TokenType::Keyword(..)) => false,
            Some(TokenType::Semi) | Some(TokenType::LBrace) => true,
            Some(TokenType::Other {
                can_have_trailing_comment,
                ..
            }) => can_have_trailing_comment,
            _ => false,
        }
    }

    /// True if the last lexed token was a template element.
    pub fn last_was_tpl_element(&self) -> bool {
        matches!(self.token_type, Some(TokenType::Template))
    }

    /// Records `next` as the last token and recomputes `is_expr_allowed`.
    fn update(&mut self, start: BytePos, next: TokenKind) {
        if cfg!(feature = "debug") {
            trace!(
                "updating state: next={:?}, had_line_break={} ",
                next,
                self.had_line_break
            );
        }

        let prev = self.token_type.take();
        self.token_type = Some(TokenType::from(next));

        self.is_expr_allowed = self.is_expr_allowed_on_next(prev, start, next);
    }

    /// Decides whether an expression may start after `next`, pushing/popping
    /// token contexts as a side effect. This is the heart of the `/`-as-regex
    /// vs `/`-as-division (and `{`-as-block vs `{`-as-object) disambiguation.
    fn is_expr_allowed_on_next(
        &mut self,
        prev: Option<TokenType>,
        start: BytePos,
        next: TokenKind,
    ) -> bool {
        let State {
            ref mut context,
            had_line_break,
            had_line_break_before_last,
            is_expr_allowed,
            syntax,
            ..
        } = *self;

        let is_next_keyword = matches!(next, TokenKind::Word(WordKind::Keyword(..)));

        if is_next_keyword && prev == Some(TokenType::Dot) {
            // A keyword after `.` is a property name (`obj.if`), so no
            // expression can follow it.
            false
        } else {
            match next {
                TokenKind::RParen | TokenKind::RBrace => {
                    // Never pop the outermost context.
                    if context.len() == 1 {
                        return true;
                    }

                    let out = context.pop().unwrap();

                    // `}` closing the body of a function/class *expression*:
                    // pop the extra marker context too.
                    if out == TokenContext::BraceStmt
                        && matches!(
                            context.current(),
                            Some(TokenContext::FnExpr | TokenContext::ClassExpr)
                        )
                    {
                        context.pop();
                        return false;
                    }

                    // `}` closing a `${...}` substitution resumes the
                    // template literal.
                    if out == TokenContext::TplQuasi {
                        match context.current() {
                            Some(TokenContext::Tpl) => return false,
                            _ => return true,
                        }
                    }

                    !out.is_expr()
                }

                TokenKind::Word(WordKind::Keyword(Keyword::Function)) => {
                    // `function` in expression position: remember it so the
                    // closing `}` of its body is handled correctly above.
                    if is_expr_allowed
                        && !context.is_brace_block(prev, had_line_break, is_expr_allowed)
                    {
                        context.push(TokenContext::FnExpr);
                    }
                    false
                }

                TokenKind::Word(WordKind::Keyword(Keyword::Class)) => {
                    // Same as `function` above, but for class expressions.
                    if is_expr_allowed
                        && !context.is_brace_block(prev, had_line_break, is_expr_allowed)
                    {
                        context.push(TokenContext::ClassExpr);
                    }
                    false
                }

                TokenKind::Colon
                    if matches!(
                        context.current(),
                        Some(TokenContext::FnExpr | TokenContext::ClassExpr)
                    ) =>
                {
                    // `{ function: ... }` — `function`/`class` turned out to
                    // be a property name, not an expression after all.
                    context.pop();
                    true
                }

                // `of` inside `for (...)` is only the for-of keyword when
                // the previous token cannot precede an expression.
                known_ident_token!("of")
                    if Some(TokenContext::ParenStmt { is_for_loop: true }) == context.current() =>
                {
                    !prev
                        .expect("context.current() if ParenStmt, so prev token cannot be None")
                        .before_expr()
                }

                TokenKind::Word(WordKind::Ident(..)) => {
                    // An identifier after `let`/`const`/`var` with an
                    // intervening line break (ASI case).
                    match prev {
                        Some(prev) => match prev {
                            TokenType::Keyword(Keyword::Let)
                            | TokenType::Keyword(Keyword::Const)
                            | TokenType::Keyword(Keyword::Var)
                                if had_line_break_before_last =>
                            {
                                true
                            }
                            _ => false,
                        },
                        _ => false,
                    }
                }

                TokenKind::LBrace => {
                    let cur = context.current();
                    if syntax.jsx() && cur == Some(TokenContext::JSXOpeningTag) {
                        // `{` in an opening tag starts an attribute expression.
                        context.push(TokenContext::BraceExpr)
                    } else if syntax.jsx() && cur == Some(TokenContext::JSXExpr) {
                        // `{` among JSX children starts a child expression.
                        context.push(TokenContext::TplQuasi);
                    } else {
                        // Otherwise decide block statement vs object literal.
                        let next_ctxt =
                            if context.is_brace_block(prev, had_line_break, is_expr_allowed) {
                                TokenContext::BraceStmt
                            } else {
                                TokenContext::BraceExpr
                            };
                        context.push(next_ctxt);
                    }
                    true
                }

                TokenKind::BinOp(BinOpToken::Div)
                    if syntax.jsx() && prev == Some(TokenType::JSXTagStart) =>
                {
                    // `</` — this is a closing tag, not division: replace the
                    // JSXExpr/JSXOpeningTag pair with a JSXClosingTag context.
                    context.pop();
                    context.pop();
                    context.push(TokenContext::JSXClosingTag);
                    false
                }

                TokenKind::DollarLBrace => {
                    context.push(TokenContext::TplQuasi);
                    true
                }

                TokenKind::LParen => {
                    // Only `if`/`with`/`while`/`for` heads are statement
                    // parens; everything else is a parenthesized expression.
                    context.push(match prev {
                        Some(TokenType::Keyword(k)) => match k {
                            Keyword::If | Keyword::With | Keyword::While => {
                                TokenContext::ParenStmt { is_for_loop: false }
                            }
                            Keyword::For => TokenContext::ParenStmt { is_for_loop: true },
                            _ => TokenContext::ParenExpr,
                        },
                        _ => TokenContext::ParenExpr,
                    });
                    true
                }

                // `++`/`--` do not change whether an expression may start.
                TokenKind::PlusPlus | TokenKind::MinusMinus => is_expr_allowed,

                TokenKind::BackQuote => {
                    // A backtick either closes the current template or opens
                    // a new one.
                    if let Some(TokenContext::Tpl) = context.current() {
                        context.pop();
                    } else {
                        self.tpl_start = start;
                        context.push(TokenContext::Tpl);
                    }
                    false
                }

                TokenKind::JSXTagStart => {
                    // Push the child-expression context, then the tag itself.
                    context.push(TokenContext::JSXExpr);
                    context.push(TokenContext::JSXOpeningTag);
                    false
                }

                TokenKind::JSXTagEnd => {
                    let out = context.pop();
                    if (out == Some(TokenContext::JSXOpeningTag)
                        && prev == Some(TokenType::BinOp(BinOpToken::Div)))
                        || out == Some(TokenContext::JSXClosingTag)
                    {
                        // Self-closing (`/>`) or closing tag: the whole JSX
                        // element ends, so pop its JSXExpr context as well.
                        context.pop();
                        context.current() == Some(TokenContext::JSXExpr)
                    } else {
                        true
                    }
                }

                _ => next.before_expr(),
            }
        }
    }
}
669
/// Stack of [`TokenContext`]s describing the nesting of syntactic contexts
/// the lexer is currently inside (braces, parens, templates, JSX, ...).
#[derive(Clone, Default)]
pub struct TokenContexts(pub SmallVec<[TokenContext; 128]>);
672
impl TokenContexts {
    /// Determines whether a `{` after `prev` starts a block statement
    /// (as opposed to an object literal / brace expression).
    fn is_brace_block(
        &self,
        prev: Option<TokenType>,
        had_line_break: bool,
        is_expr_allowed: bool,
    ) -> bool {
        if let Some(TokenType::Colon) = prev {
            // After `:`, inherit the decision from the enclosing brace
            // context when there is one.
            match self.current() {
                Some(TokenContext::BraceStmt) => return true,
                Some(TokenContext::BraceExpr) => return false,
                _ => {}
            };
        }

        match prev {
            // `return {` / `yield {` is an object literal, unless a line
            // break means ASI terminated the statement first.
            Some(TokenType::Keyword(Keyword::Return))
            | Some(TokenType::Keyword(Keyword::Yield)) => {
                return had_line_break;
            }

            // These can only be followed by a statement-position `{`.
            Some(TokenType::Keyword(Keyword::Else))
            | Some(TokenType::Semi)
            | None
            | Some(TokenType::RParen) => {
                return true;
            }

            Some(TokenType::LBrace) => {
                if self.current() == Some(TokenContext::BraceExpr) {
                    let len = self.len();
                    // NOTE(review): `len - 2` assumes at least two contexts
                    // on the stack here; since a preceding `{` always pushed
                    // a context, this should hold — verify if refactoring.
                    if let Some(TokenContext::JSXOpeningTag) = self.0.get(len - 2) {
                        return true;
                    }
                }

                return self.current() == Some(TokenContext::BraceStmt);
            }

            // After `<` / `>` (e.g. around type parameters) a `{` is a block.
            Some(TokenType::BinOp(BinOpToken::Lt)) | Some(TokenType::BinOp(BinOpToken::Gt)) => {
                return true
            }

            // `=> {` is an arrow-function body.
            Some(TokenType::Arrow) => return true,
            _ => {}
        }

        // After a non-expression-preceding token and a line break, ASI makes
        // the `{` start a new (block) statement.
        if had_line_break {
            if let Some(TokenType::Other {
                before_expr: false, ..
            }) = prev
            {
                return true;
            }
        }

        !is_expr_allowed
    }

    #[inline]
    pub fn len(&self) -> usize {
        self.0.len()
    }

    #[inline]
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Pops the innermost context, tracing when the `debug` feature is on.
    #[inline]
    pub fn pop(&mut self) -> Option<TokenContext> {
        let opt = self.0.pop();
        if cfg!(feature = "debug") {
            trace!("context.pop({:?}): {:?}", opt, self.0);
        }
        opt
    }

    /// Returns the innermost (top-of-stack) context, if any.
    #[inline]
    pub fn current(&self) -> Option<TokenContext> {
        self.0.last().cloned()
    }

    /// Pushes a context, tracing when the `debug` feature is on.
    #[inline]
    pub fn push(&mut self, t: TokenContext) {
        self.0.push(t);

        if cfg!(feature = "debug") {
            trace!("context.push({:?}): {:?}", t, self.0);
        }
    }
}
783
/// A syntactic context the lexer can be inside. Contexts live on a stack
/// ([`TokenContexts`]) and drive the disambiguation of `/`, `<`, `{` and
/// backticks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenContext {
    /// A `{ ... }` in statement position (block).
    BraceStmt,
    /// A `{ ... }` in expression position (object literal, JSX attribute
    /// expression, ...).
    BraceExpr,
    /// A `${ ... }` substitution inside a template literal.
    TplQuasi,
    /// The parenthesized head of a statement, e.g. `if (...)`, `for (...)`.
    ParenStmt {
        /// True for `for (...)` — needed for `of` handling.
        is_for_loop: bool,
    },
    /// A parenthesized expression.
    ParenExpr,
    /// Inside a template literal (between backticks).
    Tpl,
    /// Marker for the body of a function expression.
    FnExpr,
    /// Marker for the body of a class expression.
    ClassExpr,
    /// Inside a JSX opening tag.
    JSXOpeningTag,
    /// Inside a JSX closing tag.
    JSXClosingTag,
    /// JSX children / expression position.
    JSXExpr,
}

impl TokenContext {
    /// Whether this context is expression-level (a closing token in it does
    /// not end a statement).
    pub(crate) const fn is_expr(&self) -> bool {
        matches!(
            self,
            Self::BraceExpr
                | Self::TplQuasi
                | Self::ParenExpr
                | Self::Tpl
                | Self::FnExpr
                | Self::ClassExpr
                | Self::JSXExpr
        )
    }

    /// Whether whitespace is significant and must not be skipped in this
    /// context (template literals and JSX text).
    // Rewritten as `matches!` for consistency with `is_expr` above
    // (clippy::match_like_matches_macro).
    pub(crate) const fn preserve_space(&self) -> bool {
        matches!(self, Self::Tpl | Self::JSXExpr)
    }
}
826
/// Test helper: runs `f` with a fresh lexer over `s` inside a test session.
#[cfg(test)]
pub(crate) fn with_lexer<F, Ret>(
    syntax: Syntax,
    target: EsVersion,
    s: &str,
    f: F,
) -> Result<Ret, ::testing::StdErr>
where
    F: FnOnce(&mut Lexer<'_>) -> Result<Ret, ()>,
{
    crate::with_test_sess(s, |_, fm| {
        let mut lexer = Lexer::new(syntax, target, fm, None);
        let result = f(&mut lexer);

        // After lexing, the context stack must have unwound back to the
        // single initial statement-level context.
        #[cfg(debug_assertions)]
        {
            let expected = TokenContexts(smallvec![TokenContext::BraceStmt]);
            debug_assert_eq!(lexer.state.context.0, expected.0);
        }

        result
    })
}
849
/// Test helper: lexes `s` into tokens with spans, using the default target.
#[cfg(test)]
pub(crate) fn lex(syntax: Syntax, s: &'static str) -> Vec<TokenAndSpan> {
    with_lexer(syntax, Default::default(), s, |lexer| {
        let tokens: Vec<TokenAndSpan> = lexer.collect();
        Ok(tokens)
    })
    .unwrap()
}
854
/// Test helper: lexes `s` in strict module mode and returns the errors.
#[cfg(test)]
pub(crate) fn lex_module_errors(syntax: Syntax, s: &'static str) -> Vec<Error> {
    with_lexer(syntax, Default::default(), s, |lexer| {
        // Enable strict module mode so module-only errors are reported.
        lexer.ctx.insert(Context::Module);
        lexer.ctx.insert(Context::Strict);

        // Drain the token stream; only the recorded errors matter.
        while lexer.next().is_some() {}

        Ok(lexer.take_errors())
    })
    .unwrap()
}
868
/// Test helper: lexes `s` and returns just the tokens (spans dropped).
#[cfg(test)]
pub(crate) fn lex_tokens(syntax: Syntax, s: &'static str) -> Vec<Token> {
    with_lexer(syntax, Default::default(), s, |lexer| {
        let tokens: Vec<Token> = lexer.map(|ts| ts.token).collect();
        Ok(tokens)
    })
    .unwrap()
}
876
/// Test helper: lexes `s` (targeting ES2020) and returns both the tokens
/// and any errors produced along the way.
#[cfg(test)]
pub(crate) fn lex_errors(syntax: Syntax, s: &'static str) -> (Vec<Token>, Vec<Error>) {
    with_lexer(syntax, EsVersion::Es2020, s, |lexer| {
        let tokens: Vec<Token> = lexer.map(|ts| ts.token).collect();
        let errors = lexer.take_errors();
        Ok((tokens, errors))
    })
    .unwrap()
}