1use std::{borrow::Cow, char, iter::FusedIterator, rc::Rc};
4
5use either::Either::{self, Left, Right};
6use smartstring::{LazyCompact, SmartString};
7use swc_atoms::{
8 wtf8::{CodePoint, Wtf8, Wtf8Buf},
9 Atom, AtomStoreCell,
10};
11use swc_common::{
12 comments::{Comment, CommentKind, Comments},
13 input::{Input, StringInput},
14 BytePos, Span,
15};
16use swc_ecma_ast::{EsVersion, Ident};
17
18use self::table::{ByteHandler, BYTE_HANDLERS};
19use crate::{
20 byte_search,
21 error::{Error, SyntaxError},
22 input::Tokens,
23 lexer::{
24 char_ext::CharExt,
25 comments_buffer::{BufferedComment, BufferedCommentKind, CommentsBuffer},
26 jsx::xhtml,
27 number::{parse_integer, LazyInteger},
28 search::SafeByteMatchTable,
29 state::State,
30 },
31 safe_byte_match_table,
32 syntax::SyntaxFlags,
33 BigIntValue, Context, Syntax,
34};
35
36#[cfg(feature = "unstable")]
37pub(crate) mod capturing;
38mod char_ext;
39mod comments_buffer;
40mod jsx;
41mod number;
42pub(crate) mod search;
43mod state;
44mod table;
45pub(crate) mod token;
46mod whitespace;
47
48pub(crate) use state::TokenFlags;
49pub(crate) use token::{NextTokenAndSpan, Token, TokenAndSpan, TokenValue};
50
/// First UTF-8 byte shared by U+2028 (LINE SEPARATOR) and U+2029 (PARAGRAPH
/// SEPARATOR). Seeing this byte alone is not enough to identify either
/// character; the following two bytes must be checked as well.
const LS_OR_PS_FIRST: u8 = 0xe2;
/// Second and third UTF-8 bytes of U+2028 (LINE SEPARATOR).
const LS_BYTES_2_AND_3: [u8; 2] = [0x80, 0xa8];
/// Second and third UTF-8 bytes of U+2029 (PARAGRAPH SEPARATOR).
const PS_BYTES_2_AND_3: [u8; 2] = [0x80, 0xa9];
56
/// Bytes at which a line-comment scan stops: `\n`, `\r`, or the lead byte of
/// a possible LS/PS sequence (which still has to be confirmed by the caller).
static LINE_BREAK_TABLE: SafeByteMatchTable =
    safe_byte_match_table!(|b| matches!(b, b'\n' | b'\r' | LS_OR_PS_FIRST));

/// Bytes of interest while scanning a block comment: `*` (possible start of
/// `*/`) plus every byte that may begin a line terminator.
static BLOCK_COMMENT_SCAN_TABLE: SafeByteMatchTable =
    safe_byte_match_table!(|b| { matches!(b, b'*' | b'\n' | b'\r' | LS_OR_PS_FIRST) });

/// Matches `"`, `\n`, `\\` and `\r`.
/// NOTE(review): presumably the bytes that interrupt a double-quoted string
/// scan; the consumer is not visible in this part of the file.
static DOUBLE_QUOTE_STRING_END_TABLE: SafeByteMatchTable =
    safe_byte_match_table!(|b| matches!(b, b'"' | b'\n' | b'\\' | b'\r'));
/// Matches `'`, `\n`, `\\` and `\r`.
/// NOTE(review): presumably the single-quoted counterpart of
/// `DOUBLE_QUOTE_STRING_END_TABLE`; the consumer is not visible here.
static SINGLE_QUOTE_STRING_END_TABLE: SafeByteMatchTable =
    safe_byte_match_table!(|b| matches!(b, b'\'' | b'\n' | b'\\' | b'\r'));

/// Matches every byte that is NOT an ASCII letter, ASCII digit, `_` or `$`.
static NOT_ASCII_ID_CONTINUE_TABLE: SafeByteMatchTable =
    safe_byte_match_table!(|b| !(b.is_ascii_alphanumeric() || b == b'_' || b == b'$'));
70
/// Combines a UTF-16 surrogate pair into the supplementary-plane code point
/// it encodes.
///
/// `high` is expected to be a high surrogate (`0xD800..=0xDBFF`) and `low` a
/// low surrogate (`0xDC00..=0xDFFF`). The arguments are not validated here;
/// other values may underflow the unsigned arithmetic.
#[inline]
const fn pair_to_code_point(high: u32, low: u32) -> u32 {
    const HIGH_SURROGATE_BASE: u32 = 0xd800;
    const LOW_SURROGATE_BASE: u32 = 0xdc00;
    const SUPPLEMENTARY_BASE: u32 = 0x10000;
    // Each high surrogate selects a block of 0x400 low-surrogate values.
    const BLOCK_SIZE: u32 = 0x400;

    let block_offset = (high - HIGH_SURROGATE_BASE) * BLOCK_SIZE;
    block_offset + low - LOW_SURROGATE_BASE + SUPPLEMENTARY_BASE
}
77
/// Outcome of decoding a Unicode escape sequence.
///
/// Converts into a WTF-8 [`CodePoint`] via the `From` impl on this type.
#[derive(Debug)]
pub enum UnicodeEscape {
    /// The escape decoded to a value that is carried as a Rust `char`.
    CodePoint(char),
    /// A `char` result kept distinct from [`UnicodeEscape::CodePoint`].
    /// NOTE(review): presumably assembled from a high/low surrogate pair of
    /// two consecutive escapes — confirm against the producer.
    SurrogatePair(char),
    /// A raw `u32` value that is not carried as a `char`; it is converted to a
    /// `CodePoint` without validation.
    /// NOTE(review): presumably an unpaired surrogate in `0xD800..=0xDFFF` —
    /// confirm against the producer.
    LoneSurrogate(u32),
}
93
94impl From<UnicodeEscape> for CodePoint {
95 fn from(value: UnicodeEscape) -> Self {
96 match value {
97 UnicodeEscape::CodePoint(c) | UnicodeEscape::SurrogatePair(c) => {
98 CodePoint::from_char(c)
99 }
100 UnicodeEscape::LoneSurrogate(u) => unsafe { CodePoint::from_u32_unchecked(u) },
101 }
102 }
103}
104
/// Result type used throughout the lexer; the error side is the parser
/// crate's [`crate::error::Error`].
pub type LexResult<T> = Result<T, crate::error::Error>;
106
/// Returns `s` with every `_` removed.
///
/// `has_underscore` must state truthfully whether `s` contains an underscore
/// (checked with `debug_assert!` only). When it is `false` the input is
/// returned borrowed and no allocation takes place.
fn remove_underscore(s: &str, has_underscore: bool) -> Cow<'_, str> {
    if !has_underscore {
        debug_assert!(!s.contains('_'));
        return Cow::Borrowed(s);
    }

    debug_assert!(s.contains('_'));
    let mut stripped = String::with_capacity(s.len());
    stripped.extend(s.chars().filter(|&ch| ch != '_'));
    Cow::Owned(stripped)
}
116
/// Tokenizer over a [`StringInput`].
#[derive(Clone)]
pub struct Lexer<'a> {
    /// Optional sink that receives comments (see `consume_pending_comments`).
    comments: Option<&'a dyn Comments>,
    /// Staging area for comments; created by `new` only when `comments` is
    /// `Some`.
    comments_buffer: Option<CommentsBuffer>,

    /// Current lexing context flags.
    pub ctx: Context,
    /// The source text being tokenized.
    input: StringInput<'a>,
    /// `input.last_pos()` as observed when the lexer was constructed.
    start_pos: BytePos,

    /// Mutable lexer state (line-break tracking, previous token end, ...).
    state: State,
    /// Per-token flags; reset to empty at the start of `read_token`.
    token_flags: TokenFlags,
    /// Enabled syntax features, derived from the `Syntax` passed to `new`.
    pub(crate) syntax: SyntaxFlags,
    /// ECMAScript version passed to `new`.
    pub(crate) target: EsVersion,

    /// Errors recorded through `push_error`.
    errors: Vec<Error>,
    /// NOTE(review): presumably errors that only apply when the source turns
    /// out to be a module; the code that fills it is not visible here.
    module_errors: Vec<Error>,

    /// Atom store used by `atom` / `wtf8_atom` to create atoms.
    atoms: Rc<AtomStoreCell>,
}

// Marker impl only; the `Iterator` impl itself is outside this part of the
// file.
impl FusedIterator for Lexer<'_> {}
139
140impl<'a> Lexer<'a> {
141 #[inline(always)]
142 fn input(&self) -> &StringInput<'a> {
143 &self.input
144 }
145
146 #[inline(always)]
147 fn input_mut(&mut self) -> &mut StringInput<'a> {
148 &mut self.input
149 }
150
151 #[inline(always)]
152 fn push_error(&mut self, error: Error) {
153 self.errors.push(error);
154 }
155
156 #[inline(always)]
157 fn state(&self) -> &State {
158 &self.state
159 }
160
161 #[inline(always)]
162 fn state_mut(&mut self) -> &mut State {
163 &mut self.state
164 }
165
166 #[inline(always)]
167 fn comments(&self) -> Option<&'a dyn swc_common::comments::Comments> {
168 self.comments
169 }
170
171 #[inline(always)]
172 fn comments_buffer(&self) -> Option<&CommentsBuffer> {
173 self.comments_buffer.as_ref()
174 }
175
176 #[inline(always)]
177 fn comments_buffer_mut(&mut self) -> Option<&mut CommentsBuffer> {
178 self.comments_buffer.as_mut()
179 }
180
181 #[inline(always)]
182 unsafe fn input_slice_to_cur(&mut self, start: BytePos) -> &'a str {
183 self.input.slice_to_cur(start)
184 }
185
186 #[inline(always)]
187 unsafe fn input_slice(&mut self, start: BytePos, end: BytePos) -> &'a str {
188 self.input.slice(start, end)
189 }
190
191 #[inline(always)]
192 fn input_uncons_while(&mut self, f: impl FnMut(char) -> bool) -> &'a str {
193 self.input_mut().uncons_while(f)
194 }
195
196 #[inline(always)]
197 fn atom<'b>(&self, s: impl Into<std::borrow::Cow<'b, str>>) -> swc_atoms::Atom {
198 self.atoms.atom(s)
199 }
200
201 #[inline(always)]
202 fn wtf8_atom<'b>(&self, s: impl Into<std::borrow::Cow<'b, Wtf8>>) -> swc_atoms::Wtf8Atom {
203 self.atoms.wtf8_atom(s)
204 }
205}
206
207impl<'a> Lexer<'a> {
208 pub fn new(
209 syntax: Syntax,
210 target: EsVersion,
211 input: StringInput<'a>,
212 comments: Option<&'a dyn Comments>,
213 ) -> Self {
214 let start_pos = input.last_pos();
215
216 Lexer {
217 comments,
218 comments_buffer: comments.is_some().then(CommentsBuffer::new),
219 ctx: Default::default(),
220 input,
221 start_pos,
222 state: State::new(start_pos),
223 syntax: syntax.into_flags(),
224 target,
225 errors: Default::default(),
226 module_errors: Default::default(),
227 atoms: Default::default(),
228 token_flags: TokenFlags::empty(),
229 }
230 }
231
232 fn read_token(&mut self) -> LexResult<Token> {
234 self.token_flags = TokenFlags::empty();
235 let byte = match self.input.as_str().as_bytes().first() {
236 Some(&v) => v,
237 None => return Ok(Token::Eof),
238 };
239
240 let handler = unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) };
241 handler(self)
242 }
243
244 fn read_token_plus_minus<const C: u8>(&mut self) -> LexResult<Token> {
245 let start = self.cur_pos();
246
247 unsafe {
248 self.input.bump();
250 }
251
252 Ok(if self.input.cur() == Some(C as char) {
254 unsafe {
255 self.input.bump();
257 }
258
259 if self.state.had_line_break && C == b'-' && self.eat(b'>') {
261 self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
262 self.skip_line_comment(0);
263 self.skip_space::<true>();
264 return self.read_token();
265 }
266
267 if C == b'+' {
268 Token::PlusPlus
269 } else {
270 Token::MinusMinus
271 }
272 } else if self.input.eat_byte(b'=') {
273 if C == b'+' {
274 Token::PlusEq
275 } else {
276 Token::MinusEq
277 }
278 } else if C == b'+' {
279 Token::Plus
280 } else {
281 Token::Minus
282 })
283 }
284
285 fn read_token_bang_or_eq<const C: u8>(&mut self) -> LexResult<Token> {
286 let start = self.cur_pos();
287 let had_line_break_before_last = self.had_line_break_before_last();
288
289 unsafe {
290 self.input.bump();
292 }
293
294 Ok(if self.input.eat_byte(b'=') {
295 if self.input.eat_byte(b'=') {
298 if C == b'!' {
299 Token::NotEqEq
300 } else {
301 if had_line_break_before_last && self.is_str("====") {
304 self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
305 self.skip_line_comment(4);
306 self.skip_space::<true>();
307 return self.read_token();
308 }
309
310 Token::EqEqEq
311 }
312 } else if C == b'!' {
313 Token::NotEq
314 } else {
315 Token::EqEq
316 }
317 } else if C == b'=' && self.input.eat_byte(b'>') {
318 Token::Arrow
321 } else if C == b'!' {
322 Token::Bang
323 } else {
324 Token::Eq
325 })
326 }
327}
328
impl Lexer<'_> {
    /// Lexes a token starting with `<` or `>` (`C` is that byte).
    ///
    /// Handles, in order: the TypeScript type-context shortcut (a single
    /// `<`/`>` is returned immediately), the `<!--` comment form, the shift
    /// operators, their `=`-suffixed variants, and seven-character
    /// `<<<<<<< ` / `>>>>>>> ` runs which are reported as `TS1185`.
    fn read_token_lt_gt<const C: u8>(&mut self) -> LexResult<Token> {
        let had_line_break_before_last = self.had_line_break_before_last();
        let start = self.cur_pos();
        self.bump();

        // In a TypeScript type context every `<` / `>` is lexed on its own
        // unless the context explicitly opts out.
        if self.syntax.typescript()
            && self.ctx.contains(Context::InType)
            && !self.ctx.contains(Context::ShouldNotLexLtOrGtAsType)
        {
            if C == b'<' {
                return Ok(Token::Lt);
            } else if C == b'>' {
                return Ok(Token::Gt);
            }
        }

        // `<!--`: skipped like a line comment and recorded as a module-mode
        // error.
        if C == b'<' && self.is(b'!') && self.peek() == Some('-') && self.peek_ahead() == Some('-')
        {
            self.skip_line_comment(3);
            self.skip_space::<true>();
            self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);

            return self.read_token();
        }

        let mut op = if C == b'<' { Token::Lt } else { Token::Gt };

        // `<<` / `>>`
        if self.cur() == Some(C as char) {
            self.bump();
            op = if C == b'<' {
                Token::LShift
            } else {
                Token::RShift
            };

            // `>>>`
            if C == b'>' && self.cur() == Some(C as char) {
                self.bump();
                op = Token::ZeroFillRShift;
            }
        }

        // Optional trailing `=` turns the operator into its assignment form.
        let token = if self.eat(b'=') {
            match op {
                Token::Lt => Token::LtEq,
                Token::Gt => Token::GtEq,
                Token::LShift => Token::LShiftEq,
                Token::RShift => Token::RShiftEq,
                Token::ZeroFillRShift => Token::ZeroFillRShiftEq,
                _ => unreachable!(),
            }
        } else {
            op
        };

        // Two `<` (or three `>`) have been consumed; the checked remainder
        // completes a run of seven identical characters followed by a space.
        if had_line_break_before_last
            && match op {
                Token::LShift if self.is_str("<<<<< ") => true,
                Token::ZeroFillRShift if self.is_str(">>>> ") => true,
                _ => false,
            }
        {
            self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
            self.skip_line_comment(5);
            self.skip_space::<true>();
            return self.read_token();
        }

        Ok(token)
    }

    /// Lexes a template token that begins at a backtick.
    fn read_token_back_quote(&mut self) -> LexResult<Token> {
        let start = self.cur_pos();
        self.scan_template_token(start, true)
    }

    /// Scans one template chunk and stores its raw/cooked text as the token
    /// value.
    ///
    /// The cursor must be on `` ` `` when `started_with_backtick` is `true`
    /// and on `}` otherwise. Scanning stops at a closing backtick
    /// (`NoSubstitutionTemplateLiteral` / `TemplateTail`) or at `${`
    /// (`TemplateHead` / `TemplateMiddle`).
    ///
    /// # Errors
    ///
    /// Returns `SyntaxError::UnterminatedTpl` when the input ends first. A
    /// failing escape sequence does not abort the scan; it only turns the
    /// cooked value into `Err`.
    fn scan_template_token(
        &mut self,
        start: BytePos,
        started_with_backtick: bool,
    ) -> LexResult<Token> {
        debug_assert!(self.cur() == Some(if started_with_backtick { '`' } else { '}' }));
        // Cooked text accumulated so far, or the first escape error.
        let mut cooked = Ok(Wtf8Buf::with_capacity(8));
        // Consume the opening backtick or `}`.
        self.bump();
        let mut cooked_slice_start = self.cur_pos();
        let raw_slice_start = cooked_slice_start;
        // Raw text is the verbatim source from just after the opener up to the
        // current position.
        let raw_atom = |this: &mut Self| {
            let last_pos = this.cur_pos();
            let s = unsafe { this.input.slice(raw_slice_start, last_pos) };
            this.atoms.atom(s)
        };
        // Appends the not-yet-copied source text to `cooked` (if it is still
        // `Ok`).
        macro_rules! consume_cooked {
            () => {{
                if let Ok(cooked) = &mut cooked {
                    let last_pos = self.cur_pos();
                    cooked.push_str(unsafe {
                        self.input.slice(cooked_slice_start, last_pos)
                    });
                }
            }};
        }

        while let Some(c) = self.cur() {
            if c == '`' {
                // End of the template.
                consume_cooked!();
                let cooked = cooked.map(|cooked| self.atoms.wtf8_atom(&*cooked));
                let raw = raw_atom(self);
                self.bump();
                return Ok(if started_with_backtick {
                    self.set_token_value(Some(TokenValue::Template { raw, cooked }));
                    Token::NoSubstitutionTemplateLiteral
                } else {
                    self.set_token_value(Some(TokenValue::Template { raw, cooked }));
                    Token::TemplateTail
                });
            } else if c == '$' && self.input.peek() == Some('{') {
                // Start of a substitution; `${` is consumed but not part of
                // the raw text.
                consume_cooked!();
                let cooked = cooked.map(|cooked| self.atoms.wtf8_atom(&*cooked));
                let raw = raw_atom(self);
                self.input.bump_bytes(2);
                return Ok(if started_with_backtick {
                    self.set_token_value(Some(TokenValue::Template { raw, cooked }));
                    Token::TemplateHead
                } else {
                    self.set_token_value(Some(TokenValue::Template { raw, cooked }));
                    Token::TemplateMiddle
                });
            } else if c == '\\' {
                consume_cooked!();

                match self.read_escaped_char(true) {
                    Ok(Some(escaped)) => {
                        if let Ok(ref mut cooked) = cooked {
                            cooked.push(escaped);
                        }
                    }
                    Ok(None) => {}
                    Err(error) => {
                        // Keep scanning; only the cooked value is poisoned.
                        cooked = Err(error);
                    }
                }

                cooked_slice_start = self.cur_pos();
            } else if c.is_line_terminator() {
                consume_cooked!();

                // In the cooked text `\r\n` and a lone `\r` both become `\n`;
                // U+2028 / U+2029 are kept as they are.
                let c = if c == '\r' && self.peek() == Some('\n') {
                    // Consume the `\r`; the `\n` is consumed below.
                    self.bump();
                    '\n'
                } else {
                    match c {
                        '\n' => '\n',
                        '\r' => '\n',
                        '\u{2028}' => '\u{2028}',
                        '\u{2029}' => '\u{2029}',
                        _ => unreachable!(),
                    }
                };

                self.bump();

                if let Ok(ref mut cooked) = cooked {
                    cooked.push_char(c);
                }
                cooked_slice_start = self.cur_pos();
            } else {
                self.bump();
            }
        }

        self.error(start, SyntaxError::UnterminatedTpl)?
    }
}
513
514impl<'a> Lexer<'a> {
515 #[inline(always)]
516 #[allow(clippy::misnamed_getters)]
517 fn had_line_break_before_last(&self) -> bool {
518 self.state().had_line_break()
519 }
520
    /// Builds the span from `start` to the input's last position.
    ///
    /// # Panics
    ///
    /// In builds with debug assertions enabled, panics if `start` lies after
    /// the last position.
    #[inline(always)]
    fn span(&self, start: BytePos) -> Span {
        let end = self.last_pos();
        if cfg!(debug_assertions) && start > end {
            unreachable!(
                "assertion failed: (span.start <= span.end).
 start = {}, end = {}",
                start.0, end.0
            )
        }
        Span { lo: start, hi: end }
    }
533
534 #[inline(always)]
535 fn bump(&mut self) {
536 unsafe {
537 self.input_mut().bump()
539 }
540 }
541
542 #[inline(always)]
543 fn is(&self, c: u8) -> bool {
544 self.input().is_byte(c)
545 }
546
547 #[inline(always)]
548 fn is_str(&self, s: &str) -> bool {
549 self.input().is_str(s)
550 }
551
552 #[inline(always)]
553 fn eat(&mut self, c: u8) -> bool {
554 self.input_mut().eat_byte(c)
555 }
556
557 #[inline(always)]
558 fn cur(&self) -> Option<char> {
559 self.input().cur()
560 }
561
562 #[inline(always)]
563 fn peek(&self) -> Option<char> {
564 self.input().peek()
565 }
566
567 #[inline(always)]
568 fn peek_ahead(&self) -> Option<char> {
569 self.input().peek_ahead()
570 }
571
572 #[inline(always)]
573 fn cur_pos(&self) -> BytePos {
574 self.input().cur_pos()
575 }
576
577 #[inline(always)]
578 fn last_pos(&self) -> BytePos {
579 self.input().last_pos()
580 }
581
582 #[cold]
584 #[inline(never)]
585 fn error<T>(&self, start: BytePos, kind: SyntaxError) -> LexResult<T> {
586 let span = self.span(start);
587 self.error_span(span, kind)
588 }
589
590 #[cold]
591 #[inline(never)]
592 fn error_span<T>(&self, span: Span, kind: SyntaxError) -> LexResult<T> {
593 Err(crate::error::Error::new(span, kind))
594 }
595
596 #[cold]
597 #[inline(never)]
598 fn emit_error(&mut self, start: BytePos, kind: SyntaxError) {
599 let span = self.span(start);
600 self.emit_error_span(span, kind)
601 }
602
603 #[cold]
604 #[inline(never)]
605 fn emit_error_span(&mut self, span: Span, kind: SyntaxError) {
606 if self.ctx().contains(Context::IgnoreError) {
607 return;
608 }
609 tracing::warn!("Lexer error at {:?}", span);
610 let err = crate::error::Error::new(span, kind);
611 self.push_error(err);
612 }
613
614 #[cold]
615 #[inline(never)]
616 fn emit_strict_mode_error(&mut self, start: BytePos, kind: SyntaxError) {
617 let span = self.span(start);
618 if self.ctx().contains(Context::Strict) {
619 self.emit_error_span(span, kind);
620 } else {
621 let err = crate::error::Error::new(span, kind);
622 self.add_module_mode_error(err);
623 }
624 }
625
626 #[cold]
627 #[inline(never)]
628 fn emit_module_mode_error(&mut self, start: BytePos, kind: SyntaxError) {
629 let span = self.span(start);
630 let err = crate::error::Error::new(span, kind);
631 self.add_module_mode_error(err);
632 }
633
    /// Skips a line comment and, when a comment buffer exists, records it.
    ///
    /// `start_skip` is the number of bytes of the comment introducer to jump
    /// over before the comment text starts (e.g. `2` for `//`). The recorded
    /// span starts at the introducer; the recorded text starts after it.
    ///
    /// The comment ends at `\n`, `\r`, U+2028, U+2029 or end of input; the
    /// terminator itself is not consumed.
    #[inline(never)]
    fn skip_line_comment(&mut self, start_skip: usize) {
        let start = self.cur_pos();
        self.input_mut().bump_bytes(start_skip);
        let slice_start = self.cur_pos();

        // Decided before scanning: the comment is queued as pending (for the
        // next token) when a line break was already seen or the state says a
        // trailing line comment is not possible here; otherwise it trails the
        // previous token.
        let is_for_next =
            self.state().had_line_break() || !self.state().can_have_trailing_line_comment();

        // NOTE(review): `byte_search!` is defined elsewhere. From its use
        // here it appears to advance to the first byte matching the table for
        // which `continue_if` yields `false`, and to run `handle_eof` when
        // the input ends first — confirm against the macro.
        byte_search! {
            lexer: self,
            table: LINE_BREAK_TABLE,
            continue_if: (matched_byte, pos_offset) {
                if matched_byte != LS_OR_PS_FIRST {
                    // `\n` or `\r`: a line terminator for sure.
                    false
                } else {
                    // 0xE2 begins many three-byte characters; only treat it
                    // as a terminator when the next two bytes complete
                    // U+2028 or U+2029.
                    let current_slice = self.input().as_str();
                    let byte_pos = pos_offset;
                    if byte_pos + 2 < current_slice.len() {
                        let bytes = current_slice.as_bytes();
                        let next2 = [bytes[byte_pos + 1], bytes[byte_pos + 2]];
                        if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
                            false
                        } else {
                            true
                        }
                    } else {
                        // Not enough bytes left for LS/PS.
                        true
                    }
                }
            },
            handle_eof: {
                // The comment runs to the end of the input.
                let end = self.input().end_pos();

                if self.comments_buffer().is_some() {
                    let s = unsafe { self.input_slice(slice_start, end) };
                    let cmt = swc_common::comments::Comment {
                        kind: swc_common::comments::CommentKind::Line,
                        span: Span::new_with_checked(start, end),
                        text: self.atom(s),
                    };

                    if is_for_next {
                        self.comments_buffer_mut().unwrap().push_pending(cmt);
                    } else {
                        let pos = self.state().prev_hi();
                        self.comments_buffer_mut().unwrap().push_comment(BufferedComment {
                            kind: BufferedCommentKind::Trailing,
                            pos,
                            comment: cmt,
                        });
                    }
                }

                return;
            }
        };

        // The cursor is now on the line terminator that ended the comment.
        let end = self.cur_pos();

        if self.comments_buffer().is_some() {
            let s = unsafe {
                self.input_slice_to_cur(slice_start)
            };
            let cmt = swc_common::comments::Comment {
                kind: swc_common::comments::CommentKind::Line,
                span: Span::new_with_checked(start, end),
                text: self.atom(s),
            };

            if is_for_next {
                self.comments_buffer_mut().unwrap().push_pending(cmt);
            } else {
                let pos = self.state().prev_hi();
                self.comments_buffer_mut()
                    .unwrap()
                    .push_comment(BufferedComment {
                        kind: BufferedCommentKind::Trailing,
                        pos,
                        comment: cmt,
                    });
            }
        }

        unsafe {
            // NOTE(review): `end` was read from the cursor just above, so this
            // looks like a no-op re-positioning — confirm whether slicing can
            // move the cursor.
            self.input_mut().reset_to(end);
        }
    }
742
743 fn skip_block_comment(&mut self) {
745 let start = self.cur_pos();
746
747 debug_assert_eq!(self.cur(), Some('/'));
748 debug_assert_eq!(self.peek(), Some('*'));
749
750 self.input_mut().bump_bytes(2);
752
753 let slice_start = self.cur_pos();
755
756 let had_line_break_before_last = self.had_line_break_before_last();
757 let mut should_mark_had_line_break = false;
758
759 loop {
760 let matched_byte = byte_search! {
761 lexer: self,
762 table: BLOCK_COMMENT_SCAN_TABLE,
763 continue_if: (matched_byte, pos_offset) {
764 if matched_byte == LS_OR_PS_FIRST {
765 let current_slice = self.input().as_str();
767 let byte_pos = pos_offset;
768 if byte_pos + 2 < current_slice.len() {
769 let bytes = current_slice.as_bytes();
770 let next2 = [bytes[byte_pos + 1], bytes[byte_pos + 2]];
771 if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
772 false
774 } else {
775 true
777 }
778 } else {
779 true
781 }
782 } else {
783 false
785 }
786 },
787 handle_eof: {
788 if should_mark_had_line_break {
789 self.state_mut().mark_had_line_break();
790 }
791 let end_pos = self.input().end_pos();
792 let span = Span::new_with_checked(end_pos, end_pos);
793 self.emit_error_span(span, SyntaxError::UnterminatedBlockComment);
794 return;
795 }
796 };
797
798 match matched_byte {
799 b'*' => {
800 if self.peek() == Some('/') {
801 self.input_mut().bump_bytes(2);
803
804 if should_mark_had_line_break {
805 self.state_mut().mark_had_line_break();
806 }
807
808 let end = self.cur_pos();
809
810 let mut is_for_next =
812 had_line_break_before_last || !self.state().can_have_trailing_comment();
813
814 if !had_line_break_before_last && self.input().is_byte(b';') {
816 is_for_next = false;
817 }
818
819 if self.comments_buffer().is_some() {
820 let src = unsafe {
821 self.input_mut().slice(slice_start, end)
824 };
825 let s = &src[..src.len() - 2];
826 let cmt = Comment {
827 kind: CommentKind::Block,
828 span: Span::new_with_checked(start, end),
829 text: self.atom(s),
830 };
831
832 if is_for_next {
833 self.comments_buffer_mut().unwrap().push_pending(cmt);
834 } else {
835 let pos = self.state().prev_hi();
836 self.comments_buffer_mut()
837 .unwrap()
838 .push_comment(BufferedComment {
839 kind: BufferedCommentKind::Trailing,
840 pos,
841 comment: cmt,
842 });
843 }
844 }
845
846 return;
847 } else {
848 self.bump();
850 }
851 }
852 b'\n' => {
853 should_mark_had_line_break = true;
854 self.bump();
855 }
856 b'\r' => {
857 should_mark_had_line_break = true;
858 self.bump();
859 if self.peek() == Some('\n') {
860 self.bump();
861 }
862 }
863 _ => {
864 if let Some('\u{2028}' | '\u{2029}') = self.cur() {
866 should_mark_had_line_break = true;
867 }
868 self.bump();
869 }
870 }
871 }
872 }
873
874 #[inline(never)]
878 fn skip_space<const LEX_COMMENTS: bool>(&mut self) {
879 loop {
880 let (offset, newline) = {
881 let mut skip = self::whitespace::SkipWhitespace {
882 input: self.input().as_str(),
883 newline: false,
884 offset: 0,
885 };
886
887 skip.scan();
888
889 (skip.offset, skip.newline)
890 };
891
892 self.input_mut().bump_bytes(offset as usize);
893 if newline {
894 self.state_mut().mark_had_line_break();
895 }
896
897 if LEX_COMMENTS && self.input().is_byte(b'/') {
898 if let Some(c) = self.peek() {
899 if c == '/' {
900 self.skip_line_comment(2);
901 continue;
902 } else if c == '*' {
903 self.skip_block_comment();
904 continue;
905 }
906 }
907 }
908
909 break;
910 }
911 }
912
913 fn ensure_not_ident(&mut self) -> LexResult<()> {
915 match self.cur() {
916 Some(c) if c.is_ident_start() => {
917 let span = pos_span(self.cur_pos());
918 self.error_span(span, SyntaxError::IdentAfterNum)?
919 }
920 _ => Ok(()),
921 }
922 }
923
924 fn make_legacy_octal(&mut self, start: BytePos, val: f64) -> LexResult<f64> {
925 self.ensure_not_ident()?;
926 if self.syntax().typescript() && self.target() >= EsVersion::Es5 {
927 self.emit_error(start, SyntaxError::TS1085);
928 }
929 self.emit_strict_mode_error(start, SyntaxError::LegacyOctal);
930 Ok(val)
931 }
932
933 fn read_digits<F, Ret, const RADIX: u8>(
935 &mut self,
936 mut op: F,
937 allow_num_separator: bool,
938 has_underscore: &mut bool,
939 ) -> LexResult<Ret>
940 where
941 F: FnMut(Ret, u8, u32) -> LexResult<(Ret, bool)>,
942 Ret: Copy + Default,
943 {
944 debug_assert!(
945 RADIX == 2 || RADIX == 8 || RADIX == 10 || RADIX == 16,
946 "radix for read_int should be one of 2, 8, 10, 16, but got {RADIX}"
947 );
948
949 if cfg!(feature = "debug") {
950 tracing::trace!("read_digits(radix = {}), cur = {:?}", RADIX, self.cur());
951 }
952
953 let start = self.cur_pos();
954 let mut total: Ret = Default::default();
955 let mut prev = None;
956
957 while let Some(c) = self.cur() {
958 if c == '_' {
959 *has_underscore = true;
960 if allow_num_separator {
961 let is_allowed = |c: Option<char>| {
962 let Some(c) = c else {
963 return false;
964 };
965 c.is_digit(RADIX as _)
966 };
967 let is_forbidden = |c: Option<char>| {
968 let Some(c) = c else {
969 return false;
970 };
971
972 if RADIX == 16 {
973 matches!(c, '.' | 'X' | '_' | 'x')
974 } else {
975 matches!(c, '.' | 'B' | 'E' | 'O' | '_' | 'b' | 'e' | 'o')
976 }
977 };
978
979 let next = self.input().peek();
980
981 if !is_allowed(next) || is_forbidden(prev) || is_forbidden(next) {
982 self.emit_error(
983 start,
984 SyntaxError::NumericSeparatorIsAllowedOnlyBetweenTwoDigits,
985 );
986 }
987
988 unsafe {
990 self.input_mut().bump();
992 }
993
994 continue;
995 }
996 }
997
998 let val = if let Some(val) = c.to_digit(RADIX as _) {
1000 val
1001 } else {
1002 return Ok(total);
1003 };
1004
1005 self.bump();
1006
1007 let (t, cont) = op(total, RADIX, val)?;
1008
1009 total = t;
1010
1011 if !cont {
1012 return Ok(total);
1013 }
1014
1015 prev = Some(c);
1016 }
1017
1018 Ok(total)
1019 }
1020
1021 fn read_number_no_dot_as_str<const RADIX: u8>(&mut self) -> LexResult<LazyInteger> {
1026 debug_assert!(
1027 RADIX == 2 || RADIX == 8 || RADIX == 10 || RADIX == 16,
1028 "radix for read_number_no_dot should be one of 2, 8, 10, 16, but got {RADIX}"
1029 );
1030 let start = self.cur_pos();
1031
1032 let mut not_octal = false;
1033 let mut read_any = false;
1034 let mut has_underscore = false;
1035
1036 self.read_digits::<_, (), RADIX>(
1037 |_, _, v| {
1038 read_any = true;
1039
1040 if v == 8 || v == 9 {
1041 not_octal = true;
1042 }
1043
1044 Ok(((), true))
1045 },
1046 true,
1047 &mut has_underscore,
1048 )?;
1049
1050 if !read_any {
1051 self.error(start, SyntaxError::ExpectedDigit { radix: RADIX })?;
1052 }
1053
1054 Ok(LazyInteger {
1055 start,
1056 end: self.cur_pos(),
1057 not_octal,
1058 has_underscore,
1059 })
1060 }
1061
    /// Lexes a decimal numeric literal (integer, float, BigInt or legacy
    /// octal) starting at the cursor.
    ///
    /// `START_WITH_DOT` means the literal begins with `.`; `START_WITH_ZERO`
    /// means it begins with `0`. They are mutually exclusive.
    ///
    /// Returns `Left((value, raw))` for numbers and `Right((bigint, raw))`
    /// for BigInt literals, where `raw` is the source text of the literal.
    ///
    /// # Errors
    ///
    /// Fails when no digit can be read where one is required, when the input
    /// ends right after the exponent marker, or when an identifier start
    /// directly follows the literal.
    fn read_number<const START_WITH_DOT: bool, const START_WITH_ZERO: bool>(
        &mut self,
    ) -> LexResult<Either<(f64, Atom), (Box<BigIntValue>, Atom)>> {
        debug_assert!(!(START_WITH_DOT && START_WITH_ZERO));
        debug_assert!(self.cur().is_some());

        let start = self.cur_pos();
        let mut has_underscore = false;

        let lazy_integer = if START_WITH_DOT {
            debug_assert!(
                self.cur().is_some_and(|c| c == '.'),
                "read_number<START_WITH_DOT = true> expects current char to be '.'"
            );
            // No integer part: an empty range at `start`.
            LazyInteger {
                start,
                end: start,
                not_octal: true,
                has_underscore: false,
            }
        } else {
            debug_assert!(!START_WITH_DOT);
            debug_assert!(!START_WITH_ZERO || self.cur().unwrap() == '0');

            let lazy_integer = self.read_number_no_dot_as_str::<10>()?;
            // Text of the integer part.
            let s = unsafe {
                self.input_slice_to_cur(lazy_integer.start)
            };

            // BigInt suffix. With a leading zero it is only accepted when the
            // integer part is exactly one character long (i.e. `0n`).
            if (!START_WITH_ZERO || lazy_integer.end - lazy_integer.start == BytePos(1))
                && self.eat(b'n')
            {
                let raw = unsafe {
                    self.input_slice_to_cur(start)
                };
                let bigint_value = num_bigint::BigInt::parse_bytes(s.as_bytes(), 10).unwrap();
                return Ok(Either::Right((Box::new(bigint_value), self.atom(raw))));
            }

            if START_WITH_ZERO {
                if s.as_bytes().iter().all(|&c| c == b'0') {
                    // Only zeros. More than one consumed character makes it a
                    // legacy octal zero; a single `0` falls through and may
                    // still become a float.
                    if start.0 != self.last_pos().0 - 1 {
                        let raw = unsafe {
                            self.input_slice_to_cur(start)
                        };
                        let raw = self.atom(raw);
                        return self
                            .make_legacy_octal(start, 0f64)
                            .map(|value| Either::Left((value, raw)));
                    }
                } else if lazy_integer.not_octal {
                    // Leading zero but contains `8` or `9`: stays decimal,
                    // flagged as a strict-mode error.
                    self.emit_strict_mode_error(start, SyntaxError::LegacyDecimal);
                } else {
                    // Leading zero and only octal digits: reinterpret the
                    // digits in base 8.
                    let s = remove_underscore(s, lazy_integer.has_underscore);
                    let val = parse_integer::<8>(&s);
                    let raw = unsafe {
                        self.input_slice_to_cur(start)
                    };
                    let raw = self.atom(raw);
                    return self
                        .make_legacy_octal(start, val)
                        .map(|value| Either::Left((value, raw)));
                }
            }

            lazy_integer
        };

        has_underscore |= lazy_integer.has_underscore;
        // Legacy octal literals have all returned above.
        let has_dot = self.cur() == Some('.');
        if has_dot {
            self.bump();

            debug_assert!(!START_WITH_DOT || self.cur().is_some_and(|cur| cur.is_ascii_digit()));

            // Fraction digits; only the cursor movement matters here.
            self.read_digits::<_, (), 10>(|_, _, _| Ok(((), true)), true, &mut has_underscore)?;
        }

        let has_e = self.cur().is_some_and(|c| c == 'e' || c == 'E');
        if has_e {
            // Consume `e` / `E`.
            self.bump();
            let next = match self.cur() {
                Some(next) => next,
                None => {
                    let pos = self.cur_pos();
                    self.error(pos, SyntaxError::NumLitTerminatedWithExp)?
                }
            };

            if next == '+' || next == '-' {
                // Consume the exponent sign.
                self.bump();
            }

            // Exponent digits; at least one is required.
            let lazy_integer = self.read_number_no_dot_as_str::<10>()?;
            has_underscore |= lazy_integer.has_underscore;
        }

        let val = if has_dot || has_e {
            // Float: parse the whole literal text with separators removed.
            let raw = unsafe {
                self.input_slice_to_cur(start)
            };

            let raw = remove_underscore(raw, has_underscore);
            raw.parse().expect("failed to parse float literal")
        } else {
            // Plain integer: evaluate just the integer-part digits.
            let s = unsafe { self.input_slice(lazy_integer.start, lazy_integer.end) };
            let s = remove_underscore(s, has_underscore);
            parse_integer::<10>(&s)
        };

        self.ensure_not_ident()?;

        let raw_str = unsafe {
            self.input_slice_to_cur(start)
        };
        Ok(Either::Left((val, raw_str.into())))
    }
1213
1214 fn read_int_u32<const RADIX: u8>(&mut self, len: u8) -> LexResult<Option<u32>> {
1215 let start = self.state().start();
1216
1217 let mut count = 0;
1218 let v = self.read_digits::<_, Option<u32>, RADIX>(
1219 |opt: Option<u32>, radix, val| {
1220 count += 1;
1221
1222 let total = opt
1223 .unwrap_or_default()
1224 .checked_mul(radix as u32)
1225 .and_then(|v| v.checked_add(val))
1226 .ok_or_else(|| {
1227 let span = Span::new_with_checked(start, start);
1228 crate::error::Error::new(span, SyntaxError::InvalidUnicodeEscape)
1229 })?;
1230
1231 Ok((Some(total), count != len))
1232 },
1233 true,
1234 &mut false,
1235 )?;
1236 if len != 0 && count != len {
1237 Ok(None)
1238 } else {
1239 Ok(v)
1240 }
1241 }
1242
1243 fn read_radix_number<const RADIX: u8>(
1245 &mut self,
1246 ) -> LexResult<Either<(f64, Atom), (Box<BigIntValue>, Atom)>> {
1247 debug_assert!(
1248 RADIX == 2 || RADIX == 8 || RADIX == 16,
1249 "radix should be one of 2, 8, 16, but got {RADIX}"
1250 );
1251 let start = self.cur_pos();
1252
1253 debug_assert_eq!(self.cur(), Some('0'));
1254 self.bump();
1255
1256 debug_assert!(self
1257 .cur()
1258 .is_some_and(|c| matches!(c, 'b' | 'B' | 'o' | 'O' | 'x' | 'X')));
1259 self.bump();
1260
1261 let lazy_integer = self.read_number_no_dot_as_str::<RADIX>()?;
1262 let has_underscore = lazy_integer.has_underscore;
1263
1264 let s = unsafe {
1265 self.input_slice_to_cur(lazy_integer.start)
1267 };
1268 if self.eat(b'n') {
1269 let raw = unsafe {
1270 self.input_slice_to_cur(start)
1272 };
1273
1274 let bigint_value = num_bigint::BigInt::parse_bytes(s.as_bytes(), RADIX as _).unwrap();
1275 return Ok(Either::Right((Box::new(bigint_value), self.atom(raw))));
1276 }
1277 let s = remove_underscore(s, has_underscore);
1278 let val = parse_integer::<RADIX>(&s);
1279
1280 self.ensure_not_ident()?;
1281
1282 let raw = unsafe {
1283 self.input_slice_to_cur(start)
1285 };
1286
1287 Ok(Either::Left((val, self.atom(raw))))
1288 }
1289
1290 #[cold]
1294 #[inline(never)]
1295 fn consume_pending_comments(&mut self) {
1296 if let Some(comments) = self.comments() {
1297 let last = self.state().prev_hi();
1298 let start_pos = self.start_pos();
1299 let comments_buffer = self.comments_buffer_mut().unwrap();
1300
1301 let kind = if last == start_pos {
1305 BufferedCommentKind::Leading
1306 } else {
1307 BufferedCommentKind::Trailing
1308 };
1309 comments_buffer.pending_to_comment(kind, last);
1311
1312 for comment in comments_buffer.take_comments() {
1314 match comment.kind {
1315 BufferedCommentKind::Leading => {
1316 comments.add_leading(comment.pos, comment.comment);
1317 }
1318 BufferedCommentKind::Trailing => {
1319 comments.add_trailing(comment.pos, comment.comment);
1320 }
1321 }
1322 }
1323 }
1324 }
1325
1326 fn read_jsx_entity(&mut self) -> LexResult<(char, String)> {
1327 debug_assert!(self.syntax().jsx());
1328
1329 fn from_code(s: &str, radix: u32) -> LexResult<char> {
1330 let c = char::from_u32(
1332 u32::from_str_radix(s, radix).expect("failed to parse string as number"),
1333 )
1334 .expect("failed to parse number as char");
1335
1336 Ok(c)
1337 }
1338
1339 fn is_hex(s: &str) -> bool {
1340 s.chars().all(|c| c.is_ascii_hexdigit())
1341 }
1342
1343 fn is_dec(s: &str) -> bool {
1344 s.chars().all(|c| c.is_ascii_digit())
1345 }
1346
1347 let mut s = SmartString::<LazyCompact>::default();
1348
1349 debug_assert!(self.input().cur().is_some_and(|c| c == '&'));
1350 self.bump();
1351
1352 let start_pos = self.input().cur_pos();
1353
1354 for _ in 0..10 {
1355 let c = match self.input().cur() {
1356 Some(c) => c,
1357 None => break,
1358 };
1359 self.bump();
1360
1361 if c == ';' {
1362 if let Some(stripped) = s.strip_prefix('#') {
1363 if stripped.starts_with('x') {
1364 if is_hex(&s[2..]) {
1365 let value = from_code(&s[2..], 16)?;
1366
1367 return Ok((value, format!("&{s};")));
1368 }
1369 } else if is_dec(stripped) {
1370 let value = from_code(stripped, 10)?;
1371
1372 return Ok((value, format!("&{s};")));
1373 }
1374 } else if let Some(entity) = xhtml(&s) {
1375 return Ok((entity, format!("&{s};")));
1376 }
1377
1378 break;
1379 }
1380
1381 s.push(c)
1382 }
1383
1384 unsafe {
1385 self.input_mut().reset_to(start_pos);
1387 }
1388
1389 Ok(('&', "&".to_string()))
1390 }
1391
1392 fn read_jsx_new_line(&mut self, normalize_crlf: bool) -> LexResult<Either<&'static str, char>> {
1393 debug_assert!(self.syntax().jsx());
1394 let ch = self.input().cur().unwrap();
1395 self.bump();
1396
1397 let out = if ch == '\r' && self.input().cur() == Some('\n') {
1398 self.bump(); Either::Left(if normalize_crlf { "\n" } else { "\r\n" })
1400 } else {
1401 Either::Right(ch)
1402 };
1403 Ok(out)
1404 }
1405
1406 fn read_jsx_str(&mut self, quote: char) -> LexResult<Token> {
1407 debug_assert!(self.syntax().jsx());
1408 let start = self.input().cur_pos();
1409 unsafe {
1410 self.input_mut().bump(); }
1413 let mut out = String::new();
1414 let mut chunk_start = self.input().cur_pos();
1415 loop {
1416 let ch = match self.input().cur() {
1417 Some(c) => c,
1418 None => {
1419 self.emit_error(start, SyntaxError::UnterminatedStrLit);
1420 break;
1421 }
1422 };
1423 let cur_pos = self.input().cur_pos();
1424 if ch == '\\' {
1425 let value = unsafe {
1426 self.input_slice_to_cur(chunk_start)
1428 };
1429
1430 out.push_str(value);
1431 out.push('\\');
1432
1433 self.bump();
1434
1435 chunk_start = self.input().cur_pos();
1436
1437 continue;
1438 }
1439
1440 if ch == quote {
1441 break;
1442 }
1443
1444 if ch == '&' {
1445 let value = unsafe {
1446 self.input_slice_to_cur(chunk_start)
1448 };
1449
1450 out.push_str(value);
1451
1452 let jsx_entity = self.read_jsx_entity()?;
1453
1454 out.push(jsx_entity.0);
1455
1456 chunk_start = self.input().cur_pos();
1457 } else if ch.is_line_terminator() {
1458 let value = unsafe {
1459 self.input_slice_to_cur(chunk_start)
1461 };
1462
1463 out.push_str(value);
1464
1465 match self.read_jsx_new_line(false)? {
1466 Either::Left(s) => {
1467 out.push_str(s);
1468 }
1469 Either::Right(c) => {
1470 out.push(c);
1471 }
1472 }
1473
1474 chunk_start = cur_pos + BytePos(ch.len_utf8() as _);
1475 } else {
1476 unsafe {
1477 self.input_mut().bump();
1479 }
1480 }
1481 }
1482 let s = unsafe {
1483 self.input_slice_to_cur(chunk_start)
1485 };
1486 let value = if out.is_empty() {
1487 self.atom(s)
1489 } else {
1490 out.push_str(s);
1491 self.atom(out)
1492 };
1493
1494 if self.input().peek_ahead().is_some() {
1497 self.bump();
1498 }
1499
1500 let raw = unsafe {
1501 self.input_slice_to_cur(start)
1503 };
1504 let raw = self.atom(raw);
1505 Ok(Token::str(value.into(), raw, self))
1506 }
1507
1508 fn read_unicode_code_unit(&mut self) -> LexResult<Option<UnicodeEscape>> {
1518 const MIN_HIGH: u32 = 0xd800;
1519 const MAX_HIGH: u32 = 0xdbff;
1520 const MIN_LOW: u32 = 0xdc00;
1521 const MAX_LOW: u32 = 0xdfff;
1522
1523 let Some(high) = self.read_int_u32::<16>(4)? else {
1524 return Ok(None);
1525 };
1526 if let Some(ch) = char::from_u32(high) {
1527 return Ok(Some(UnicodeEscape::CodePoint(ch)));
1528 }
1529
1530 debug_assert!(high >= MIN_HIGH);
1535 let is_pair = high <= MAX_HIGH
1536 && self.input().cur() == Some('\\')
1537 && self.input().peek() == Some('u');
1538 if !is_pair {
1539 return Ok(Some(UnicodeEscape::LoneSurrogate(high)));
1540 }
1541
1542 let before_second = self.input().cur_pos();
1543
1544 self.input_mut().bump_bytes(2);
1546
1547 let Some(low) = self.read_int_u32::<16>(4)? else {
1548 return Ok(None);
1549 };
1550
1551 if !(MIN_LOW..=MAX_LOW).contains(&low) {
1556 unsafe {
1557 self.input_mut().reset_to(before_second);
1559 }
1560 return Ok(Some(UnicodeEscape::LoneSurrogate(high)));
1561 }
1562
1563 let code_point = pair_to_code_point(high, low);
1564 let ch = unsafe { char::from_u32_unchecked(code_point) };
1567 Ok(Some(UnicodeEscape::SurrogatePair(ch)))
1568 }
1569
1570 fn read_unicode_escape(&mut self) -> LexResult<UnicodeEscape> {
1571 debug_assert_eq!(self.cur(), Some('u'));
1572
1573 let mut is_curly = false;
1574
1575 self.bump(); if self.eat(b'{') {
1578 is_curly = true;
1579 }
1580
1581 let state = self.input().cur_pos();
1582 let c = match self.read_int_u32::<16>(if is_curly { 0 } else { 4 }) {
1583 Ok(Some(val)) => {
1584 if 0x0010_ffff >= val {
1585 char::from_u32(val)
1586 } else {
1587 let start = self.cur_pos();
1588
1589 self.error(
1590 start,
1591 SyntaxError::BadCharacterEscapeSequence {
1592 expected: if is_curly {
1593 "1-6 hex characters in the range 0 to 10FFFF."
1594 } else {
1595 "4 hex characters"
1596 },
1597 },
1598 )?
1599 }
1600 }
1601 _ => {
1602 let start = self.cur_pos();
1603
1604 self.error(
1605 start,
1606 SyntaxError::BadCharacterEscapeSequence {
1607 expected: if is_curly {
1608 "1-6 hex characters"
1609 } else {
1610 "4 hex characters"
1611 },
1612 },
1613 )?
1614 }
1615 };
1616
1617 match c {
1618 Some(c) => {
1619 if is_curly && !self.eat(b'}') {
1620 self.error(state, SyntaxError::InvalidUnicodeEscape)?
1621 }
1622
1623 Ok(UnicodeEscape::CodePoint(c))
1624 }
1625 _ => {
1626 unsafe {
1627 self.input_mut().reset_to(state);
1629 }
1630
1631 let Some(value) = self.read_unicode_code_unit()? else {
1632 self.error(
1633 state,
1634 SyntaxError::BadCharacterEscapeSequence {
1635 expected: if is_curly {
1636 "1-6 hex characters"
1637 } else {
1638 "4 hex characters"
1639 },
1640 },
1641 )?
1642 };
1643
1644 if is_curly && !self.eat(b'}') {
1645 self.error(state, SyntaxError::InvalidUnicodeEscape)?
1646 }
1647
1648 Ok(value)
1649 }
1650 }
1651 }
1652
1653 #[cold]
1654 fn read_shebang(&mut self) -> LexResult<Option<Atom>> {
1655 if self.input().cur() != Some('#') || self.input().peek() != Some('!') {
1656 return Ok(None);
1657 }
1658 self.bump(); self.bump(); let s = self.input_uncons_while(|c| !c.is_line_terminator());
1661 Ok(Some(self.atom(s)))
1662 }
1663
1664 fn read_escaped_char(&mut self, in_template: bool) -> LexResult<Option<CodePoint>> {
1668 debug_assert_eq!(self.cur(), Some('\\'));
1669
1670 let start = self.cur_pos();
1671
1672 self.bump(); let c = match self.cur() {
1675 Some(c) => c,
1676 None => self.error_span(pos_span(start), SyntaxError::InvalidStrEscape)?,
1677 };
1678
1679 let c = match c {
1680 '\\' => '\\',
1681 'n' => '\n',
1682 'r' => '\r',
1683 't' => '\t',
1684 'b' => '\u{0008}',
1685 'v' => '\u{000b}',
1686 'f' => '\u{000c}',
1687 '\r' => {
1688 self.bump(); self.eat(b'\n');
1691
1692 return Ok(None);
1693 }
1694 '\n' | '\u{2028}' | '\u{2029}' => {
1695 self.bump();
1696
1697 return Ok(None);
1698 }
1699
1700 'x' => {
1702 self.bump(); match self.read_int_u32::<16>(2)? {
1705 Some(val) => return Ok(CodePoint::from_u32(val)),
1706 None => self.error(
1707 start,
1708 SyntaxError::BadCharacterEscapeSequence {
1709 expected: "2 hex characters",
1710 },
1711 )?,
1712 }
1713 }
1714
1715 'u' => match self.read_unicode_escape() {
1717 Ok(value) => {
1718 return Ok(Some(value.into()));
1719 }
1720 Err(err) => self.error(start, err.into_kind())?,
1721 },
1722
1723 '0'..='7' => {
1725 self.bump();
1726
1727 let first_c = if c == '0' {
1728 match self.cur() {
1729 Some(next) if next.is_digit(8) => c,
1730 _ => return Ok(Some(CodePoint::from_char('\u{0000}'))),
1732 }
1733 } else {
1734 c
1735 };
1736
1737 if in_template {
1739 self.error(start, SyntaxError::LegacyOctal)?
1740 }
1741
1742 self.emit_strict_mode_error(start, SyntaxError::LegacyOctal);
1743
1744 let mut value: u8 = first_c.to_digit(8).unwrap() as u8;
1745
1746 macro_rules! one {
1747 ($check:expr) => {{
1748 let cur = self.cur();
1749
1750 match cur.and_then(|c| c.to_digit(8)) {
1751 Some(v) => {
1752 value = if $check {
1753 let new_val = value
1754 .checked_mul(8)
1755 .and_then(|value| value.checked_add(v as u8));
1756 match new_val {
1757 Some(val) => val,
1758 None => return Ok(CodePoint::from_u32(value as u32)),
1759 }
1760 } else {
1761 value * 8 + v as u8
1762 };
1763
1764 self.bump();
1765 }
1766 _ => return Ok(CodePoint::from_u32(value as u32)),
1767 }
1768 }};
1769 }
1770
1771 one!(false);
1772 one!(true);
1773
1774 return Ok(CodePoint::from_u32(value as u32));
1775 }
1776 _ => c,
1777 };
1778
1779 unsafe {
1780 self.input_mut().bump();
1782 }
1783
1784 Ok(CodePoint::from_u32(c as u32))
1785 }
1786
1787 fn read_regexp(&mut self, start: BytePos) -> LexResult<Token> {
1789 unsafe {
1790 self.input_mut().reset_to(start);
1792 }
1793
1794 debug_assert_eq!(self.cur(), Some('/'));
1795
1796 let start = self.cur_pos();
1797
1798 self.bump(); let slice_start = self.cur_pos();
1801
1802 let (mut escaped, mut in_class) = (false, false);
1803
1804 while let Some(c) = self.cur() {
1805 if c.is_line_terminator() {
1808 let span = self.span(start);
1809
1810 return Err(crate::error::Error::new(
1811 span,
1812 SyntaxError::UnterminatedRegExp,
1813 ));
1814 }
1815
1816 if escaped {
1817 escaped = false;
1818 } else {
1819 match c {
1820 '[' => in_class = true,
1821 ']' if in_class => in_class = false,
1822 '/' if !in_class => break,
1824 _ => {}
1825 }
1826
1827 escaped = c == '\\';
1828 }
1829
1830 self.bump();
1831 }
1832
1833 let content = {
1834 let s = unsafe { self.input_slice_to_cur(slice_start) };
1835 self.atom(s)
1836 };
1837
1838 if !self.is(b'/') {
1840 let span = self.span(start);
1841
1842 return Err(crate::error::Error::new(
1843 span,
1844 SyntaxError::UnterminatedRegExp,
1845 ));
1846 }
1847
1848 self.bump(); let flags = {
1857 match self.cur() {
1858 Some(c) if c.is_ident_start() => self
1859 .read_word_as_str_with()
1860 .map(|(s, _)| Some(self.atom(s))),
1861 _ => Ok(None),
1862 }
1863 }?
1864 .unwrap_or_default();
1865
1866 Ok(Token::regexp(content, flags, self))
1867 }
1868
1869 fn read_word_as_str_with(&mut self) -> LexResult<(Cow<'a, str>, bool)> {
1871 debug_assert!(self.cur().is_some());
1872 let slice_start = self.cur_pos();
1873
1874 if let Some(c) = self.input().cur_as_ascii() {
1876 if Ident::is_valid_ascii_start(c) {
1877 self.bump();
1879
1880 let next_byte = byte_search! {
1882 lexer: self,
1883 table: NOT_ASCII_ID_CONTINUE_TABLE,
1884 handle_eof: {
1885 let s = unsafe {
1887 self.input_slice_to_cur(slice_start)
1890 };
1891
1892 return Ok((Cow::Borrowed(s), false));
1893 },
1894 };
1895
1896 if !next_byte.is_ascii() {
1898 return self.read_word_as_str_with_slow_path(slice_start);
1900 } else if next_byte == b'\\' {
1901 return self.read_word_as_str_with_slow_path(slice_start);
1903 } else {
1904 let s = unsafe {
1906 self.input_slice_to_cur(slice_start)
1909 };
1910
1911 return Ok((Cow::Borrowed(s), false));
1912 }
1913 }
1914 }
1915
1916 self.read_word_as_str_with_slow_path(slice_start)
1918 }
1919
1920 #[cold]
1922 fn read_word_as_str_with_slow_path(
1923 &mut self,
1924 mut slice_start: BytePos,
1925 ) -> LexResult<(Cow<'a, str>, bool)> {
1926 let mut first = true;
1927 let mut has_escape = false;
1928
1929 let mut buf = String::with_capacity(16);
1930 loop {
1931 if let Some(c) = self.input().cur_as_ascii() {
1932 if Ident::is_valid_ascii_continue(c) {
1933 self.bump();
1934 continue;
1935 } else if first && Ident::is_valid_ascii_start(c) {
1936 self.bump();
1937 first = false;
1938 continue;
1939 }
1940
1941 if c == b'\\' {
1943 first = false;
1944 has_escape = true;
1945 let start = self.cur_pos();
1946 self.bump();
1947
1948 if !self.is(b'u') {
1949 self.error_span(pos_span(start), SyntaxError::ExpectedUnicodeEscape)?
1950 }
1951
1952 {
1953 let end = self.input().cur_pos();
1954 let s = unsafe {
1955 self.input_slice(slice_start, start)
1958 };
1959 buf.push_str(s);
1960 unsafe {
1961 self.input_mut().reset_to(end);
1963 }
1964 }
1965
1966 let value = self.read_unicode_escape()?;
1967
1968 match value {
1969 UnicodeEscape::CodePoint(ch) => {
1970 let valid = if first {
1971 ch.is_ident_start()
1972 } else {
1973 ch.is_ident_part()
1974 };
1975 if !valid {
1976 self.emit_error(start, SyntaxError::InvalidIdentChar);
1977 }
1978 buf.push(ch);
1979 }
1980 UnicodeEscape::SurrogatePair(ch) => {
1981 buf.push(ch);
1982 self.emit_error(start, SyntaxError::InvalidIdentChar);
1983 }
1984 UnicodeEscape::LoneSurrogate(code_point) => {
1985 buf.push_str(format!("\\u{code_point:04X}").as_str());
1986 self.emit_error(start, SyntaxError::InvalidIdentChar);
1987 }
1988 };
1989
1990 slice_start = self.cur_pos();
1991 continue;
1992 }
1993
1994 break;
1996 } else if let Some(c) = self.input().cur() {
1997 if Ident::is_valid_non_ascii_continue(c) {
1998 self.bump();
1999 continue;
2000 } else if first && Ident::is_valid_non_ascii_start(c) {
2001 self.bump();
2002 first = false;
2003 continue;
2004 }
2005 }
2006
2007 break;
2008 }
2009
2010 let end = self.cur_pos();
2011 let s = unsafe {
2012 self.input_slice(slice_start, end)
2015 };
2016 let value = if !has_escape {
2017 Cow::Borrowed(s)
2019 } else {
2020 buf.push_str(s);
2021 Cow::Owned(buf)
2022 };
2023
2024 Ok((value, has_escape))
2025 }
2026
2027 fn read_token_number_sign(&mut self) -> LexResult<Token> {
2029 debug_assert!(self.cur().is_some_and(|c| c == '#'));
2030
2031 self.bump(); debug_assert!(
2036 !self.input().is_at_start() || self.cur() != Some('!'),
2037 "#! should have already been handled by read_shebang()"
2038 );
2039 Ok(Token::Hash)
2040 }
2041
2042 fn read_token_dot(&mut self) -> LexResult<Token> {
2046 debug_assert!(self.cur().is_some_and(|c| c == '.'));
2047 let next = match self.input().peek() {
2049 Some(next) => next,
2050 None => {
2051 self.bump(); return Ok(Token::Dot);
2053 }
2054 };
2055 if next.is_ascii_digit() {
2056 return self.read_number::<true, false>().map(|v| match v {
2057 Left((value, raw)) => Token::num(value, raw, self),
2058 Right(_) => unreachable!("read_number should not return bigint for leading dot"),
2059 });
2060 }
2061
2062 self.bump(); if next == '.' && self.input().peek() == Some('.') {
2065 self.bump(); self.bump(); return Ok(Token::DotDotDot);
2069 }
2070
2071 Ok(Token::Dot)
2072 }
2073
2074 fn read_token_question_mark(&mut self) -> LexResult<Token> {
2078 debug_assert!(self.cur().is_some_and(|c| c == '?'));
2079 self.bump();
2080 if self.input_mut().eat_byte(b'?') {
2081 if self.input_mut().eat_byte(b'=') {
2082 Ok(Token::NullishEq)
2083 } else {
2084 Ok(Token::NullishCoalescing)
2085 }
2086 } else {
2087 Ok(Token::QuestionMark)
2088 }
2089 }
2090
2091 fn read_token_colon(&mut self) -> LexResult<Token> {
2095 debug_assert!(self.cur().is_some_and(|c| c == ':'));
2096 self.bump(); Ok(Token::Colon)
2098 }
2099
2100 fn read_token_zero(&mut self) -> LexResult<Token> {
2104 debug_assert_eq!(self.cur(), Some('0'));
2105 let next = self.input().peek();
2106
2107 let bigint = match next {
2108 Some('x') | Some('X') => self.read_radix_number::<16>(),
2109 Some('o') | Some('O') => self.read_radix_number::<8>(),
2110 Some('b') | Some('B') => self.read_radix_number::<2>(),
2111 _ => {
2112 return self.read_number::<false, true>().map(|v| match v {
2113 Left((value, raw)) => Token::num(value, raw, self),
2114 Right((value, raw)) => Token::bigint(value, raw, self),
2115 });
2116 }
2117 };
2118
2119 bigint.map(|v| match v {
2120 Left((value, raw)) => Token::num(value, raw, self),
2121 Right((value, raw)) => Token::bigint(value, raw, self),
2122 })
2123 }
2124
2125 fn read_token_logical<const C: u8>(&mut self) -> LexResult<Token> {
2129 debug_assert!(C == b'|' || C == b'&');
2130 let is_bit_and = C == b'&';
2131 let had_line_break_before_last = self.had_line_break_before_last();
2132 let start = self.cur_pos();
2133
2134 unsafe {
2135 self.input_mut().bump();
2137 }
2138 let token = if is_bit_and {
2139 Token::Ampersand
2140 } else {
2141 Token::Pipe
2142 };
2143
2144 if self.input_mut().eat_byte(b'=') {
2146 return Ok(if is_bit_and {
2147 Token::BitAndEq
2148 } else {
2149 debug_assert!(token == Token::Pipe);
2150 Token::BitOrEq
2151 });
2152 }
2153
2154 if self.input().cur() == Some(C as char) {
2156 unsafe {
2157 self.input_mut().bump();
2159 }
2160
2161 if self.input().cur() == Some('=') {
2162 unsafe {
2163 self.input_mut().bump();
2165 }
2166
2167 return Ok(if is_bit_and {
2168 Token::LogicalAndEq
2169 } else {
2170 debug_assert!(token == Token::Pipe);
2171 Token::LogicalOrEq
2172 });
2173 }
2174
2175 if had_line_break_before_last && !is_bit_and && self.is_str("||||| ") {
2178 let span = fixed_len_span(start, 7);
2179 self.emit_error_span(span, SyntaxError::TS1185);
2180 self.skip_line_comment(5);
2181 self.skip_space::<true>();
2182 return self.error_span(span, SyntaxError::TS1185);
2183 }
2184
2185 return Ok(if is_bit_and {
2186 Token::LogicalAnd
2187 } else {
2188 debug_assert!(token == Token::Pipe);
2189 Token::LogicalOr
2190 });
2191 }
2192
2193 Ok(token)
2194 }
2195
2196 fn read_token_mul_mod<const IS_MUL: bool>(&mut self) -> LexResult<Token> {
2200 debug_assert!(self.cur().is_some_and(|c| c == '*' || c == '%'));
2201 self.bump();
2202 let token = if IS_MUL {
2203 if self.input_mut().eat_byte(b'*') {
2204 Token::Exp
2206 } else {
2207 Token::Asterisk
2208 }
2209 } else {
2210 Token::Percent
2211 };
2212
2213 Ok(if self.input_mut().eat_byte(b'=') {
2214 if token == Token::Asterisk {
2215 Token::MulEq
2216 } else if token == Token::Percent {
2217 Token::ModEq
2218 } else {
2219 debug_assert!(token == Token::Exp);
2220 Token::ExpEq
2221 }
2222 } else {
2223 token
2224 })
2225 }
2226
2227 fn read_slash(&mut self) -> LexResult<Token> {
2228 debug_assert_eq!(self.cur(), Some('/'));
2229 self.bump(); Ok(if self.eat(b'=') {
2231 Token::DivEq
2232 } else {
2233 Token::Slash
2234 })
2235 }
2236
2237 fn read_ident_unknown(&mut self) -> LexResult<Token> {
2240 debug_assert!(self.cur().is_some());
2241
2242 let (s, has_escape) = self.read_word_as_str_with()?;
2243 let atom = self.atom(s);
2244 let word = Token::unknown_ident(atom, self);
2245
2246 if has_escape {
2247 self.update_token_flags(|flags| *flags |= TokenFlags::UNICODE);
2248 }
2249
2250 Ok(word)
2251 }
2252
2253 fn read_str_lit(&mut self) -> LexResult<Token> {
2256 debug_assert!(self.cur() == Some('\'') || self.cur() == Some('"'));
2257 let start = self.cur_pos();
2258 let quote = self.cur().unwrap() as u8;
2259
2260 self.bump(); let mut slice_start = self.input().cur_pos();
2263
2264 let mut buf: Option<Wtf8Buf> = None;
2265
2266 loop {
2267 let table = if quote == b'"' {
2268 &DOUBLE_QUOTE_STRING_END_TABLE
2269 } else {
2270 &SINGLE_QUOTE_STRING_END_TABLE
2271 };
2272
2273 let fast_path_result = byte_search! {
2274 lexer: self,
2275 table: table,
2276 handle_eof: {
2277 let value_end = self.cur_pos();
2278 let s = unsafe {
2279 self.input_slice(slice_start, value_end)
2282 };
2283
2284 self.emit_error(start, SyntaxError::UnterminatedStrLit);
2285
2286 let end = self.cur_pos();
2287 let raw = unsafe { self.input_slice(start, end) };
2288 return Ok(Token::str(self.wtf8_atom(Wtf8::from_str(s)), self.atom(raw), self));
2289 },
2290 };
2291 match fast_path_result {
2294 b'"' | b'\'' if fast_path_result == quote => {
2295 let value_end = self.cur_pos();
2296
2297 let value = if let Some(buf) = buf.as_mut() {
2298 debug_assert!(unsafe { self.input_slice(start, value_end).contains('\\') });
2300 let s = unsafe {
2301 self.input_slice(slice_start, value_end)
2304 };
2305 buf.push_str(s);
2306 self.wtf8_atom(&**buf)
2307 } else {
2308 let s = unsafe { self.input_slice(slice_start, value_end) };
2309 self.wtf8_atom(Wtf8::from_str(s))
2310 };
2311
2312 unsafe {
2313 self.input_mut().bump();
2315 }
2316
2317 let end = self.cur_pos();
2318 let raw = unsafe {
2319 self.input_slice(start, end)
2322 };
2323 let raw = self.atom(raw);
2324 return Ok(Token::str(value, raw, self));
2325 }
2326 b'\\' => {
2327 let end = self.cur_pos();
2328 let s = unsafe {
2329 self.input_slice(slice_start, end)
2332 };
2333
2334 if buf.is_none() {
2335 buf = Some(Wtf8Buf::from_str(s));
2336 } else {
2337 buf.as_mut().unwrap().push_str(s);
2338 }
2339
2340 if let Some(escaped) = self.read_escaped_char(false)? {
2341 buf.as_mut().unwrap().push(escaped);
2342 }
2343
2344 slice_start = self.cur_pos();
2345 continue;
2346 }
2347 b'\n' | b'\r' => {
2348 let end = self.cur_pos();
2349 let s = unsafe {
2350 self.input_slice(slice_start, end)
2353 };
2354
2355 self.emit_error(start, SyntaxError::UnterminatedStrLit);
2356
2357 let end = self.cur_pos();
2358
2359 let raw = unsafe {
2360 self.input_slice(start, end)
2363 };
2364 return Ok(Token::str(
2365 self.wtf8_atom(Wtf8::from_str(s)),
2366 self.atom(raw),
2367 self,
2368 ));
2369 }
2370 _ => self.bump(),
2371 }
2372 }
2373 }
2374
2375 fn read_keyword_with(&mut self, convert: &dyn Fn(&str) -> Option<Token>) -> LexResult<Token> {
2376 debug_assert!(self.cur().is_some());
2377
2378 let start = self.cur_pos();
2379 let (s, has_escape) = self.read_keyword_as_str_with()?;
2380 if let Some(word) = convert(s.as_ref()) {
2381 if has_escape && word.is_reserved(self.ctx()) {
2386 self.error(
2387 start,
2388 SyntaxError::EscapeInReservedWord { word: Atom::new(s) },
2389 )
2390 } else {
2391 Ok(word)
2392 }
2393 } else {
2394 let atom = self.atom(s);
2395 Ok(Token::unknown_ident(atom, self))
2396 }
2397 }
2398
2399 fn read_keyword_as_str_with(&mut self) -> LexResult<(Cow<'a, str>, bool)> {
2403 let slice_start = self.cur_pos();
2404
2405 self.bump();
2409
2410 let next_byte = byte_search! {
2412 lexer: self,
2413 table: NOT_ASCII_ID_CONTINUE_TABLE,
2414 handle_eof: {
2415 let s = unsafe {
2417 self.input_slice_to_cur(slice_start)
2420 };
2421
2422 return Ok((Cow::Borrowed(s), false));
2423 },
2424 };
2425
2426 if !next_byte.is_ascii() || next_byte == b'\\' {
2428 self.read_word_as_str_with_slow_path(slice_start)
2431 } else {
2432 let s = unsafe {
2434 self.input_slice_to_cur(slice_start)
2437 };
2438
2439 Ok((Cow::Borrowed(s), false))
2440 }
2441 }
2442}
2443
2444fn pos_span(p: BytePos) -> Span {
2445 Span::new_with_checked(p, p)
2446}
2447
2448fn fixed_len_span(p: BytePos, len: u32) -> Span {
2449 Span::new_with_checked(p, p + BytePos(len))
2450}