1use std::borrow::Cow;
2
3use char::CharExt;
4use either::Either::{self, Left, Right};
5use num_bigint::BigInt as BigIntValue;
6use smartstring::{LazyCompact, SmartString};
7use state::State;
8use swc_atoms::{
9 wtf8::{CodePoint, Wtf8, Wtf8Buf},
10 Atom,
11};
12use swc_common::{
13 comments::{Comment, CommentKind},
14 input::{Input, StringInput},
15 BytePos, Span,
16};
17use swc_ecma_ast::{EsVersion, Ident};
18
19use self::jsx::xhtml;
20use super::{context::Context, input::Tokens};
21use crate::{
22 common::lexer::{
23 comments_buffer::{BufferedComment, BufferedCommentKind, CommentsBufferTrait},
24 number::{parse_integer, LazyInteger},
25 },
26 error::SyntaxError,
27 lexer::TokenFlags,
28};
29
30pub mod char;
31pub mod comments_buffer;
32mod jsx;
33pub mod number;
34mod search;
35pub mod state;
36pub mod token;
37pub mod whitespace;
38
39use token::TokenFactory;
40
41use self::search::SafeByteMatchTable;
43use crate::{byte_search, safe_byte_match_table};
44
/// First byte shared by the UTF-8 encodings of U+2028 (LINE SEPARATOR) and
/// U+2029 (PARAGRAPH SEPARATOR).
const LS_OR_PS_FIRST: u8 = 0xe2;
/// Second and third UTF-8 bytes of U+2028 (LINE SEPARATOR).
const LS_BYTES_2_AND_3: [u8; 2] = [0x80, 0xa8];
/// Second and third UTF-8 bytes of U+2029 (PARAGRAPH SEPARATOR).
const PS_BYTES_2_AND_3: [u8; 2] = [0x80, 0xa9];

/// Bytes that can start a line terminator: `\n`, `\r`, or the lead byte of
/// LS/PS. A match on the lead byte still has to be confirmed by the caller.
static LINE_BREAK_TABLE: SafeByteMatchTable =
    safe_byte_match_table!(|b| matches!(b, b'\n' | b'\r' | LS_OR_PS_FIRST));

/// Bytes of interest while scanning a block comment: `*` (possible `*/`) and
/// anything that can start a line terminator.
static BLOCK_COMMENT_SCAN_TABLE: SafeByteMatchTable =
    safe_byte_match_table!(|b| { matches!(b, b'*' | b'\n' | b'\r' | LS_OR_PS_FIRST) });

/// Bytes that interrupt a plain scan of a double-quoted string: the closing
/// quote, a backslash, or `\n` / `\r`.
static DOUBLE_QUOTE_STRING_END_TABLE: SafeByteMatchTable =
    safe_byte_match_table!(|b| matches!(b, b'"' | b'\n' | b'\\' | b'\r'));
/// Same as [`DOUBLE_QUOTE_STRING_END_TABLE`], for single-quoted strings.
static SINGLE_QUOTE_STRING_END_TABLE: SafeByteMatchTable =
    safe_byte_match_table!(|b| matches!(b, b'\'' | b'\n' | b'\\' | b'\r'));

/// Matches every byte that is NOT an ASCII identifier-continue byte
/// (ASCII alphanumeric, `_`, or `$`).
static NOT_ASCII_ID_CONTINUE_TABLE: SafeByteMatchTable =
    safe_byte_match_table!(|b| !(b.is_ascii_alphanumeric() || b == b'_' || b == b'$'));

/// Bytes that need special handling inside a template literal: `$`, the
/// backtick, a backslash, and `\r`.
static TEMPLATE_LITERAL_TABLE: SafeByteMatchTable =
    safe_byte_match_table!(|b| matches!(b, b'$' | b'`' | b'\\' | b'\r'));
67
/// Combines a UTF-16 surrogate pair into the code point it encodes.
///
/// `high` is expected to be a high surrogate (`0xD800..=0xDBFF`) and `low` a
/// low surrogate (`0xDC00..=0xDFFF`); no validation is performed here.
#[inline]
const fn pair_to_code_point(high: u32, low: u32) -> u32 {
    // Ten payload bits contributed by the high surrogate.
    let high_bits = (high - 0xd800) * 0x400;
    high_bits + low - 0xdc00 + 0x10000
}
74
/// Result of decoding a `\u` escape sequence.
#[derive(Debug)]
pub enum UnicodeEscape {
    /// A single escape that decoded directly to a Unicode scalar value.
    CodePoint(char),
    /// Two consecutive escapes (high surrogate followed by low surrogate)
    /// that were combined into one scalar value.
    SurrogatePair(char),
    /// A surrogate code unit that was not followed by a matching partner;
    /// the raw value is kept because it is not a valid `char`.
    LoneSurrogate(u32),
}
90
91impl From<UnicodeEscape> for CodePoint {
92 fn from(value: UnicodeEscape) -> Self {
93 match value {
94 UnicodeEscape::CodePoint(c) | UnicodeEscape::SurrogatePair(c) => {
95 CodePoint::from_char(c)
96 }
97 UnicodeEscape::LoneSurrogate(u) => unsafe { CodePoint::from_u32_unchecked(u) },
98 }
99 }
100}
101
/// Result type returned by the fallible lexer routines in this module.
pub type LexResult<T> = swc_ecma_parser::lexer::LexResult<T>;
103
/// Returns `s` without its `_` numeric separators.
///
/// `has_underscore` must tell the truth about `s` (checked in debug builds):
/// when it is `false` the input is returned borrowed, otherwise a new string
/// with every `_` removed is allocated.
fn remove_underscore(s: &str, has_underscore: bool) -> Cow<'_, str> {
    if !has_underscore {
        debug_assert!(!s.contains('_'));
        return Cow::Borrowed(s);
    }

    debug_assert!(s.contains('_'));
    Cow::Owned(s.replace('_', ""))
}
113
114pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
/// Lexer state type; must implement [`state::State`].
type State: self::state::State;
/// Token type produced by this lexer, constructed through
/// [`token::TokenFactory`].
type Token: token::TokenFactory<'a, TokenAndSpan, Self, Lexer = Self>;
/// Buffer type used to collect comments while lexing.
type CommentsBuffer: CommentsBufferTrait;

/// Shared access to the source input.
fn input(&self) -> &StringInput<'a>;
/// Mutable access to the source input.
fn input_mut(&mut self) -> &mut StringInput<'a>;
/// Shared access to the lexer state.
fn state(&self) -> &Self::State;
/// Mutable access to the lexer state.
fn state_mut(&mut self) -> &mut Self::State;
/// The user-supplied comment sink, if any.
fn comments(&self) -> Option<&'a dyn swc_common::comments::Comments>;
/// The internal comments buffer, if comments are being collected.
fn comments_buffer(&self) -> Option<&Self::CommentsBuffer>;
/// Mutable access to the internal comments buffer, if any.
fn comments_buffer_mut(&mut self) -> Option<&mut Self::CommentsBuffer>;
/// Returns the source text between `start` and `end`.
///
/// # Safety
///
/// Presumably `start` and `end` must be valid positions inside the input,
/// with `start <= end` — TODO confirm against the implementors.
unsafe fn input_slice(&mut self, start: BytePos, end: BytePos) -> &'a str;
/// Consumes characters while `f` returns `true` and returns the consumed text.
fn input_uncons_while(&mut self, f: impl FnMut(char) -> bool) -> &'a str;
/// Creates an [`Atom`] from `s`.
fn atom<'b>(&self, s: impl Into<Cow<'b, str>>) -> swc_atoms::Atom;
/// Creates a WTF-8 atom from `s`.
fn wtf8_atom<'b>(&self, s: impl Into<Cow<'b, Wtf8>>) -> swc_atoms::Wtf8Atom;
/// Records a lexing error.
fn push_error(&mut self, error: crate::error::Error);
134
135 #[inline(always)]
136 #[allow(clippy::misnamed_getters)]
137 fn had_line_break_before_last(&self) -> bool {
138 self.state().had_line_break()
139 }
140
/// Builds the span that runs from `start` to the input's last position.
///
/// # Panics
///
/// In builds with debug assertions enabled, panics when `start` lies after
/// the last position.
#[inline(always)]
fn span(&self, start: BytePos) -> Span {
    let end = self.last_pos();
    // Only checked in debug builds; release builds construct the span as-is.
    if cfg!(debug_assertions) && start > end {
        unreachable!(
            "assertion failed: (span.start <= span.end).
 start = {}, end = {}",
            start.0, end.0
        )
    }
    Span { lo: start, hi: end }
}
153
154 #[inline(always)]
155 fn bump(&mut self) {
156 unsafe {
157 self.input_mut().bump()
159 }
160 }
161
162 #[inline(always)]
163 fn is(&self, c: u8) -> bool {
164 self.input().is_byte(c)
165 }
166
167 #[inline(always)]
168 fn is_str(&self, s: &str) -> bool {
169 self.input().is_str(s)
170 }
171
172 #[inline(always)]
173 fn eat(&mut self, c: u8) -> bool {
174 self.input_mut().eat_byte(c)
175 }
176
177 #[inline(always)]
178 fn cur(&self) -> Option<char> {
179 self.input().cur()
180 }
181
182 #[inline(always)]
183 fn peek(&self) -> Option<char> {
184 self.input().peek()
185 }
186
187 #[inline(always)]
188 fn peek_ahead(&self) -> Option<char> {
189 self.input().peek_ahead()
190 }
191
192 #[inline(always)]
193 fn cur_pos(&self) -> BytePos {
194 self.input().cur_pos()
195 }
196
197 #[inline(always)]
198 fn last_pos(&self) -> BytePos {
199 self.input().last_pos()
200 }
201
202 #[cold]
204 #[inline(never)]
205 fn error<T>(&self, start: BytePos, kind: SyntaxError) -> LexResult<T> {
206 let span = self.span(start);
207 self.error_span(span, kind)
208 }
209
210 #[cold]
211 #[inline(never)]
212 fn error_span<T>(&self, span: Span, kind: SyntaxError) -> LexResult<T> {
213 Err(crate::error::Error::new(span, kind))
214 }
215
216 #[cold]
217 #[inline(never)]
218 fn emit_error(&mut self, start: BytePos, kind: SyntaxError) {
219 let span = self.span(start);
220 self.emit_error_span(span, kind)
221 }
222
223 #[cold]
224 #[inline(never)]
225 fn emit_error_span(&mut self, span: Span, kind: SyntaxError) {
226 if self.ctx().contains(Context::IgnoreError) {
227 return;
228 }
229 tracing::warn!("Lexer error at {:?}", span);
230 let err = crate::error::Error::new(span, kind);
231 self.push_error(err);
232 }
233
234 #[cold]
235 #[inline(never)]
236 fn emit_strict_mode_error(&mut self, start: BytePos, kind: SyntaxError) {
237 let span = self.span(start);
238 if self.ctx().contains(Context::Strict) {
239 self.emit_error_span(span, kind);
240 } else {
241 let err = crate::error::Error::new(span, kind);
242 self.add_module_mode_error(err);
243 }
244 }
245
246 #[cold]
247 #[inline(never)]
248 fn emit_module_mode_error(&mut self, start: BytePos, kind: SyntaxError) {
249 let span = self.span(start);
250 let err = crate::error::Error::new(span, kind);
251 self.add_module_mode_error(err);
252 }
253
/// Skips a line comment and, when a comments buffer is present, records it.
///
/// `start_skip` is the number of bytes of the comment opener to skip before
/// the comment text begins (the caller in this file passes `2` for `//`).
/// The recorded span starts at the opener; the recorded text starts after it.
#[inline(never)]
fn skip_line_comment(&mut self, start_skip: usize) {
    // Span start: position of the comment opener.
    let start = self.cur_pos();
    self.input_mut().bump_bytes(start_skip);
    // Text start: first byte after the opener.
    let slice_start = self.cur_pos();

    // The comment is queued as "pending" when a line break was already seen
    // or the state says a trailing line comment is not possible here;
    // otherwise it is stored as a trailing comment.
    let is_for_next =
        self.state().had_line_break() || !self.state().can_have_trailing_line_comment();

    // Scan forward to the next line terminator.
    // NOTE(review): presumably `byte_search!` leaves the input positioned at
    // the matched byte — confirm against the macro definition.
    byte_search! {
        lexer: self,
        table: LINE_BREAK_TABLE,
        continue_if: (matched_byte, pos_offset) {
            if matched_byte != LS_OR_PS_FIRST {
                // `\n` or `\r`: stop here.
                false
            } else {
                // 0xE2 only starts LS/PS when followed by the right two bytes.
                let current_slice = self.input().as_str();
                let byte_pos = pos_offset;
                if byte_pos + 2 < current_slice.len() {
                    let bytes = current_slice.as_bytes();
                    let next2 = [bytes[byte_pos + 1], bytes[byte_pos + 2]];
                    if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
                        // A real LS/PS: stop here.
                        false
                    } else {
                        // Some other character starting with 0xE2: keep scanning.
                        true
                    }
                } else {
                    // Too few bytes left for LS/PS: keep scanning.
                    true
                }
            }
        },
        handle_eof: {
            // No line terminator: the comment runs to the end of the input.
            let end = self.input().end_pos();

            if self.comments_buffer().is_some() {
                let s = unsafe { self.input_slice(slice_start, end) };
                let cmt = swc_common::comments::Comment {
                    kind: swc_common::comments::CommentKind::Line,
                    span: Span::new_with_checked(start, end),
                    text: self.atom(s),
                };

                if is_for_next {
                    self.comments_buffer_mut().unwrap().push_pending(cmt);
                } else {
                    let pos = self.state().prev_hi();
                    self.comments_buffer_mut().unwrap().push_comment(BufferedComment {
                        kind: BufferedCommentKind::Trailing,
                        pos,
                        comment: cmt,
                    });
                }
            }

            return;
        }
    };

    // A line terminator was found; the comment ends at the current position.
    let end = self.cur_pos();

    if self.comments_buffer().is_some() {
        let s = unsafe {
            self.input_slice(slice_start, end)
        };
        let cmt = swc_common::comments::Comment {
            kind: swc_common::comments::CommentKind::Line,
            span: Span::new_with_checked(start, end),
            text: self.atom(s),
        };

        if is_for_next {
            self.comments_buffer_mut().unwrap().push_pending(cmt);
        } else {
            let pos = self.state().prev_hi();
            self.comments_buffer_mut()
                .unwrap()
                .push_comment(BufferedComment {
                    kind: BufferedCommentKind::Trailing,
                    pos,
                    comment: cmt,
                });
        }
    }

    // Put the input back at the end of the comment after slicing.
    unsafe {
        self.input_mut().reset_to(end);
    }
}
362
363 fn skip_block_comment(&mut self) {
365 let start = self.cur_pos();
366
367 debug_assert_eq!(self.cur(), Some('/'));
368 debug_assert_eq!(self.peek(), Some('*'));
369
370 self.input_mut().bump_bytes(2);
372
373 let slice_start = self.cur_pos();
375
376 let had_line_break_before_last = self.had_line_break_before_last();
377 let mut should_mark_had_line_break = false;
378
379 loop {
380 let matched_byte = byte_search! {
381 lexer: self,
382 table: BLOCK_COMMENT_SCAN_TABLE,
383 continue_if: (matched_byte, pos_offset) {
384 if matched_byte == LS_OR_PS_FIRST {
385 let current_slice = self.input().as_str();
387 let byte_pos = pos_offset;
388 if byte_pos + 2 < current_slice.len() {
389 let bytes = current_slice.as_bytes();
390 let next2 = [bytes[byte_pos + 1], bytes[byte_pos + 2]];
391 if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
392 false
394 } else {
395 true
397 }
398 } else {
399 true
401 }
402 } else {
403 false
405 }
406 },
407 handle_eof: {
408 if should_mark_had_line_break {
409 self.state_mut().mark_had_line_break();
410 }
411 let end_pos = self.input().end_pos();
412 let span = Span::new_with_checked(end_pos, end_pos);
413 self.emit_error_span(span, SyntaxError::UnterminatedBlockComment);
414 return;
415 }
416 };
417
418 match matched_byte {
419 b'*' => {
420 if self.peek() == Some('/') {
421 self.input_mut().bump_bytes(2);
423
424 if should_mark_had_line_break {
425 self.state_mut().mark_had_line_break();
426 }
427
428 let end = self.cur_pos();
429
430 let mut is_for_next =
432 had_line_break_before_last || !self.state().can_have_trailing_comment();
433
434 if !had_line_break_before_last && self.input().is_byte(b';') {
436 is_for_next = false;
437 }
438
439 if self.comments_buffer().is_some() {
440 let src = unsafe {
441 self.input_mut().slice(slice_start, end)
444 };
445 let s = &src[..src.len() - 2];
446 let cmt = Comment {
447 kind: CommentKind::Block,
448 span: Span::new_with_checked(start, end),
449 text: self.atom(s),
450 };
451
452 if is_for_next {
453 self.comments_buffer_mut().unwrap().push_pending(cmt);
454 } else {
455 let pos = self.state().prev_hi();
456 self.comments_buffer_mut()
457 .unwrap()
458 .push_comment(BufferedComment {
459 kind: BufferedCommentKind::Trailing,
460 pos,
461 comment: cmt,
462 });
463 }
464 }
465
466 return;
467 } else {
468 self.bump();
470 }
471 }
472 b'\n' => {
473 should_mark_had_line_break = true;
474 self.bump();
475 }
476 b'\r' => {
477 should_mark_had_line_break = true;
478 self.bump();
479 if self.peek() == Some('\n') {
480 self.bump();
481 }
482 }
483 _ => {
484 if let Some('\u{2028}' | '\u{2029}') = self.cur() {
486 should_mark_had_line_break = true;
487 }
488 self.bump();
489 }
490 }
491 }
492 }
493
494 #[inline(never)]
498 fn skip_space<const LEX_COMMENTS: bool>(&mut self) {
499 loop {
500 let (offset, newline) = {
501 let mut skip = self::whitespace::SkipWhitespace {
502 input: self.input().as_str(),
503 newline: false,
504 offset: 0,
505 };
506
507 skip.scan();
508
509 (skip.offset, skip.newline)
510 };
511
512 self.input_mut().bump_bytes(offset as usize);
513 if newline {
514 self.state_mut().mark_had_line_break();
515 }
516
517 if LEX_COMMENTS && self.input().is_byte(b'/') {
518 if let Some(c) = self.peek() {
519 if c == '/' {
520 self.skip_line_comment(2);
521 continue;
522 } else if c == '*' {
523 self.skip_block_comment();
524 continue;
525 }
526 }
527 }
528
529 break;
530 }
531 }
532
533 fn ensure_not_ident(&mut self) -> LexResult<()> {
535 match self.cur() {
536 Some(c) if c.is_ident_start() => {
537 let span = pos_span(self.cur_pos());
538 self.error_span(span, SyntaxError::IdentAfterNum)?
539 }
540 _ => Ok(()),
541 }
542 }
543
544 fn make_legacy_octal(&mut self, start: BytePos, val: f64) -> LexResult<f64> {
545 self.ensure_not_ident()?;
546 if self.syntax().typescript() && self.target() >= EsVersion::Es5 {
547 self.emit_error(start, SyntaxError::TS1085);
548 }
549 self.emit_strict_mode_error(start, SyntaxError::LegacyOctal);
550 Ok(val)
551 }
552
/// Reads a run of digits in `RADIX`, folding each digit through `op`.
///
/// * `op(acc, radix, digit)` returns the new accumulator and whether reading
///   should continue; returning `false` stops after the digit just consumed.
/// * `allow_num_separator` — when `true`, `_` characters are skipped (with an
///   error emitted for a misplaced one); when `false`, `_` is treated like
///   any other non-digit and ends the run.
/// * `has_underscore` — set to `true` as soon as a `_` is seen, regardless of
///   `allow_num_separator`.
///
/// Returns the final accumulator; stops at the first character that is not a
/// digit in `RADIX` (that character is not consumed) or at end of input.
fn read_digits<F, Ret, const RADIX: u8>(
    &mut self,
    mut op: F,
    allow_num_separator: bool,
    has_underscore: &mut bool,
) -> LexResult<Ret>
where
    F: FnMut(Ret, u8, u32) -> LexResult<(Ret, bool)>,
    Ret: Copy + Default,
{
    debug_assert!(
        RADIX == 2 || RADIX == 8 || RADIX == 10 || RADIX == 16,
        "radix for read_int should be one of 2, 8, 10, 16, but got {RADIX}"
    );

    if cfg!(feature = "debug") {
        tracing::trace!("read_digits(radix = {}), cur = {:?}", RADIX, self.cur());
    }

    let start = self.cur_pos();
    let mut total: Ret = Default::default();
    // Last digit consumed; stays `None` until the first digit and is not
    // updated when a separator is skipped.
    let mut prev = None;

    while let Some(c) = self.cur() {
        if c == '_' {
            *has_underscore = true;
            if allow_num_separator {
                // The character after `_` must be a digit in this radix.
                let is_allowed = |c: Option<char>| {
                    let Some(c) = c else {
                        return false;
                    };
                    c.is_digit(RADIX as _)
                };
                // Characters that may not sit next to a separator.
                let is_forbidden = |c: Option<char>| {
                    let Some(c) = c else {
                        return false;
                    };

                    if RADIX == 16 {
                        matches!(c, '.' | 'X' | '_' | 'x')
                    } else {
                        matches!(c, '.' | 'B' | 'E' | 'O' | '_' | 'b' | 'e' | 'o')
                    }
                };

                let next = self.input().peek();

                if !is_allowed(next) || is_forbidden(prev) || is_forbidden(next) {
                    self.emit_error(
                        start,
                        SyntaxError::NumericSeparatorIsAllowedOnlyBetweenTwoDigits,
                    );
                }

                // Skip the separator itself; the error above is non-fatal.
                unsafe {
                    // `cur()` just returned `Some`, so there is a character to bump.
                    self.input_mut().bump();
                }

                continue;
            }
        }

        // Stop at the first character that is not a digit in this radix.
        let val = if let Some(val) = c.to_digit(RADIX as _) {
            val
        } else {
            return Ok(total);
        };

        self.bump();

        let (t, cont) = op(total, RADIX, val)?;

        total = t;

        if !cont {
            return Ok(total);
        }

        prev = Some(c);
    }

    Ok(total)
}
640
641 fn read_number_no_dot_as_str<const RADIX: u8>(&mut self) -> LexResult<LazyInteger> {
646 debug_assert!(
647 RADIX == 2 || RADIX == 8 || RADIX == 10 || RADIX == 16,
648 "radix for read_number_no_dot should be one of 2, 8, 10, 16, but got {RADIX}"
649 );
650 let start = self.cur_pos();
651
652 let mut not_octal = false;
653 let mut read_any = false;
654 let mut has_underscore = false;
655
656 self.read_digits::<_, (), RADIX>(
657 |_, _, v| {
658 read_any = true;
659
660 if v == 8 || v == 9 {
661 not_octal = true;
662 }
663
664 Ok(((), true))
665 },
666 true,
667 &mut has_underscore,
668 )?;
669
670 if !read_any {
671 self.error(start, SyntaxError::ExpectedDigit { radix: RADIX })?;
672 }
673
674 Ok(LazyInteger {
675 start,
676 end: self.cur_pos(),
677 not_octal,
678 has_underscore,
679 })
680 }
681
/// Reads a decimal numeric literal (integer, float, legacy octal, or BigInt).
///
/// * `START_WITH_DOT` — the literal starts with `.` (no integer part).
/// * `START_WITH_ZERO` — the literal starts with `0`, which enables the
///   legacy-octal / legacy-decimal handling.
///
/// Returns `Left((value, raw))` for an `f64` literal and
/// `Right((value, raw))` for a BigInt literal, where `raw` is the source text.
fn read_number<const START_WITH_DOT: bool, const START_WITH_ZERO: bool>(
    &mut self,
) -> LexResult<Either<(f64, Atom), (Box<BigIntValue>, Atom)>> {
    debug_assert!(!(START_WITH_DOT && START_WITH_ZERO));
    debug_assert!(self.cur().is_some());

    let start = self.cur_pos();
    let mut has_underscore = false;

    let lazy_integer = if START_WITH_DOT {
        debug_assert!(
            self.cur().is_some_and(|c| c == '.'),
            "read_number<START_WITH_DOT = true> expects current char to be '.'"
        );
        // Empty integer part; the fraction is read below.
        LazyInteger {
            start,
            end: start,
            not_octal: true,
            has_underscore: false,
        }
    } else {
        debug_assert!(!START_WITH_DOT);
        debug_assert!(!START_WITH_ZERO || self.cur().unwrap() == '0');

        let lazy_integer = self.read_number_no_dot_as_str::<10>()?;
        let s = unsafe {
            // Both positions were produced by the input while reading the digits.
            self.input_slice(lazy_integer.start, lazy_integer.end)
        };

        // BigInt suffix: allowed unless the literal is a multi-digit number
        // starting with `0`.
        if (!START_WITH_ZERO || lazy_integer.end - lazy_integer.start == BytePos(1))
            && self.eat(b'n')
        {
            let end = self.cur_pos();
            let raw = unsafe {
                self.input_slice(start, end)
            };
            let bigint_value = num_bigint::BigInt::parse_bytes(s.as_bytes(), 10).unwrap();
            return Ok(Either::Right((Box::new(bigint_value), self.atom(raw))));
        }

        if START_WITH_ZERO {
            if s.as_bytes().iter().all(|&c| c == b'0') {
                // A single `0` stays decimal (it may still get a fraction);
                // several zeros are a legacy octal literal with value 0.
                if start.0 != self.last_pos().0 - 1 {
                    let end = self.cur_pos();
                    let raw = unsafe {
                        self.input_slice(start, end)
                    };
                    let raw = self.atom(raw);
                    return self
                        .make_legacy_octal(start, 0f64)
                        .map(|value| Either::Left((value, raw)));
                }
            } else if lazy_integer.not_octal {
                // Leading zero but contains 8 or 9: a legacy decimal literal.
                self.emit_strict_mode_error(start, SyntaxError::LegacyDecimal);
            } else {
                // Leading zero and only octal digits: reinterpret in base 8.
                let s = remove_underscore(s, lazy_integer.has_underscore);
                let val = parse_integer::<8>(&s);
                let end = self.cur_pos();
                let raw = unsafe {
                    self.input_slice(start, end)
                };
                let raw = self.atom(raw);
                return self
                    .make_legacy_octal(start, val)
                    .map(|value| Either::Left((value, raw)));
            }
        }

        lazy_integer
    };

    has_underscore |= lazy_integer.has_underscore;
    // From here on the literal is not a legacy octal.
    let has_dot = self.cur() == Some('.');
    if has_dot {
        self.bump();

        debug_assert!(!START_WITH_DOT || self.cur().is_some_and(|cur| cur.is_ascii_digit()));

        // Fraction digits; only their position matters, not their value.
        self.read_digits::<_, (), 10>(|_, _, _| Ok(((), true)), true, &mut has_underscore)?;
    }

    let has_e = self.cur().is_some_and(|c| c == 'e' || c == 'E');
    if has_e {
        // Consume `e` / `E`.
        self.bump();
        let next = match self.cur() {
            Some(next) => next,
            None => {
                let pos = self.cur_pos();
                self.error(pos, SyntaxError::NumLitTerminatedWithExp)?
            }
        };

        if next == '+' || next == '-' {
            // Consume the exponent sign.
            self.bump();
        }

        let lazy_integer = self.read_number_no_dot_as_str::<10>()?;
        has_underscore |= lazy_integer.has_underscore;
    }

    let val = if has_dot || has_e {
        // Floats are parsed from the whole literal text with separators removed.
        let end = self.cur_pos();
        let raw = unsafe {
            self.input_slice(start, end)
        };

        let raw = remove_underscore(raw, has_underscore);
        raw.parse().expect("failed to parse float literal")
    } else {
        let s = unsafe { self.input_slice(lazy_integer.start, lazy_integer.end) };
        let s = remove_underscore(s, has_underscore);
        parse_integer::<10>(&s)
    };

    self.ensure_not_ident()?;

    let end = self.cur_pos();
    let raw_str = unsafe {
        self.input_slice(start, end)
    };
    Ok(Either::Left((val, raw_str.into())))
}
838
839 fn read_int_u32<const RADIX: u8>(&mut self, len: u8) -> LexResult<Option<u32>> {
840 let start = self.state().start();
841
842 let mut count = 0;
843 let v = self.read_digits::<_, Option<u32>, RADIX>(
844 |opt: Option<u32>, radix, val| {
845 count += 1;
846
847 let total = opt
848 .unwrap_or_default()
849 .checked_mul(radix as u32)
850 .and_then(|v| v.checked_add(val))
851 .ok_or_else(|| {
852 let span = Span::new_with_checked(start, start);
853 crate::error::Error::new(span, SyntaxError::InvalidUnicodeEscape)
854 })?;
855
856 Ok((Some(total), count != len))
857 },
858 true,
859 &mut false,
860 )?;
861 if len != 0 && count != len {
862 Ok(None)
863 } else {
864 Ok(v)
865 }
866 }
867
868 fn read_radix_number<const RADIX: u8>(
870 &mut self,
871 ) -> LexResult<Either<(f64, Atom), (Box<BigIntValue>, Atom)>> {
872 debug_assert!(
873 RADIX == 2 || RADIX == 8 || RADIX == 16,
874 "radix should be one of 2, 8, 16, but got {RADIX}"
875 );
876 let start = self.cur_pos();
877
878 debug_assert_eq!(self.cur(), Some('0'));
879 self.bump();
880
881 debug_assert!(self
882 .cur()
883 .is_some_and(|c| matches!(c, 'b' | 'B' | 'o' | 'O' | 'x' | 'X')));
884 self.bump();
885
886 let lazy_integer = self.read_number_no_dot_as_str::<RADIX>()?;
887 let has_underscore = lazy_integer.has_underscore;
888
889 let s = unsafe {
890 self.input_slice(lazy_integer.start, lazy_integer.end)
892 };
893 if self.eat(b'n') {
894 let end = self.cur_pos();
895 let raw = unsafe {
896 self.input_slice(start, end)
898 };
899
900 let bigint_value = num_bigint::BigInt::parse_bytes(s.as_bytes(), RADIX as _).unwrap();
901 return Ok(Either::Right((Box::new(bigint_value), self.atom(raw))));
902 }
903 let s = remove_underscore(s, has_underscore);
904 let val = parse_integer::<RADIX>(&s);
905
906 self.ensure_not_ident()?;
907
908 let end = self.cur_pos();
909 let raw = unsafe {
910 self.input_slice(start, end)
912 };
913
914 Ok(Either::Left((val, self.atom(raw))))
915 }
916
917 #[cold]
921 #[inline(never)]
922 fn consume_pending_comments(&mut self) {
923 if let Some(comments) = self.comments() {
924 let last = self.state().prev_hi();
925 let start_pos = self.start_pos();
926 let comments_buffer = self.comments_buffer_mut().unwrap();
927
928 let kind = if last == start_pos {
932 BufferedCommentKind::Leading
933 } else {
934 BufferedCommentKind::Trailing
935 };
936 comments_buffer.pending_to_comment(kind, last);
938
939 for comment in comments_buffer.take_comments() {
941 match comment.kind {
942 BufferedCommentKind::Leading => {
943 comments.add_leading(comment.pos, comment.comment);
944 }
945 BufferedCommentKind::Trailing => {
946 comments.add_trailing(comment.pos, comment.comment);
947 }
948 }
949 }
950 }
951 }
952
953 fn read_jsx_word(&mut self) -> LexResult<Self::Token> {
960 debug_assert!(self.syntax().jsx());
961 debug_assert!(self.input().cur().is_some_and(|c| c.is_ident_start()));
962
963 let mut first = true;
964 let slice = self.input_uncons_while(|c| {
965 if first {
966 first = false;
967 c.is_ident_start()
968 } else {
969 c.is_ident_part() || c == '-'
970 }
971 });
972
973 Ok(Self::Token::jsx_name(slice, self))
974 }
975
976 fn read_jsx_entity(&mut self) -> LexResult<(char, String)> {
977 debug_assert!(self.syntax().jsx());
978
979 fn from_code(s: &str, radix: u32) -> LexResult<char> {
980 let c = char::from_u32(
982 u32::from_str_radix(s, radix).expect("failed to parse string as number"),
983 )
984 .expect("failed to parse number as char");
985
986 Ok(c)
987 }
988
989 fn is_hex(s: &str) -> bool {
990 s.chars().all(|c| c.is_ascii_hexdigit())
991 }
992
993 fn is_dec(s: &str) -> bool {
994 s.chars().all(|c| c.is_ascii_digit())
995 }
996
997 let mut s = SmartString::<LazyCompact>::default();
998
999 debug_assert!(self.input().cur().is_some_and(|c| c == '&'));
1000 self.bump();
1001
1002 let start_pos = self.input().cur_pos();
1003
1004 for _ in 0..10 {
1005 let c = match self.input().cur() {
1006 Some(c) => c,
1007 None => break,
1008 };
1009 self.bump();
1010
1011 if c == ';' {
1012 if let Some(stripped) = s.strip_prefix('#') {
1013 if stripped.starts_with('x') {
1014 if is_hex(&s[2..]) {
1015 let value = from_code(&s[2..], 16)?;
1016
1017 return Ok((value, format!("&{s};")));
1018 }
1019 } else if is_dec(stripped) {
1020 let value = from_code(stripped, 10)?;
1021
1022 return Ok((value, format!("&{s};")));
1023 }
1024 } else if let Some(entity) = xhtml(&s) {
1025 return Ok((entity, format!("&{s};")));
1026 }
1027
1028 break;
1029 }
1030
1031 s.push(c)
1032 }
1033
1034 unsafe {
1035 self.input_mut().reset_to(start_pos);
1037 }
1038
1039 Ok(('&', "&".to_string()))
1040 }
1041
1042 fn read_jsx_new_line(&mut self, normalize_crlf: bool) -> LexResult<Either<&'static str, char>> {
1043 debug_assert!(self.syntax().jsx());
1044 let ch = self.input().cur().unwrap();
1045 self.bump();
1046
1047 let out = if ch == '\r' && self.input().cur() == Some('\n') {
1048 self.bump(); Either::Left(if normalize_crlf { "\n" } else { "\r\n" })
1050 } else {
1051 Either::Right(ch)
1052 };
1053 Ok(out)
1054 }
1055
1056 fn read_jsx_str(&mut self, quote: char) -> LexResult<Self::Token> {
1057 debug_assert!(self.syntax().jsx());
1058 let start = self.input().cur_pos();
1059 unsafe {
1060 self.input_mut().bump(); }
1063 let mut out = String::new();
1064 let mut chunk_start = self.input().cur_pos();
1065 loop {
1066 let ch = match self.input().cur() {
1067 Some(c) => c,
1068 None => {
1069 self.emit_error(start, SyntaxError::UnterminatedStrLit);
1070 break;
1071 }
1072 };
1073 let cur_pos = self.input().cur_pos();
1074 if ch == '\\' {
1075 let value = unsafe {
1076 self.input_slice(chunk_start, cur_pos)
1078 };
1079
1080 out.push_str(value);
1081 out.push('\\');
1082
1083 self.bump();
1084
1085 chunk_start = self.input().cur_pos();
1086
1087 continue;
1088 }
1089
1090 if ch == quote {
1091 break;
1092 }
1093
1094 if ch == '&' {
1095 let value = unsafe {
1096 self.input_slice(chunk_start, cur_pos)
1098 };
1099
1100 out.push_str(value);
1101
1102 let jsx_entity = self.read_jsx_entity()?;
1103
1104 out.push(jsx_entity.0);
1105
1106 chunk_start = self.input().cur_pos();
1107 } else if ch.is_line_terminator() {
1108 let value = unsafe {
1109 self.input_slice(chunk_start, cur_pos)
1111 };
1112
1113 out.push_str(value);
1114
1115 match self.read_jsx_new_line(false)? {
1116 Either::Left(s) => {
1117 out.push_str(s);
1118 }
1119 Either::Right(c) => {
1120 out.push(c);
1121 }
1122 }
1123
1124 chunk_start = cur_pos + BytePos(ch.len_utf8() as _);
1125 } else {
1126 unsafe {
1127 self.input_mut().bump();
1129 }
1130 }
1131 }
1132 let cur_pos = self.input().cur_pos();
1133 let s = unsafe {
1134 self.input_slice(chunk_start, cur_pos)
1136 };
1137 let value = if out.is_empty() {
1138 self.atom(s)
1140 } else {
1141 out.push_str(s);
1142 self.atom(out)
1143 };
1144
1145 if self.input().peek_ahead().is_some() {
1148 self.bump();
1149 }
1150
1151 let end = self.input().cur_pos();
1152 let raw = unsafe {
1153 self.input_slice(start, end)
1155 };
1156 let raw = self.atom(raw);
1157 Ok(Self::Token::str(value.into(), raw, self))
1158 }
1159
/// Reads four hex digits as a UTF-16 code unit and, when it is a high
/// surrogate directly followed by `\uXXXX` holding a low surrogate, combines
/// the two into one character.
///
/// Returns `Ok(None)` when four hex digits could not be read (for either
/// escape). A surrogate that cannot be paired is returned as
/// `UnicodeEscape::LoneSurrogate`, and in that case a second escape that was
/// inspected is not consumed.
fn read_unicode_code_unit(&mut self) -> LexResult<Option<UnicodeEscape>> {
    const MIN_HIGH: u32 = 0xd800;
    const MAX_HIGH: u32 = 0xdbff;
    const MIN_LOW: u32 = 0xdc00;
    const MAX_LOW: u32 = 0xdfff;

    let Some(high) = self.read_int_u32::<16>(4)? else {
        return Ok(None);
    };
    // Anything that is already a valid `char` is not a surrogate.
    if let Some(ch) = char::from_u32(high) {
        return Ok(Some(UnicodeEscape::CodePoint(ch)));
    }

    // Four hex digits that are not a `char` must be a surrogate.
    debug_assert!(high >= MIN_HIGH);
    // Only a high surrogate followed by another `\u` can form a pair.
    let is_pair = high <= MAX_HIGH
        && self.input().cur() == Some('\\')
        && self.input().peek() == Some('u');
    if !is_pair {
        return Ok(Some(UnicodeEscape::LoneSurrogate(high)));
    }

    // Remember where the second escape starts in case it has to be un-read.
    let before_second = self.input().cur_pos();

    // Skip `\u`.
    self.input_mut().bump_bytes(2);

    let Some(low) = self.read_int_u32::<16>(4)? else {
        return Ok(None);
    };

    if !(MIN_LOW..=MAX_LOW).contains(&low) {
        // The second escape is not a low surrogate: rewind so it is lexed
        // on its own and report the first one as a lone surrogate.
        unsafe {
            // `before_second` was obtained from the input above.
            self.input_mut().reset_to(before_second);
        }
        return Ok(Some(UnicodeEscape::LoneSurrogate(high)));
    }

    let code_point = pair_to_code_point(high, low);
    // A high surrogate combined with a low surrogate always yields a value in
    // 0x10000..=0x10FFFF, which is a valid `char`.
    let ch = unsafe { char::from_u32_unchecked(code_point) };
    Ok(Some(UnicodeEscape::SurrogatePair(ch)))
}
1221
/// Reads a unicode escape, with the input positioned at the `u` that follows
/// the backslash. Handles both `uXXXX` and `u{X…}` forms.
///
/// Fails with `SyntaxError::BadCharacterEscapeSequence` when the digits are
/// missing or the value exceeds `0x10FFFF`, and with
/// `SyntaxError::InvalidUnicodeEscape` when a `{` form is not closed by `}`.
fn read_unicode_escape(&mut self) -> LexResult<UnicodeEscape> {
    debug_assert_eq!(self.cur(), Some('u'));

    let mut is_curly = false;

    // Consume `u`.
    self.bump();
    if self.eat(b'{') {
        is_curly = true;
    }

    // Position of the first digit, used for rewinding and error spans.
    let state = self.input().cur_pos();
    // `{}` form: any number of digits (len 0); plain form: exactly four.
    let c = match self.read_int_u32::<16>(if is_curly { 0 } else { 4 }) {
        Ok(Some(val)) => {
            if 0x0010_ffff >= val {
                // `None` here means the value is a surrogate.
                char::from_u32(val)
            } else {
                let start = self.cur_pos();

                self.error(
                    start,
                    SyntaxError::BadCharacterEscapeSequence {
                        expected: if is_curly {
                            "1-6 hex characters in the range 0 to 10FFFF."
                        } else {
                            "4 hex characters"
                        },
                    },
                )?
            }
        }
        _ => {
            let start = self.cur_pos();

            self.error(
                start,
                SyntaxError::BadCharacterEscapeSequence {
                    expected: if is_curly {
                        "1-6 hex characters"
                    } else {
                        "4 hex characters"
                    },
                },
            )?
        }
    };

    match c {
        Some(c) => {
            if is_curly && !self.eat(b'}') {
                self.error(state, SyntaxError::InvalidUnicodeEscape)?
            }

            Ok(UnicodeEscape::CodePoint(c))
        }
        _ => {
            // Surrogate value: re-read the digits as a UTF-16 code unit so a
            // following low surrogate escape can be paired with it.
            unsafe {
                // `state` was obtained from the input above.
                self.input_mut().reset_to(state);
            }

            let Some(value) = self.read_unicode_code_unit()? else {
                self.error(
                    state,
                    SyntaxError::BadCharacterEscapeSequence {
                        expected: if is_curly {
                            "1-6 hex characters"
                        } else {
                            "4 hex characters"
                        },
                    },
                )?
            };

            if is_curly && !self.eat(b'}') {
                self.error(state, SyntaxError::InvalidUnicodeEscape)?
            }

            Ok(value)
        }
    }
}
1304
1305 #[cold]
1306 fn read_shebang(&mut self) -> LexResult<Option<Atom>> {
1307 if self.input().cur() != Some('#') || self.input().peek() != Some('!') {
1308 return Ok(None);
1309 }
1310 self.bump(); self.bump(); let s = self.input_uncons_while(|c| !c.is_line_terminator());
1313 Ok(Some(self.atom(s)))
1314 }
1315
1316 fn read_tmpl_token(&mut self, start_of_tpl: BytePos) -> LexResult<Self::Token> {
1317 let start = self.cur_pos();
1318
1319 let mut cooked = Ok(Wtf8Buf::new());
1320 let mut cooked_slice_start = start;
1321 let raw_slice_start = start;
1322
1323 macro_rules! consume_cooked {
1324 () => {{
1325 if let Ok(cooked) = &mut cooked {
1326 let last_pos = self.cur_pos();
1327 cooked.push_str(unsafe {
1328 self.input_slice(cooked_slice_start, last_pos)
1331 });
1332 }
1333 }};
1334 }
1335
1336 if start == self.cur_pos() && self.state().last_was_tpl_element() {
1338 if let Some(c) = self.cur() {
1339 if c == '$' && self.peek() == Some('{') {
1340 self.bump(); self.bump(); return Ok(Self::Token::DOLLAR_LBRACE);
1343 } else if c == '`' {
1344 self.bump(); return Ok(Self::Token::BACKQUOTE);
1346 }
1347 }
1348 }
1349
1350 loop {
1352 let matched_byte = byte_search! {
1353 lexer: self,
1354 table: TEMPLATE_LITERAL_TABLE,
1355 handle_eof: {
1356 self.error(start_of_tpl, SyntaxError::UnterminatedTpl)?
1358 }
1359 };
1360
1361 match matched_byte {
1362 b'$' => {
1363 if self.peek() == Some('{') {
1365 let cooked = if cooked_slice_start == raw_slice_start {
1367 let last_pos = self.cur_pos();
1368 let s = unsafe {
1369 self.input_slice(cooked_slice_start, last_pos)
1372 };
1373 Ok(self.wtf8_atom(Wtf8::from_str(s)))
1374 } else {
1375 consume_cooked!();
1376 cooked.map(|s| self.wtf8_atom(&*s))
1377 };
1378
1379 let end = self.input().cur_pos();
1380 let raw = unsafe {
1381 self.input_slice(raw_slice_start, end)
1384 };
1385 let raw = self.atom(raw);
1386 return Ok(Self::Token::template(cooked, raw, self));
1387 } else {
1388 self.bump();
1390 continue;
1391 }
1392 }
1393 b'`' => {
1394 let cooked = if cooked_slice_start == raw_slice_start {
1396 let last_pos = self.cur_pos();
1397 let s = unsafe { self.input_slice(cooked_slice_start, last_pos) };
1398 Ok(self.wtf8_atom(Wtf8::from_str(s)))
1399 } else {
1400 consume_cooked!();
1401 cooked.map(|s| self.wtf8_atom(&*s))
1402 };
1403
1404 let end = self.input().cur_pos();
1405 let raw = unsafe { self.input_slice(raw_slice_start, end) };
1406 let raw = self.atom(raw);
1407 return Ok(Self::Token::template(cooked, raw, self));
1408 }
1409 b'\r' => {
1410 self.state_mut().mark_had_line_break();
1412 consume_cooked!();
1413
1414 self.bump(); if self.peek() == Some('\n') {
1417 self.bump(); }
1419
1420 if let Ok(ref mut cooked) = cooked {
1421 cooked.push_char('\n');
1422 }
1423 cooked_slice_start = self.cur_pos();
1424 }
1425 b'\\' => {
1426 consume_cooked!();
1428
1429 match self.read_escaped_char(true) {
1430 Ok(Some(escaped)) => {
1431 if let Ok(ref mut cooked) = cooked {
1432 cooked.push(escaped);
1433 }
1434 }
1435 Ok(None) => {}
1436 Err(error) => {
1437 cooked = Err(error);
1438 }
1439 }
1440
1441 cooked_slice_start = self.cur_pos();
1442 }
1443 _ => unreachable!(),
1444 }
1445 }
1446 }
1447
1448 fn read_escaped_char(&mut self, in_template: bool) -> LexResult<Option<CodePoint>> {
1452 debug_assert_eq!(self.cur(), Some('\\'));
1453
1454 let start = self.cur_pos();
1455
1456 self.bump(); let c = match self.cur() {
1459 Some(c) => c,
1460 None => self.error_span(pos_span(start), SyntaxError::InvalidStrEscape)?,
1461 };
1462
1463 let c = match c {
1464 '\\' => '\\',
1465 'n' => '\n',
1466 'r' => '\r',
1467 't' => '\t',
1468 'b' => '\u{0008}',
1469 'v' => '\u{000b}',
1470 'f' => '\u{000c}',
1471 '\r' => {
1472 self.bump(); self.eat(b'\n');
1475
1476 return Ok(None);
1477 }
1478 '\n' | '\u{2028}' | '\u{2029}' => {
1479 self.bump();
1480
1481 return Ok(None);
1482 }
1483
1484 'x' => {
1486 self.bump(); match self.read_int_u32::<16>(2)? {
1489 Some(val) => return Ok(CodePoint::from_u32(val)),
1490 None => self.error(
1491 start,
1492 SyntaxError::BadCharacterEscapeSequence {
1493 expected: "2 hex characters",
1494 },
1495 )?,
1496 }
1497 }
1498
1499 'u' => match self.read_unicode_escape() {
1501 Ok(value) => {
1502 return Ok(Some(value.into()));
1503 }
1504 Err(err) => self.error(start, err.into_kind())?,
1505 },
1506
1507 '0'..='7' => {
1509 self.bump();
1510
1511 let first_c = if c == '0' {
1512 match self.cur() {
1513 Some(next) if next.is_digit(8) => c,
1514 _ => return Ok(Some(CodePoint::from_char('\u{0000}'))),
1516 }
1517 } else {
1518 c
1519 };
1520
1521 if in_template {
1523 self.error(start, SyntaxError::LegacyOctal)?
1524 }
1525
1526 self.emit_strict_mode_error(start, SyntaxError::LegacyOctal);
1527
1528 let mut value: u8 = first_c.to_digit(8).unwrap() as u8;
1529
1530 macro_rules! one {
1531 ($check:expr) => {{
1532 let cur = self.cur();
1533
1534 match cur.and_then(|c| c.to_digit(8)) {
1535 Some(v) => {
1536 value = if $check {
1537 let new_val = value
1538 .checked_mul(8)
1539 .and_then(|value| value.checked_add(v as u8));
1540 match new_val {
1541 Some(val) => val,
1542 None => return Ok(CodePoint::from_u32(value as u32)),
1543 }
1544 } else {
1545 value * 8 + v as u8
1546 };
1547
1548 self.bump();
1549 }
1550 _ => return Ok(CodePoint::from_u32(value as u32)),
1551 }
1552 }};
1553 }
1554
1555 one!(false);
1556 one!(true);
1557
1558 return Ok(CodePoint::from_u32(value as u32));
1559 }
1560 _ => c,
1561 };
1562
1563 unsafe {
1564 self.input_mut().bump();
1566 }
1567
1568 Ok(CodePoint::from_u32(c as u32))
1569 }
1570
1571 fn read_regexp(&mut self, start: BytePos) -> LexResult<Self::Token> {
1573 unsafe {
1574 self.input_mut().reset_to(start);
1576 }
1577
1578 debug_assert_eq!(self.cur(), Some('/'));
1579
1580 let start = self.cur_pos();
1581
1582 self.bump(); let slice_start = self.cur_pos();
1585
1586 let (mut escaped, mut in_class) = (false, false);
1587
1588 while let Some(c) = self.cur() {
1589 if c.is_line_terminator() {
1592 let span = self.span(start);
1593
1594 return Err(crate::error::Error::new(
1595 span,
1596 SyntaxError::UnterminatedRegExp,
1597 ));
1598 }
1599
1600 if escaped {
1601 escaped = false;
1602 } else {
1603 match c {
1604 '[' => in_class = true,
1605 ']' if in_class => in_class = false,
1606 '/' if !in_class => break,
1608 _ => {}
1609 }
1610
1611 escaped = c == '\\';
1612 }
1613
1614 self.bump();
1615 }
1616
1617 let content = {
1618 let end = self.cur_pos();
1619 let s = unsafe { self.input_slice(slice_start, end) };
1620 self.atom(s)
1621 };
1622
1623 if !self.is(b'/') {
1625 let span = self.span(start);
1626
1627 return Err(crate::error::Error::new(
1628 span,
1629 SyntaxError::UnterminatedRegExp,
1630 ));
1631 }
1632
1633 self.bump(); let flags = {
1642 match self.cur() {
1643 Some(c) if c.is_ident_start() => self
1644 .read_word_as_str_with()
1645 .map(|(s, _)| Some(self.atom(s))),
1646 _ => Ok(None),
1647 }
1648 }?
1649 .unwrap_or_default();
1650
1651 Ok(Self::Token::regexp(content, flags, self))
1652 }
1653
1654 fn read_word_as_str_with(&mut self) -> LexResult<(Cow<'a, str>, bool)> {
1656 debug_assert!(self.cur().is_some());
1657 let slice_start = self.cur_pos();
1658
1659 if let Some(c) = self.input().cur_as_ascii() {
1661 if Ident::is_valid_ascii_start(c) {
1662 self.bump();
1664
1665 let next_byte = byte_search! {
1667 lexer: self,
1668 table: NOT_ASCII_ID_CONTINUE_TABLE,
1669 handle_eof: {
1670 let end = self.cur_pos();
1672 let s = unsafe {
1673 self.input_slice(slice_start, end)
1676 };
1677
1678 return Ok((Cow::Borrowed(s), false));
1679 },
1680 };
1681
1682 if !next_byte.is_ascii() {
1684 return self.read_word_as_str_with_slow_path(slice_start);
1686 } else if next_byte == b'\\' {
1687 return self.read_word_as_str_with_slow_path(slice_start);
1689 } else {
1690 let end = self.cur_pos();
1692 let s = unsafe {
1693 self.input_slice(slice_start, end)
1696 };
1697
1698 return Ok((Cow::Borrowed(s), false));
1699 }
1700 }
1701 }
1702
1703 self.read_word_as_str_with_slow_path(slice_start)
1705 }
1706
1707 #[cold]
1709 fn read_word_as_str_with_slow_path(
1710 &mut self,
1711 mut slice_start: BytePos,
1712 ) -> LexResult<(Cow<'a, str>, bool)> {
1713 let mut first = true;
1714 let mut has_escape = false;
1715
1716 let mut buf = String::with_capacity(16);
1717 loop {
1718 if let Some(c) = self.input().cur_as_ascii() {
1719 if Ident::is_valid_ascii_continue(c) {
1720 self.bump();
1721 continue;
1722 } else if first && Ident::is_valid_ascii_start(c) {
1723 self.bump();
1724 first = false;
1725 continue;
1726 }
1727
1728 if c == b'\\' {
1730 first = false;
1731 has_escape = true;
1732 let start = self.cur_pos();
1733 self.bump();
1734
1735 if !self.is(b'u') {
1736 self.error_span(pos_span(start), SyntaxError::ExpectedUnicodeEscape)?
1737 }
1738
1739 {
1740 let end = self.input().cur_pos();
1741 let s = unsafe {
1742 self.input_slice(slice_start, start)
1745 };
1746 buf.push_str(s);
1747 unsafe {
1748 self.input_mut().reset_to(end);
1750 }
1751 }
1752
1753 let value = self.read_unicode_escape()?;
1754
1755 match value {
1756 UnicodeEscape::CodePoint(ch) => {
1757 let valid = if first {
1758 ch.is_ident_start()
1759 } else {
1760 ch.is_ident_part()
1761 };
1762 if !valid {
1763 self.emit_error(start, SyntaxError::InvalidIdentChar);
1764 }
1765 buf.push(ch);
1766 }
1767 UnicodeEscape::SurrogatePair(ch) => {
1768 buf.push(ch);
1769 self.emit_error(start, SyntaxError::InvalidIdentChar);
1770 }
1771 UnicodeEscape::LoneSurrogate(code_point) => {
1772 buf.push_str(format!("\\u{code_point:04X}").as_str());
1773 self.emit_error(start, SyntaxError::InvalidIdentChar);
1774 }
1775 };
1776
1777 slice_start = self.cur_pos();
1778 continue;
1779 }
1780
1781 break;
1783 } else if let Some(c) = self.input().cur() {
1784 if Ident::is_valid_non_ascii_continue(c) {
1785 self.bump();
1786 continue;
1787 } else if first && Ident::is_valid_non_ascii_start(c) {
1788 self.bump();
1789 first = false;
1790 continue;
1791 }
1792 }
1793
1794 break;
1795 }
1796
1797 let end = self.cur_pos();
1798 let s = unsafe {
1799 self.input_slice(slice_start, end)
1802 };
1803 let value = if !has_escape {
1804 Cow::Borrowed(s)
1806 } else {
1807 buf.push_str(s);
1808 Cow::Owned(buf)
1809 };
1810
1811 Ok((value, has_escape))
1812 }
1813
1814 fn read_token_number_sign(&mut self) -> LexResult<Self::Token> {
1816 debug_assert!(self.cur().is_some_and(|c| c == '#'));
1817
1818 self.bump(); debug_assert!(
1823 !self.input().is_at_start() || self.cur() != Some('!'),
1824 "#! should have already been handled by read_shebang()"
1825 );
1826 Ok(Self::Token::HASH)
1827 }
1828
1829 #[inline(never)]
1833 fn read_token_dot(&mut self) -> LexResult<Self::Token> {
1834 debug_assert!(self.cur().is_some_and(|c| c == '.'));
1835 let next = match self.input().peek() {
1837 Some(next) => next,
1838 None => {
1839 self.bump(); return Ok(Self::Token::DOT);
1841 }
1842 };
1843 if next.is_ascii_digit() {
1844 return self.read_number::<true, false>().map(|v| match v {
1845 Left((value, raw)) => Self::Token::num(value, raw, self),
1846 Right(_) => unreachable!("read_number should not return bigint for leading dot"),
1847 });
1848 }
1849
1850 self.bump(); if next == '.' && self.input().peek() == Some('.') {
1853 self.bump(); self.bump(); return Ok(Self::Token::DOTDOTDOT);
1857 }
1858
1859 Ok(Self::Token::DOT)
1860 }
1861
1862 #[inline(never)]
1866 fn read_token_question_mark(&mut self) -> LexResult<Self::Token> {
1867 debug_assert!(self.cur().is_some_and(|c| c == '?'));
1868 self.bump();
1869 if self.input_mut().eat_byte(b'?') {
1870 if self.input_mut().eat_byte(b'=') {
1871 Ok(Self::Token::NULLISH_ASSIGN)
1872 } else {
1873 Ok(Self::Token::NULLISH_COALESCING)
1874 }
1875 } else {
1876 Ok(Self::Token::QUESTION)
1877 }
1878 }
1879
1880 #[inline(never)]
1884 fn read_token_colon(&mut self) -> LexResult<Self::Token> {
1885 debug_assert!(self.cur().is_some_and(|c| c == ':'));
1886 self.bump(); Ok(Self::Token::COLON)
1888 }
1889
1890 #[inline(never)]
1894 fn read_token_zero(&mut self) -> LexResult<Self::Token> {
1895 debug_assert_eq!(self.cur(), Some('0'));
1896 let next = self.input().peek();
1897
1898 let bigint = match next {
1899 Some('x') | Some('X') => self.read_radix_number::<16>(),
1900 Some('o') | Some('O') => self.read_radix_number::<8>(),
1901 Some('b') | Some('B') => self.read_radix_number::<2>(),
1902 _ => {
1903 return self.read_number::<false, true>().map(|v| match v {
1904 Left((value, raw)) => Self::Token::num(value, raw, self),
1905 Right((value, raw)) => Self::Token::bigint(value, raw, self),
1906 });
1907 }
1908 };
1909
1910 bigint.map(|v| match v {
1911 Left((value, raw)) => Self::Token::num(value, raw, self),
1912 Right((value, raw)) => Self::Token::bigint(value, raw, self),
1913 })
1914 }
1915
1916 #[inline(never)]
1920 fn read_token_logical<const C: u8>(&mut self) -> LexResult<Self::Token> {
1921 debug_assert!(C == b'|' || C == b'&');
1922 let is_bit_and = C == b'&';
1923 let had_line_break_before_last = self.had_line_break_before_last();
1924 let start = self.cur_pos();
1925
1926 unsafe {
1927 self.input_mut().bump();
1929 }
1930 let token = if is_bit_and {
1931 Self::Token::BIT_AND
1932 } else {
1933 Self::Token::BIT_OR
1934 };
1935
1936 if self.input_mut().eat_byte(b'=') {
1938 return Ok(if is_bit_and {
1939 Self::Token::BIT_AND_EQ
1940 } else {
1941 debug_assert!(token.is_bit_or());
1942 Self::Token::BIT_OR_EQ
1943 });
1944 }
1945
1946 if self.input().cur() == Some(C as char) {
1948 unsafe {
1949 self.input_mut().bump();
1951 }
1952
1953 if self.input().cur() == Some('=') {
1954 unsafe {
1955 self.input_mut().bump();
1957 }
1958
1959 return Ok(if is_bit_and {
1960 Self::Token::LOGICAL_AND_EQ
1961 } else {
1962 debug_assert!(token.is_bit_or());
1963 Self::Token::LOGICAL_OR_EQ
1964 });
1965 }
1966
1967 if had_line_break_before_last && !is_bit_and && self.is_str("||||| ") {
1970 let span = fixed_len_span(start, 7);
1971 self.emit_error_span(span, SyntaxError::TS1185);
1972 self.skip_line_comment(5);
1973 self.skip_space::<true>();
1974 return self.error_span(span, SyntaxError::TS1185);
1975 }
1976
1977 return Ok(if is_bit_and {
1978 Self::Token::LOGICAL_AND
1979 } else {
1980 debug_assert!(token.is_bit_or());
1981 Self::Token::LOGICAL_OR
1982 });
1983 }
1984
1985 Ok(token)
1986 }
1987
1988 #[inline(never)]
1992 fn read_token_mul_mod(&mut self, is_mul: bool) -> LexResult<Self::Token> {
1993 debug_assert!(self.cur().is_some_and(|c| c == '*' || c == '%'));
1994 self.bump();
1995 let token = if is_mul {
1996 if self.input_mut().eat_byte(b'*') {
1997 Self::Token::EXP
1999 } else {
2000 Self::Token::MUL
2001 }
2002 } else {
2003 Self::Token::MOD
2004 };
2005
2006 Ok(if self.input_mut().eat_byte(b'=') {
2007 if token.is_star() {
2008 Self::Token::MUL_EQ
2009 } else if token.is_mod() {
2010 Self::Token::MOD_EQ
2011 } else {
2012 debug_assert!(token.is_exp());
2013 Self::Token::EXP_EQ
2014 }
2015 } else {
2016 token
2017 })
2018 }
2019
2020 #[inline(never)]
2021 fn read_slash(&mut self) -> LexResult<Self::Token> {
2022 debug_assert_eq!(self.cur(), Some('/'));
2023 self.bump(); Ok(if self.eat(b'=') {
2025 Self::Token::DIV_EQ
2026 } else {
2027 Self::Token::DIV
2028 })
2029 }
2030
2031 fn read_ident_unknown(&mut self) -> LexResult<Self::Token> {
2034 debug_assert!(self.cur().is_some());
2035
2036 let (s, has_escape) = self.read_word_as_str_with()?;
2037 let atom = self.atom(s);
2038 let word = Self::Token::unknown_ident(atom, self);
2039
2040 if has_escape {
2041 self.update_token_flags(|flags| *flags |= TokenFlags::UNICODE);
2042 }
2043
2044 Ok(word)
2045 }
2046
2047 fn read_str_lit(&mut self) -> LexResult<Self::Token> {
2050 debug_assert!(self.cur() == Some('\'') || self.cur() == Some('"'));
2051 let start = self.cur_pos();
2052 let quote = self.cur().unwrap() as u8;
2053
2054 self.bump(); let mut slice_start = self.input().cur_pos();
2057
2058 let mut buf: Option<Wtf8Buf> = None;
2059
2060 loop {
2061 let table = if quote == b'"' {
2062 &DOUBLE_QUOTE_STRING_END_TABLE
2063 } else {
2064 &SINGLE_QUOTE_STRING_END_TABLE
2065 };
2066
2067 let fast_path_result = byte_search! {
2068 lexer: self,
2069 table: table,
2070 handle_eof: {
2071 let value_end = self.cur_pos();
2072 let s = unsafe {
2073 self.input_slice(slice_start, value_end)
2076 };
2077
2078 self.emit_error(start, SyntaxError::UnterminatedStrLit);
2079
2080 let end = self.cur_pos();
2081 let raw = unsafe { self.input_slice(start, end) };
2082 return Ok(Self::Token::str(self.wtf8_atom(Wtf8::from_str(s)), self.atom(raw), self));
2083 },
2084 };
2085 match fast_path_result {
2088 b'"' | b'\'' if fast_path_result == quote => {
2089 let value_end = self.cur_pos();
2090
2091 let value = if let Some(buf) = buf.as_mut() {
2092 debug_assert!(unsafe { self.input_slice(start, value_end).contains('\\') });
2094 let s = unsafe {
2095 self.input_slice(slice_start, value_end)
2098 };
2099 buf.push_str(s);
2100 self.wtf8_atom(&**buf)
2101 } else {
2102 let s = unsafe { self.input_slice(slice_start, value_end) };
2103 self.wtf8_atom(Wtf8::from_str(s))
2104 };
2105
2106 unsafe {
2107 self.input_mut().bump();
2109 }
2110
2111 let end = self.cur_pos();
2112 let raw = unsafe {
2113 self.input_slice(start, end)
2116 };
2117 let raw = self.atom(raw);
2118 return Ok(Self::Token::str(value, raw, self));
2119 }
2120 b'\\' => {
2121 let end = self.cur_pos();
2122 let s = unsafe {
2123 self.input_slice(slice_start, end)
2126 };
2127
2128 if buf.is_none() {
2129 buf = Some(Wtf8Buf::from_str(s));
2130 } else {
2131 buf.as_mut().unwrap().push_str(s);
2132 }
2133
2134 if let Some(escaped) = self.read_escaped_char(false)? {
2135 buf.as_mut().unwrap().push(escaped);
2136 }
2137
2138 slice_start = self.cur_pos();
2139 continue;
2140 }
2141 b'\n' | b'\r' => {
2142 let end = self.cur_pos();
2143 let s = unsafe {
2144 self.input_slice(slice_start, end)
2147 };
2148
2149 self.emit_error(start, SyntaxError::UnterminatedStrLit);
2150
2151 let end = self.cur_pos();
2152
2153 let raw = unsafe {
2154 self.input_slice(start, end)
2157 };
2158 return Ok(Self::Token::str(
2159 self.wtf8_atom(Wtf8::from_str(s)),
2160 self.atom(raw),
2161 self,
2162 ));
2163 }
2164 _ => self.bump(),
2165 }
2166 }
2167 }
2168
2169 fn read_keyword_with(
2170 &mut self,
2171 convert: &dyn Fn(&str) -> Option<Self::Token>,
2172 ) -> LexResult<Self::Token> {
2173 debug_assert!(self.cur().is_some());
2174
2175 let start = self.cur_pos();
2176 let (s, has_escape) = self.read_keyword_as_str_with()?;
2177 if let Some(word) = convert(s.as_ref()) {
2178 if has_escape && word.is_reserved(self.ctx()) {
2183 self.error(
2184 start,
2185 SyntaxError::EscapeInReservedWord { word: Atom::new(s) },
2186 )
2187 } else {
2188 Ok(word)
2189 }
2190 } else {
2191 let atom = self.atom(s);
2192 Ok(Self::Token::unknown_ident(atom, self))
2193 }
2194 }
2195
2196 fn read_keyword_as_str_with(&mut self) -> LexResult<(Cow<'a, str>, bool)> {
2200 let slice_start = self.cur_pos();
2201
2202 self.bump();
2206
2207 let next_byte = byte_search! {
2209 lexer: self,
2210 table: NOT_ASCII_ID_CONTINUE_TABLE,
2211 handle_eof: {
2212 let end = self.cur_pos();
2214 let s = unsafe {
2215 self.input_slice(slice_start, end)
2218 };
2219
2220 return Ok((Cow::Borrowed(s), false));
2221 },
2222 };
2223
2224 if !next_byte.is_ascii() || next_byte == b'\\' {
2226 self.read_word_as_str_with_slow_path(slice_start)
2229 } else {
2230 let end = self.cur_pos();
2232 let s = unsafe {
2233 self.input_slice(slice_start, end)
2236 };
2237
2238 Ok((Cow::Borrowed(s), false))
2239 }
2240 }
2241}
2242
2243pub fn pos_span(p: BytePos) -> Span {
2244 Span::new_with_checked(p, p)
2245}
2246
2247pub fn fixed_len_span(p: BytePos, len: u32) -> Span {
2248 Span::new_with_checked(p, p + BytePos(len))
2249}