1use std::borrow::Cow;
2
3use char::{Char, CharExt};
4use either::Either::{self, Left, Right};
5use num_bigint::BigInt as BigIntValue;
6use smartstring::{LazyCompact, SmartString};
7use state::State;
8use swc_atoms::Atom;
9use swc_common::{
10 comments::{Comment, CommentKind},
11 input::{Input, StringInput},
12 BytePos, Span,
13};
14use swc_ecma_ast::{EsVersion, Ident};
15
16use self::jsx::xhtml;
17use super::{context::Context, input::Tokens};
18use crate::{
19 common::lexer::{
20 comments_buffer::{BufferedComment, BufferedCommentKind, CommentsBufferTrait},
21 number::{parse_integer, LazyInteger},
22 },
23 error::SyntaxError,
24 lexer::TokenFlags,
25};
26
27pub mod char;
28pub mod comments_buffer;
29mod jsx;
30pub mod number;
31mod search;
32pub mod state;
33pub mod token;
34pub mod whitespace;
35
36use token::TokenFactory;
37
38use self::search::SafeByteMatchTable;
40use crate::{byte_search, safe_byte_match_table};
41
42const LS_OR_PS_FIRST: u8 = 0xe2;
45const LS_BYTES_2_AND_3: [u8; 2] = [0x80, 0xa8];
46const PS_BYTES_2_AND_3: [u8; 2] = [0x80, 0xa9];
47
48static LINE_BREAK_TABLE: SafeByteMatchTable =
49 safe_byte_match_table!(|b| matches!(b, b'\n' | b'\r' | LS_OR_PS_FIRST));
50
51static BLOCK_COMMENT_SCAN_TABLE: SafeByteMatchTable =
52 safe_byte_match_table!(|b| { matches!(b, b'*' | b'\n' | b'\r' | LS_OR_PS_FIRST) });
53
54static DOUBLE_QUOTE_STRING_END_TABLE: SafeByteMatchTable =
55 safe_byte_match_table!(|b| matches!(b, b'"' | b'\n' | b'\\' | b'\r'));
56static SINGLE_QUOTE_STRING_END_TABLE: SafeByteMatchTable =
57 safe_byte_match_table!(|b| matches!(b, b'\'' | b'\n' | b'\\' | b'\r'));
58
59static NOT_ASCII_ID_CONTINUE_TABLE: SafeByteMatchTable =
60 safe_byte_match_table!(|b| !(b.is_ascii_alphanumeric() || b == b'_' || b == b'$'));
61
62static TEMPLATE_LITERAL_TABLE: SafeByteMatchTable =
63 safe_byte_match_table!(|b| matches!(b, b'$' | b'`' | b'\\' | b'\r'));
64
65pub type LexResult<T> = Result<T, crate::error::Error>;
66
/// Returns `s` with every numeric separator (`_`) removed.
///
/// `has_underscore` is the caller's record of whether `s` contains a
/// separator. When it is `false` the input is returned borrowed, without
/// scanning; debug builds assert that the flag agrees with the text.
fn remove_underscore(s: &str, has_underscore: bool) -> Cow<'_, str> {
    if !has_underscore {
        debug_assert!(!s.contains('_'));
        return Cow::Borrowed(s);
    }

    debug_assert!(s.contains('_'));
    let mut digits = String::new();
    for ch in s.chars() {
        if ch != '_' {
            digits.push(ch);
        }
    }
    Cow::Owned(digits)
}
76
77pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
/// Lexer state type; must implement this module's `state::State`.
type State: self::state::State;
/// Factory used to construct the tokens this lexer produces.
type Token: token::TokenFactory<'a, TokenAndSpan, Self, Lexer = Self>;
/// Buffer type used to hold comments collected while lexing.
type CommentsBuffer: CommentsBufferTrait;

/// Shared access to the source input being lexed.
fn input(&self) -> &StringInput<'a>;
/// Mutable access to the source input being lexed.
fn input_mut(&mut self) -> &mut StringInput<'a>;
/// Shared access to the lexer state.
fn state(&self) -> &Self::State;
/// Mutable access to the lexer state.
fn state_mut(&mut self) -> &mut Self::State;
/// The caller-supplied comment store, if any. `consume_pending_comments`
/// flushes the buffered comments into it.
fn comments(&self) -> Option<&'a dyn swc_common::comments::Comments>;
/// The comments buffer, if comments are being collected.
fn comments_buffer(&self) -> Option<&Self::CommentsBuffer>;
/// Mutable access to the comments buffer, if comments are being collected.
fn comments_buffer_mut(&mut self) -> Option<&mut Self::CommentsBuffer>;
/// Returns the source text between `start` and `end`.
///
/// # Safety
///
/// NOTE(review): the exact contract is set by implementors. Every caller in
/// this file passes positions it previously read from the same input —
/// confirm that this is the requirement.
unsafe fn input_slice(&mut self, start: BytePos, end: BytePos) -> &'a str;
/// Consumes input while `f` accepts the current character and returns the
/// consumed text. (Presumed from the name and its use in `read_jsx_word` /
/// `read_shebang` — confirm against the implementation.)
fn input_uncons_while(&mut self, f: impl FnMut(char) -> bool) -> &'a str;
/// Builds an `Atom` from the given string.
fn atom<'b>(&self, s: impl Into<Cow<'b, str>>) -> swc_atoms::Atom;
/// Records a lexer error; called by `emit_error_span`.
fn push_error(&mut self, error: crate::error::Error);
96
97 #[inline(always)]
98 #[allow(clippy::misnamed_getters)]
99 fn had_line_break_before_last(&self) -> bool {
100 self.state().had_line_break()
101 }
102
103 #[inline(always)]
104 fn span(&self, start: BytePos) -> Span {
105 let end = self.last_pos();
106 if cfg!(debug_assertions) && start > end {
107 unreachable!(
108 "assertion failed: (span.start <= span.end).
109 start = {}, end = {}",
110 start.0, end.0
111 )
112 }
113 Span { lo: start, hi: end }
114 }
115
116 #[inline(always)]
117 fn bump(&mut self) {
118 unsafe {
119 self.input_mut().bump()
121 }
122 }
123
124 #[inline(always)]
125 fn is(&self, c: u8) -> bool {
126 self.input().is_byte(c)
127 }
128
129 #[inline(always)]
130 fn is_str(&self, s: &str) -> bool {
131 self.input().is_str(s)
132 }
133
134 #[inline(always)]
135 fn eat(&mut self, c: u8) -> bool {
136 self.input_mut().eat_byte(c)
137 }
138
139 #[inline(always)]
140 fn cur(&self) -> Option<char> {
141 self.input().cur()
142 }
143
144 #[inline(always)]
145 fn peek(&self) -> Option<char> {
146 self.input().peek()
147 }
148
149 #[inline(always)]
150 fn peek_ahead(&self) -> Option<char> {
151 self.input().peek_ahead()
152 }
153
154 #[inline(always)]
155 fn cur_pos(&self) -> BytePos {
156 self.input().cur_pos()
157 }
158
159 #[inline(always)]
160 fn last_pos(&self) -> BytePos {
161 self.input().last_pos()
162 }
163
164 #[cold]
166 #[inline(never)]
167 fn error<T>(&self, start: BytePos, kind: SyntaxError) -> LexResult<T> {
168 let span = self.span(start);
169 self.error_span(span, kind)
170 }
171
172 #[cold]
173 #[inline(never)]
174 fn error_span<T>(&self, span: Span, kind: SyntaxError) -> LexResult<T> {
175 Err(crate::error::Error::new(span, kind))
176 }
177
178 #[cold]
179 #[inline(never)]
180 fn emit_error(&mut self, start: BytePos, kind: SyntaxError) {
181 let span = self.span(start);
182 self.emit_error_span(span, kind)
183 }
184
185 #[cold]
186 #[inline(never)]
187 fn emit_error_span(&mut self, span: Span, kind: SyntaxError) {
188 if self.ctx().contains(Context::IgnoreError) {
189 return;
190 }
191 tracing::warn!("Lexer error at {:?}", span);
192 let err = crate::error::Error::new(span, kind);
193 self.push_error(err);
194 }
195
196 #[cold]
197 #[inline(never)]
198 fn emit_strict_mode_error(&mut self, start: BytePos, kind: SyntaxError) {
199 let span = self.span(start);
200 if self.ctx().contains(Context::Strict) {
201 self.emit_error_span(span, kind);
202 } else {
203 let err = crate::error::Error::new(span, kind);
204 self.add_module_mode_error(err);
205 }
206 }
207
208 #[cold]
209 #[inline(never)]
210 fn emit_module_mode_error(&mut self, start: BytePos, kind: SyntaxError) {
211 let span = self.span(start);
212 let err = crate::error::Error::new(span, kind);
213 self.add_module_mode_error(err);
214 }
215
216 #[inline(never)]
217 fn skip_line_comment(&mut self, start_skip: usize) {
218 let start = self.cur_pos();
220 self.input_mut().bump_bytes(start_skip);
221 let slice_start = self.cur_pos();
222
223 let is_for_next =
231 self.state().had_line_break() || !self.state().can_have_trailing_line_comment();
232
233 byte_search! {
235 lexer: self,
236 table: LINE_BREAK_TABLE,
237 continue_if: (matched_byte, pos_offset) {
238 if matched_byte != LS_OR_PS_FIRST {
239 false
241 } else {
242 let current_slice = self.input().as_str();
245 let byte_pos = pos_offset;
246 if byte_pos + 2 < current_slice.len() {
247 let bytes = current_slice.as_bytes();
248 let next2 = [bytes[byte_pos + 1], bytes[byte_pos + 2]];
249 if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
250 false
252 } else {
253 true
255 }
256 } else {
257 true
259 }
260 }
261 },
262 handle_eof: {
263 let end = self.input().end_pos();
265
266 if self.comments_buffer().is_some() {
267 let s = unsafe { self.input_slice(slice_start, end) };
268 let cmt = swc_common::comments::Comment {
269 kind: swc_common::comments::CommentKind::Line,
270 span: Span::new_with_checked(start, end),
271 text: self.atom(s),
272 };
273
274 if is_for_next {
275 self.comments_buffer_mut().unwrap().push_pending(cmt);
276 } else {
277 let pos = self.state().prev_hi();
278 self.comments_buffer_mut().unwrap().push_comment(BufferedComment {
279 kind: BufferedCommentKind::Trailing,
280 pos,
281 comment: cmt,
282 });
283 }
284 }
285
286 return;
287 }
288 };
289
290 let end = self.cur_pos();
292
293 if self.comments_buffer().is_some() {
295 let s = unsafe {
296 self.input_slice(slice_start, end)
298 };
299 let cmt = swc_common::comments::Comment {
300 kind: swc_common::comments::CommentKind::Line,
301 span: Span::new_with_checked(start, end),
302 text: self.atom(s),
303 };
304
305 if is_for_next {
306 self.comments_buffer_mut().unwrap().push_pending(cmt);
307 } else {
308 let pos = self.state().prev_hi();
309 self.comments_buffer_mut()
310 .unwrap()
311 .push_comment(BufferedComment {
312 kind: BufferedCommentKind::Trailing,
313 pos,
314 comment: cmt,
315 });
316 }
317 }
318
319 unsafe {
320 self.input_mut().reset_to(end);
322 }
323 }
324
325 fn skip_block_comment(&mut self) {
327 let start = self.cur_pos();
328
329 debug_assert_eq!(self.cur(), Some('/'));
330 debug_assert_eq!(self.peek(), Some('*'));
331
332 self.input_mut().bump_bytes(2);
334
335 let slice_start = self.cur_pos();
337
338 let had_line_break_before_last = self.had_line_break_before_last();
339 let mut should_mark_had_line_break = false;
340
341 loop {
342 let matched_byte = byte_search! {
343 lexer: self,
344 table: BLOCK_COMMENT_SCAN_TABLE,
345 continue_if: (matched_byte, pos_offset) {
346 if matched_byte == LS_OR_PS_FIRST {
347 let current_slice = self.input().as_str();
349 let byte_pos = pos_offset;
350 if byte_pos + 2 < current_slice.len() {
351 let bytes = current_slice.as_bytes();
352 let next2 = [bytes[byte_pos + 1], bytes[byte_pos + 2]];
353 if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
354 false
356 } else {
357 true
359 }
360 } else {
361 true
363 }
364 } else {
365 false
367 }
368 },
369 handle_eof: {
370 if should_mark_had_line_break {
371 self.state_mut().mark_had_line_break();
372 }
373 let end_pos = self.input().end_pos();
374 let span = Span::new_with_checked(end_pos, end_pos);
375 self.emit_error_span(span, SyntaxError::UnterminatedBlockComment);
376 return;
377 }
378 };
379
380 match matched_byte {
381 b'*' => {
382 if self.peek() == Some('/') {
383 self.input_mut().bump_bytes(2);
385
386 if should_mark_had_line_break {
387 self.state_mut().mark_had_line_break();
388 }
389
390 let end = self.cur_pos();
391
392 let mut is_for_next =
394 had_line_break_before_last || !self.state().can_have_trailing_comment();
395
396 if !had_line_break_before_last && self.input().is_byte(b';') {
398 is_for_next = false;
399 }
400
401 if self.comments_buffer().is_some() {
402 let src = unsafe {
403 self.input_mut().slice(slice_start, end)
406 };
407 let s = &src[..src.len() - 2];
408 let cmt = Comment {
409 kind: CommentKind::Block,
410 span: Span::new_with_checked(start, end),
411 text: self.atom(s),
412 };
413
414 if is_for_next {
415 self.comments_buffer_mut().unwrap().push_pending(cmt);
416 } else {
417 let pos = self.state().prev_hi();
418 self.comments_buffer_mut()
419 .unwrap()
420 .push_comment(BufferedComment {
421 kind: BufferedCommentKind::Trailing,
422 pos,
423 comment: cmt,
424 });
425 }
426 }
427
428 return;
429 } else {
430 self.bump();
432 }
433 }
434 b'\n' => {
435 should_mark_had_line_break = true;
436 self.bump();
437 }
438 b'\r' => {
439 should_mark_had_line_break = true;
440 self.bump();
441 if self.peek() == Some('\n') {
442 self.bump();
443 }
444 }
445 _ => {
446 if let Some('\u{2028}' | '\u{2029}') = self.cur() {
448 should_mark_had_line_break = true;
449 }
450 self.bump();
451 }
452 }
453 }
454 }
455
456 #[inline(never)]
460 fn skip_space<const LEX_COMMENTS: bool>(&mut self) {
461 loop {
462 let (offset, newline) = {
463 let mut skip = self::whitespace::SkipWhitespace {
464 input: self.input().as_str(),
465 newline: false,
466 offset: 0,
467 };
468
469 skip.scan();
470
471 (skip.offset, skip.newline)
472 };
473
474 self.input_mut().bump_bytes(offset as usize);
475 if newline {
476 self.state_mut().mark_had_line_break();
477 }
478
479 if LEX_COMMENTS && self.input().is_byte(b'/') {
480 if let Some(c) = self.peek() {
481 if c == '/' {
482 self.skip_line_comment(2);
483 continue;
484 } else if c == '*' {
485 self.skip_block_comment();
486 continue;
487 }
488 }
489 }
490
491 break;
492 }
493 }
494
495 fn ensure_not_ident(&mut self) -> LexResult<()> {
497 match self.cur() {
498 Some(c) if c.is_ident_start() => {
499 let span = pos_span(self.cur_pos());
500 self.error_span(span, SyntaxError::IdentAfterNum)?
501 }
502 _ => Ok(()),
503 }
504 }
505
506 fn make_legacy_octal(&mut self, start: BytePos, val: f64) -> LexResult<f64> {
507 self.ensure_not_ident()?;
508 if self.syntax().typescript() && self.target() >= EsVersion::Es5 {
509 self.emit_error(start, SyntaxError::TS1085);
510 }
511 self.emit_strict_mode_error(start, SyntaxError::LegacyOctal);
512 Ok(val)
513 }
514
515 fn read_digits<F, Ret, const RADIX: u8>(
517 &mut self,
518 mut op: F,
519 allow_num_separator: bool,
520 has_underscore: &mut bool,
521 ) -> LexResult<Ret>
522 where
523 F: FnMut(Ret, u8, u32) -> LexResult<(Ret, bool)>,
524 Ret: Copy + Default,
525 {
526 debug_assert!(
527 RADIX == 2 || RADIX == 8 || RADIX == 10 || RADIX == 16,
528 "radix for read_int should be one of 2, 8, 10, 16, but got {RADIX}"
529 );
530
531 if cfg!(feature = "debug") {
532 tracing::trace!("read_digits(radix = {}), cur = {:?}", RADIX, self.cur());
533 }
534
535 let start = self.cur_pos();
536 let mut total: Ret = Default::default();
537 let mut prev = None;
538
539 while let Some(c) = self.cur() {
540 if c == '_' {
541 *has_underscore = true;
542 if allow_num_separator {
543 let is_allowed = |c: Option<char>| {
544 let Some(c) = c else {
545 return false;
546 };
547 c.is_digit(RADIX as _)
548 };
549 let is_forbidden = |c: Option<char>| {
550 let Some(c) = c else {
551 return false;
552 };
553
554 if RADIX == 16 {
555 matches!(c, '.' | 'X' | '_' | 'x')
556 } else {
557 matches!(c, '.' | 'B' | 'E' | 'O' | '_' | 'b' | 'e' | 'o')
558 }
559 };
560
561 let next = self.input().peek();
562
563 if !is_allowed(next) || is_forbidden(prev) || is_forbidden(next) {
564 self.emit_error(
565 start,
566 SyntaxError::NumericSeparatorIsAllowedOnlyBetweenTwoDigits,
567 );
568 }
569
570 unsafe {
572 self.input_mut().bump();
574 }
575
576 continue;
577 }
578 }
579
580 let val = if let Some(val) = c.to_digit(RADIX as _) {
582 val
583 } else {
584 return Ok(total);
585 };
586
587 self.bump();
588
589 let (t, cont) = op(total, RADIX, val)?;
590
591 total = t;
592
593 if !cont {
594 return Ok(total);
595 }
596
597 prev = Some(c);
598 }
599
600 Ok(total)
601 }
602
603 fn read_number_no_dot_as_str<const RADIX: u8>(&mut self) -> LexResult<LazyInteger> {
608 debug_assert!(
609 RADIX == 2 || RADIX == 8 || RADIX == 10 || RADIX == 16,
610 "radix for read_number_no_dot should be one of 2, 8, 10, 16, but got {RADIX}"
611 );
612 let start = self.cur_pos();
613
614 let mut not_octal = false;
615 let mut read_any = false;
616 let mut has_underscore = false;
617
618 self.read_digits::<_, (), RADIX>(
619 |_, _, v| {
620 read_any = true;
621
622 if v == 8 || v == 9 {
623 not_octal = true;
624 }
625
626 Ok(((), true))
627 },
628 true,
629 &mut has_underscore,
630 )?;
631
632 if !read_any {
633 self.error(start, SyntaxError::ExpectedDigit { radix: RADIX })?;
634 }
635
636 Ok(LazyInteger {
637 start,
638 end: self.cur_pos(),
639 not_octal,
640 has_underscore,
641 })
642 }
643
644 fn read_number<const START_WITH_DOT: bool, const START_WITH_ZERO: bool>(
646 &mut self,
647 ) -> LexResult<Either<(f64, Atom), (Box<BigIntValue>, Atom)>> {
648 debug_assert!(!(START_WITH_DOT && START_WITH_ZERO));
649 debug_assert!(self.cur().is_some());
650
651 let start = self.cur_pos();
652 let mut has_underscore = false;
653
654 let lazy_integer = if START_WITH_DOT {
655 debug_assert!(
657 self.cur().is_some_and(|c| c == '.'),
658 "read_number<START_WITH_DOT = true> expects current char to be '.'"
659 );
660 LazyInteger {
661 start,
662 end: start,
663 not_octal: true,
664 has_underscore: false,
665 }
666 } else {
667 debug_assert!(!START_WITH_DOT);
668 debug_assert!(!START_WITH_ZERO || self.cur().unwrap() == '0');
669
670 let lazy_integer = self.read_number_no_dot_as_str::<10>()?;
672 let s = unsafe {
673 self.input_slice(lazy_integer.start, lazy_integer.end)
675 };
676
677 if (!START_WITH_ZERO || lazy_integer.end - lazy_integer.start == BytePos(1))
679 && self.eat(b'n')
680 {
681 let end = self.cur_pos();
682 let raw = unsafe {
683 self.input_slice(start, end)
685 };
686 let bigint_value = num_bigint::BigInt::parse_bytes(s.as_bytes(), 10).unwrap();
687 return Ok(Either::Right((Box::new(bigint_value), self.atom(raw))));
688 }
689
690 if START_WITH_ZERO {
691 if s.as_bytes().iter().all(|&c| c == b'0') {
694 if start.0 != self.last_pos().0 - 1 {
701 let end = self.cur_pos();
702 let raw = unsafe {
703 self.input_slice(start, end)
705 };
706 let raw = self.atom(raw);
707 return self
708 .make_legacy_octal(start, 0f64)
709 .map(|value| Either::Left((value, raw)));
710 }
711 } else if lazy_integer.not_octal {
712 self.emit_strict_mode_error(start, SyntaxError::LegacyDecimal);
714 } else {
715 let s = remove_underscore(s, lazy_integer.has_underscore);
717 let val = parse_integer::<8>(&s);
718 let end = self.cur_pos();
719 let raw = unsafe {
720 self.input_slice(start, end)
722 };
723 let raw = self.atom(raw);
724 return self
725 .make_legacy_octal(start, val)
726 .map(|value| Either::Left((value, raw)));
727 }
728 }
729
730 lazy_integer
731 };
732
733 has_underscore |= lazy_integer.has_underscore;
734 let has_dot = self.cur() == Some('.');
737 if has_dot {
741 self.bump();
742
743 debug_assert!(!START_WITH_DOT || self.cur().is_some_and(|cur| cur.is_ascii_digit()));
745
746 self.read_digits::<_, (), 10>(|_, _, _| Ok(((), true)), true, &mut has_underscore)?;
748 }
749
750 let has_e = self.cur().is_some_and(|c| c == 'e' || c == 'E');
751 if has_e {
758 self.bump(); let next = match self.cur() {
761 Some(next) => next,
762 None => {
763 let pos = self.cur_pos();
764 self.error(pos, SyntaxError::NumLitTerminatedWithExp)?
765 }
766 };
767
768 if next == '+' || next == '-' {
769 self.bump(); }
771
772 let lazy_integer = self.read_number_no_dot_as_str::<10>()?;
773 has_underscore |= lazy_integer.has_underscore;
774 }
775
776 let val = if has_dot || has_e {
777 let end = self.cur_pos();
778 let raw = unsafe {
779 self.input_slice(start, end)
781 };
782
783 let raw = remove_underscore(raw, has_underscore);
784 raw.parse().expect("failed to parse float literal")
785 } else {
786 let s = unsafe { self.input_slice(lazy_integer.start, lazy_integer.end) };
787 let s = remove_underscore(s, has_underscore);
788 parse_integer::<10>(&s)
789 };
790
791 self.ensure_not_ident()?;
792
793 let end = self.cur_pos();
794 let raw_str = unsafe {
795 self.input_slice(start, end)
797 };
798 Ok(Either::Left((val, raw_str.into())))
799 }
800
801 fn read_int_u32<const RADIX: u8>(&mut self, len: u8) -> LexResult<Option<u32>> {
802 let start = self.state().start();
803
804 let mut count = 0;
805 let v = self.read_digits::<_, Option<u32>, RADIX>(
806 |opt: Option<u32>, radix, val| {
807 count += 1;
808
809 let total = opt
810 .unwrap_or_default()
811 .checked_mul(radix as u32)
812 .and_then(|v| v.checked_add(val))
813 .ok_or_else(|| {
814 let span = Span::new_with_checked(start, start);
815 crate::error::Error::new(span, SyntaxError::InvalidUnicodeEscape)
816 })?;
817
818 Ok((Some(total), count != len))
819 },
820 true,
821 &mut false,
822 )?;
823 if len != 0 && count != len {
824 Ok(None)
825 } else {
826 Ok(v)
827 }
828 }
829
830 fn read_radix_number<const RADIX: u8>(
832 &mut self,
833 ) -> LexResult<Either<(f64, Atom), (Box<BigIntValue>, Atom)>> {
834 debug_assert!(
835 RADIX == 2 || RADIX == 8 || RADIX == 16,
836 "radix should be one of 2, 8, 16, but got {RADIX}"
837 );
838 let start = self.cur_pos();
839
840 debug_assert_eq!(self.cur(), Some('0'));
841 self.bump();
842
843 debug_assert!(self
844 .cur()
845 .is_some_and(|c| matches!(c, 'b' | 'B' | 'o' | 'O' | 'x' | 'X')));
846 self.bump();
847
848 let lazy_integer = self.read_number_no_dot_as_str::<RADIX>()?;
849 let has_underscore = lazy_integer.has_underscore;
850
851 let s = unsafe {
852 self.input_slice(lazy_integer.start, lazy_integer.end)
854 };
855 if self.eat(b'n') {
856 let end = self.cur_pos();
857 let raw = unsafe {
858 self.input_slice(start, end)
860 };
861
862 let bigint_value = num_bigint::BigInt::parse_bytes(s.as_bytes(), RADIX as _).unwrap();
863 return Ok(Either::Right((Box::new(bigint_value), self.atom(raw))));
864 }
865 let s = remove_underscore(s, has_underscore);
866 let val = parse_integer::<RADIX>(&s);
867
868 self.ensure_not_ident()?;
869
870 let end = self.cur_pos();
871 let raw = unsafe {
872 self.input_slice(start, end)
874 };
875
876 Ok(Either::Left((val, self.atom(raw))))
877 }
878
879 #[cold]
883 #[inline(never)]
884 fn consume_pending_comments(&mut self) {
885 if let Some(comments) = self.comments() {
886 let last = self.state().prev_hi();
887 let start_pos = self.start_pos();
888 let comments_buffer = self.comments_buffer_mut().unwrap();
889
890 let kind = if last == start_pos {
894 BufferedCommentKind::Leading
895 } else {
896 BufferedCommentKind::Trailing
897 };
898 comments_buffer.pending_to_comment(kind, last);
900
901 for comment in comments_buffer.take_comments() {
903 match comment.kind {
904 BufferedCommentKind::Leading => {
905 comments.add_leading(comment.pos, comment.comment);
906 }
907 BufferedCommentKind::Trailing => {
908 comments.add_trailing(comment.pos, comment.comment);
909 }
910 }
911 }
912 }
913 }
914
915 fn read_jsx_word(&mut self) -> LexResult<Self::Token> {
922 debug_assert!(self.syntax().jsx());
923 debug_assert!(self.input().cur().is_some_and(|c| c.is_ident_start()));
924
925 let mut first = true;
926 let slice = self.input_uncons_while(|c| {
927 if first {
928 first = false;
929 c.is_ident_start()
930 } else {
931 c.is_ident_part() || c == '-'
932 }
933 });
934
935 Ok(Self::Token::jsx_name(slice, self))
936 }
937
938 fn read_jsx_entity(&mut self) -> LexResult<(char, String)> {
939 debug_assert!(self.syntax().jsx());
940
941 fn from_code(s: &str, radix: u32) -> LexResult<char> {
942 let c = char::from_u32(
944 u32::from_str_radix(s, radix).expect("failed to parse string as number"),
945 )
946 .expect("failed to parse number as char");
947
948 Ok(c)
949 }
950
951 fn is_hex(s: &str) -> bool {
952 s.chars().all(|c| c.is_ascii_hexdigit())
953 }
954
955 fn is_dec(s: &str) -> bool {
956 s.chars().all(|c| c.is_ascii_digit())
957 }
958
959 let mut s = SmartString::<LazyCompact>::default();
960
961 debug_assert!(self.input().cur().is_some_and(|c| c == '&'));
962 self.bump();
963
964 let start_pos = self.input().cur_pos();
965
966 for _ in 0..10 {
967 let c = match self.input().cur() {
968 Some(c) => c,
969 None => break,
970 };
971 self.bump();
972
973 if c == ';' {
974 if let Some(stripped) = s.strip_prefix('#') {
975 if stripped.starts_with('x') {
976 if is_hex(&s[2..]) {
977 let value = from_code(&s[2..], 16)?;
978
979 return Ok((value, format!("&{s};")));
980 }
981 } else if is_dec(stripped) {
982 let value = from_code(stripped, 10)?;
983
984 return Ok((value, format!("&{s};")));
985 }
986 } else if let Some(entity) = xhtml(&s) {
987 return Ok((entity, format!("&{s};")));
988 }
989
990 break;
991 }
992
993 s.push(c)
994 }
995
996 unsafe {
997 self.input_mut().reset_to(start_pos);
999 }
1000
1001 Ok(('&', "&".to_string()))
1002 }
1003
1004 fn read_jsx_new_line(&mut self, normalize_crlf: bool) -> LexResult<Either<&'static str, char>> {
1005 debug_assert!(self.syntax().jsx());
1006 let ch = self.input().cur().unwrap();
1007 self.bump();
1008
1009 let out = if ch == '\r' && self.input().cur() == Some('\n') {
1010 self.bump(); Either::Left(if normalize_crlf { "\n" } else { "\r\n" })
1012 } else {
1013 Either::Right(ch)
1014 };
1015 Ok(out)
1016 }
1017
1018 fn read_jsx_str(&mut self, quote: char) -> LexResult<Self::Token> {
1019 debug_assert!(self.syntax().jsx());
1020 let start = self.input().cur_pos();
1021 unsafe {
1022 self.input_mut().bump(); }
1025 let mut out = String::new();
1026 let mut chunk_start = self.input().cur_pos();
1027 loop {
1028 let ch = match self.input().cur() {
1029 Some(c) => c,
1030 None => {
1031 self.emit_error(start, SyntaxError::UnterminatedStrLit);
1032 break;
1033 }
1034 };
1035 let cur_pos = self.input().cur_pos();
1036 if ch == '\\' {
1037 let value = unsafe {
1038 self.input_slice(chunk_start, cur_pos)
1040 };
1041
1042 out.push_str(value);
1043 out.push('\\');
1044
1045 self.bump();
1046
1047 chunk_start = self.input().cur_pos();
1048
1049 continue;
1050 }
1051
1052 if ch == quote {
1053 break;
1054 }
1055
1056 if ch == '&' {
1057 let value = unsafe {
1058 self.input_slice(chunk_start, cur_pos)
1060 };
1061
1062 out.push_str(value);
1063
1064 let jsx_entity = self.read_jsx_entity()?;
1065
1066 out.push(jsx_entity.0);
1067
1068 chunk_start = self.input().cur_pos();
1069 } else if ch.is_line_terminator() {
1070 let value = unsafe {
1071 self.input_slice(chunk_start, cur_pos)
1073 };
1074
1075 out.push_str(value);
1076
1077 match self.read_jsx_new_line(false)? {
1078 Either::Left(s) => {
1079 out.push_str(s);
1080 }
1081 Either::Right(c) => {
1082 out.push(c);
1083 }
1084 }
1085
1086 chunk_start = cur_pos + BytePos(ch.len_utf8() as _);
1087 } else {
1088 unsafe {
1089 self.input_mut().bump();
1091 }
1092 }
1093 }
1094 let cur_pos = self.input().cur_pos();
1095 let s = unsafe {
1096 self.input_slice(chunk_start, cur_pos)
1098 };
1099 let value = if out.is_empty() {
1100 self.atom(s)
1102 } else {
1103 out.push_str(s);
1104 self.atom(out)
1105 };
1106
1107 if self.input().peek_ahead().is_some() {
1110 self.bump();
1111 }
1112
1113 let end = self.input().cur_pos();
1114 let raw = unsafe {
1115 self.input_slice(start, end)
1117 };
1118 let raw = self.atom(raw);
1119 Ok(Self::Token::str(value, raw, self))
1120 }
1121
1122 fn read_unicode_escape(&mut self) -> LexResult<Vec<Char>> {
1123 debug_assert_eq!(self.cur(), Some('u'));
1124
1125 let mut chars = Vec::with_capacity(4);
1126 let mut is_curly = false;
1127
1128 self.bump(); if self.eat(b'{') {
1131 is_curly = true;
1132 }
1133
1134 let state = self.input().cur_pos();
1135 let c = match self.read_int_u32::<16>(if is_curly { 0 } else { 4 }) {
1136 Ok(Some(val)) => {
1137 if 0x0010_ffff >= val {
1138 char::from_u32(val)
1139 } else {
1140 let start = self.cur_pos();
1141
1142 self.error(
1143 start,
1144 SyntaxError::BadCharacterEscapeSequence {
1145 expected: if is_curly {
1146 "1-6 hex characters in the range 0 to 10FFFF."
1147 } else {
1148 "4 hex characters"
1149 },
1150 },
1151 )?
1152 }
1153 }
1154 _ => {
1155 let start = self.cur_pos();
1156
1157 self.error(
1158 start,
1159 SyntaxError::BadCharacterEscapeSequence {
1160 expected: if is_curly {
1161 "1-6 hex characters"
1162 } else {
1163 "4 hex characters"
1164 },
1165 },
1166 )?
1167 }
1168 };
1169
1170 match c {
1171 Some(c) => {
1172 chars.push(c.into());
1173 }
1174 _ => {
1175 unsafe {
1176 self.input_mut().reset_to(state);
1178 }
1179
1180 chars.push(Char::from('\\'));
1181 chars.push(Char::from('u'));
1182
1183 if is_curly {
1184 chars.push(Char::from('{'));
1185
1186 for _ in 0..6 {
1187 if let Some(c) = self.input().cur() {
1188 if c == '}' {
1189 break;
1190 }
1191
1192 self.bump();
1193
1194 chars.push(Char::from(c));
1195 } else {
1196 break;
1197 }
1198 }
1199
1200 chars.push(Char::from('}'));
1201 } else {
1202 for _ in 0..4 {
1203 if let Some(c) = self.input().cur() {
1204 self.bump();
1205
1206 chars.push(Char::from(c));
1207 }
1208 }
1209 }
1210 }
1211 }
1212
1213 if is_curly && !self.eat(b'}') {
1214 self.error(state, SyntaxError::InvalidUnicodeEscape)?
1215 }
1216
1217 Ok(chars)
1218 }
1219
1220 #[cold]
1221 fn read_shebang(&mut self) -> LexResult<Option<Atom>> {
1222 if self.input().cur() != Some('#') || self.input().peek() != Some('!') {
1223 return Ok(None);
1224 }
1225 self.bump(); self.bump(); let s = self.input_uncons_while(|c| !c.is_line_terminator());
1228 Ok(Some(self.atom(s)))
1229 }
1230
1231 fn read_tmpl_token(&mut self, start_of_tpl: BytePos) -> LexResult<Self::Token> {
1232 let start = self.cur_pos();
1233
1234 let mut cooked = Ok(String::new());
1235 let mut cooked_slice_start = start;
1236 let raw_slice_start = start;
1237
1238 macro_rules! consume_cooked {
1239 () => {{
1240 if let Ok(cooked) = &mut cooked {
1241 let last_pos = self.cur_pos();
1242 cooked.push_str(unsafe {
1243 self.input_slice(cooked_slice_start, last_pos)
1246 });
1247 }
1248 }};
1249 }
1250
1251 if start == self.cur_pos() && self.state().last_was_tpl_element() {
1253 if let Some(c) = self.cur() {
1254 if c == '$' && self.peek() == Some('{') {
1255 self.bump(); self.bump(); return Ok(Self::Token::DOLLAR_LBRACE);
1258 } else if c == '`' {
1259 self.bump(); return Ok(Self::Token::BACKQUOTE);
1261 }
1262 }
1263 }
1264
1265 loop {
1267 let matched_byte = byte_search! {
1268 lexer: self,
1269 table: TEMPLATE_LITERAL_TABLE,
1270 handle_eof: {
1271 self.error(start_of_tpl, SyntaxError::UnterminatedTpl)?
1273 }
1274 };
1275
1276 match matched_byte {
1277 b'$' => {
1278 if self.peek() == Some('{') {
1280 let cooked = if cooked_slice_start == raw_slice_start {
1282 let last_pos = self.cur_pos();
1283 let s = unsafe {
1284 self.input_slice(cooked_slice_start, last_pos)
1287 };
1288 Ok(self.atom(s))
1289 } else {
1290 consume_cooked!();
1291 cooked.map(|s| self.atom(s))
1292 };
1293
1294 let end = self.input().cur_pos();
1295 let raw = unsafe {
1296 self.input_slice(raw_slice_start, end)
1299 };
1300 let raw = self.atom(raw);
1301 return Ok(Self::Token::template(cooked, raw, self));
1302 } else {
1303 self.bump();
1305 continue;
1306 }
1307 }
1308 b'`' => {
1309 let cooked = if cooked_slice_start == raw_slice_start {
1311 let last_pos = self.cur_pos();
1312 let s = unsafe { self.input_slice(cooked_slice_start, last_pos) };
1313 Ok(self.atom(s))
1314 } else {
1315 consume_cooked!();
1316 cooked.map(|s| self.atom(s))
1317 };
1318
1319 let end = self.input().cur_pos();
1320 let raw = unsafe { self.input_slice(raw_slice_start, end) };
1321 let raw = self.atom(raw);
1322 return Ok(Self::Token::template(cooked, raw, self));
1323 }
1324 b'\r' => {
1325 self.state_mut().mark_had_line_break();
1327 consume_cooked!();
1328
1329 self.bump(); if self.peek() == Some('\n') {
1332 self.bump(); }
1334
1335 if let Ok(ref mut cooked) = cooked {
1336 cooked.push('\n');
1337 }
1338 cooked_slice_start = self.cur_pos();
1339 }
1340 b'\\' => {
1341 consume_cooked!();
1343
1344 match self.read_escaped_char(true) {
1345 Ok(Some(chars)) => {
1346 if let Ok(ref mut cooked) = cooked {
1347 for c in chars {
1348 cooked.extend(c);
1349 }
1350 }
1351 }
1352 Ok(None) => {}
1353 Err(error) => {
1354 cooked = Err(error);
1355 }
1356 }
1357
1358 cooked_slice_start = self.cur_pos();
1359 }
1360 _ => unreachable!(),
1361 }
1362 }
1363 }
1364
1365 fn read_escaped_char(&mut self, in_template: bool) -> LexResult<Option<Vec<Char>>> {
1369 debug_assert_eq!(self.cur(), Some('\\'));
1370
1371 let start = self.cur_pos();
1372
1373 self.bump(); let c = match self.cur() {
1376 Some(c) => c,
1377 None => self.error_span(pos_span(start), SyntaxError::InvalidStrEscape)?,
1378 };
1379
1380 let c = match c {
1381 '\\' => '\\',
1382 'n' => '\n',
1383 'r' => '\r',
1384 't' => '\t',
1385 'b' => '\u{0008}',
1386 'v' => '\u{000b}',
1387 'f' => '\u{000c}',
1388 '\r' => {
1389 self.bump(); self.eat(b'\n');
1392
1393 return Ok(None);
1394 }
1395 '\n' | '\u{2028}' | '\u{2029}' => {
1396 self.bump();
1397
1398 return Ok(None);
1399 }
1400
1401 'x' => {
1403 self.bump(); match self.read_int_u32::<16>(2)? {
1406 Some(val) => return Ok(Some(vec![Char::from(val)])),
1407 None => self.error(
1408 start,
1409 SyntaxError::BadCharacterEscapeSequence {
1410 expected: "2 hex characters",
1411 },
1412 )?,
1413 }
1414 }
1415
1416 'u' => match self.read_unicode_escape() {
1418 Ok(chars) => return Ok(Some(chars)),
1419 Err(err) => self.error(start, err.into_kind())?,
1420 },
1421
1422 '0'..='7' => {
1424 self.bump();
1425
1426 let first_c = if c == '0' {
1427 match self.cur() {
1428 Some(next) if next.is_digit(8) => c,
1429 _ => return Ok(Some(vec!['\u{0000}'.into()])),
1431 }
1432 } else {
1433 c
1434 };
1435
1436 if in_template {
1438 self.error(start, SyntaxError::LegacyOctal)?
1439 }
1440
1441 self.emit_strict_mode_error(start, SyntaxError::LegacyOctal);
1442
1443 let mut value: u8 = first_c.to_digit(8).unwrap() as u8;
1444
1445 macro_rules! one {
1446 ($check:expr) => {{
1447 let cur = self.cur();
1448
1449 match cur.and_then(|c| c.to_digit(8)) {
1450 Some(v) => {
1451 value = if $check {
1452 let new_val = value
1453 .checked_mul(8)
1454 .and_then(|value| value.checked_add(v as u8));
1455 match new_val {
1456 Some(val) => val,
1457 None => return Ok(Some(vec![Char::from(value as char)])),
1458 }
1459 } else {
1460 value * 8 + v as u8
1461 };
1462
1463 self.bump();
1464 }
1465 _ => return Ok(Some(vec![Char::from(value as u32)])),
1466 }
1467 }};
1468 }
1469
1470 one!(false);
1471 one!(true);
1472
1473 return Ok(Some(vec![Char::from(value as char)]));
1474 }
1475 _ => c,
1476 };
1477
1478 unsafe {
1479 self.input_mut().bump();
1481 }
1482
1483 Ok(Some(vec![c.into()]))
1484 }
1485
1486 fn read_regexp(&mut self, start: BytePos) -> LexResult<Self::Token> {
1488 unsafe {
1489 self.input_mut().reset_to(start);
1491 }
1492
1493 debug_assert_eq!(self.cur(), Some('/'));
1494
1495 let start = self.cur_pos();
1496
1497 self.bump(); let slice_start = self.cur_pos();
1500
1501 let (mut escaped, mut in_class) = (false, false);
1502
1503 while let Some(c) = self.cur() {
1504 if c.is_line_terminator() {
1507 let span = self.span(start);
1508
1509 return Err(crate::error::Error::new(
1510 span,
1511 SyntaxError::UnterminatedRegExp,
1512 ));
1513 }
1514
1515 if escaped {
1516 escaped = false;
1517 } else {
1518 match c {
1519 '[' => in_class = true,
1520 ']' if in_class => in_class = false,
1521 '/' if !in_class => break,
1523 _ => {}
1524 }
1525
1526 escaped = c == '\\';
1527 }
1528
1529 self.bump();
1530 }
1531
1532 let content = {
1533 let end = self.cur_pos();
1534 let s = unsafe { self.input_slice(slice_start, end) };
1535 self.atom(s)
1536 };
1537
1538 if !self.is(b'/') {
1540 let span = self.span(start);
1541
1542 return Err(crate::error::Error::new(
1543 span,
1544 SyntaxError::UnterminatedRegExp,
1545 ));
1546 }
1547
1548 self.bump(); let flags = {
1557 match self.cur() {
1558 Some(c) if c.is_ident_start() => self
1559 .read_word_as_str_with()
1560 .map(|(s, _)| Some(self.atom(s))),
1561 _ => Ok(None),
1562 }
1563 }?
1564 .unwrap_or_default();
1565
1566 Ok(Self::Token::regexp(content, flags, self))
1567 }
1568
1569 fn read_word_as_str_with(&mut self) -> LexResult<(Cow<'a, str>, bool)> {
1571 debug_assert!(self.cur().is_some());
1572 let slice_start = self.cur_pos();
1573
1574 if let Some(c) = self.input().cur_as_ascii() {
1576 if Ident::is_valid_ascii_start(c) {
1577 self.bump();
1579
1580 let next_byte = byte_search! {
1582 lexer: self,
1583 table: NOT_ASCII_ID_CONTINUE_TABLE,
1584 handle_eof: {
1585 let end = self.cur_pos();
1587 let s = unsafe {
1588 self.input_slice(slice_start, end)
1591 };
1592
1593 return Ok((Cow::Borrowed(s), false));
1594 },
1595 };
1596
1597 if !next_byte.is_ascii() {
1599 return self.read_word_as_str_with_slow_path(slice_start);
1601 } else if next_byte == b'\\' {
1602 return self.read_word_as_str_with_slow_path(slice_start);
1604 } else {
1605 let end = self.cur_pos();
1607 let s = unsafe {
1608 self.input_slice(slice_start, end)
1611 };
1612
1613 return Ok((Cow::Borrowed(s), false));
1614 }
1615 }
1616 }
1617
1618 self.read_word_as_str_with_slow_path(slice_start)
1620 }
1621
1622 #[cold]
1624 fn read_word_as_str_with_slow_path(
1625 &mut self,
1626 mut slice_start: BytePos,
1627 ) -> LexResult<(Cow<'a, str>, bool)> {
1628 let mut first = true;
1629 let mut has_escape = false;
1630
1631 let mut buf = String::with_capacity(16);
1632 loop {
1633 if let Some(c) = self.input().cur_as_ascii() {
1634 if Ident::is_valid_ascii_continue(c) {
1635 self.bump();
1636 continue;
1637 } else if first && Ident::is_valid_ascii_start(c) {
1638 self.bump();
1639 first = false;
1640 continue;
1641 }
1642
1643 if c == b'\\' {
1645 first = false;
1646 has_escape = true;
1647 let start = self.cur_pos();
1648 self.bump();
1649
1650 if !self.is(b'u') {
1651 self.error_span(pos_span(start), SyntaxError::ExpectedUnicodeEscape)?
1652 }
1653
1654 {
1655 let end = self.input().cur_pos();
1656 let s = unsafe {
1657 self.input_slice(slice_start, start)
1660 };
1661 buf.push_str(s);
1662 unsafe {
1663 self.input_mut().reset_to(end);
1665 }
1666 }
1667
1668 let chars = self.read_unicode_escape()?;
1669
1670 if let Some(c) = chars.first() {
1671 let valid = if first {
1672 c.is_ident_start()
1673 } else {
1674 c.is_ident_part()
1675 };
1676
1677 if !valid {
1678 self.emit_error(start, SyntaxError::InvalidIdentChar);
1679 }
1680 }
1681
1682 for c in chars {
1683 buf.extend(c);
1684 }
1685
1686 slice_start = self.cur_pos();
1687 continue;
1688 }
1689
1690 break;
1692 } else if let Some(c) = self.input().cur() {
1693 if Ident::is_valid_non_ascii_continue(c) {
1694 self.bump();
1695 continue;
1696 } else if first && Ident::is_valid_non_ascii_start(c) {
1697 self.bump();
1698 first = false;
1699 continue;
1700 }
1701 }
1702
1703 break;
1704 }
1705
1706 let end = self.cur_pos();
1707 let s = unsafe {
1708 self.input_slice(slice_start, end)
1711 };
1712 let value = if !has_escape {
1713 Cow::Borrowed(s)
1715 } else {
1716 buf.push_str(s);
1717 Cow::Owned(buf)
1718 };
1719
1720 Ok((value, has_escape))
1721 }
1722
1723 fn read_token_number_sign(&mut self) -> LexResult<Self::Token> {
1725 debug_assert!(self.cur().is_some_and(|c| c == '#'));
1726
1727 self.bump(); debug_assert!(
1732 !self.input().is_at_start() || self.cur() != Some('!'),
1733 "#! should have already been handled by read_shebang()"
1734 );
1735 Ok(Self::Token::HASH)
1736 }
1737
1738 #[inline(never)]
1742 fn read_token_dot(&mut self) -> LexResult<Self::Token> {
1743 debug_assert!(self.cur().is_some_and(|c| c == '.'));
1744 let next = match self.input().peek() {
1746 Some(next) => next,
1747 None => {
1748 self.bump(); return Ok(Self::Token::DOT);
1750 }
1751 };
1752 if next.is_ascii_digit() {
1753 return self.read_number::<true, false>().map(|v| match v {
1754 Left((value, raw)) => Self::Token::num(value, raw, self),
1755 Right(_) => unreachable!("read_number should not return bigint for leading dot"),
1756 });
1757 }
1758
1759 self.bump(); if next == '.' && self.input().peek() == Some('.') {
1762 self.bump(); self.bump(); return Ok(Self::Token::DOTDOTDOT);
1766 }
1767
1768 Ok(Self::Token::DOT)
1769 }
1770
1771 #[inline(never)]
1775 fn read_token_question_mark(&mut self) -> LexResult<Self::Token> {
1776 debug_assert!(self.cur().is_some_and(|c| c == '?'));
1777 self.bump();
1778 if self.input_mut().eat_byte(b'?') {
1779 if self.input_mut().eat_byte(b'=') {
1780 Ok(Self::Token::NULLISH_ASSIGN)
1781 } else {
1782 Ok(Self::Token::NULLISH_COALESCING)
1783 }
1784 } else {
1785 Ok(Self::Token::QUESTION)
1786 }
1787 }
1788
1789 #[inline(never)]
1793 fn read_token_colon(&mut self) -> LexResult<Self::Token> {
1794 debug_assert!(self.cur().is_some_and(|c| c == ':'));
1795 self.bump(); Ok(Self::Token::COLON)
1797 }
1798
1799 #[inline(never)]
1803 fn read_token_zero(&mut self) -> LexResult<Self::Token> {
1804 debug_assert_eq!(self.cur(), Some('0'));
1805 let next = self.input().peek();
1806
1807 let bigint = match next {
1808 Some('x') | Some('X') => self.read_radix_number::<16>(),
1809 Some('o') | Some('O') => self.read_radix_number::<8>(),
1810 Some('b') | Some('B') => self.read_radix_number::<2>(),
1811 _ => {
1812 return self.read_number::<false, true>().map(|v| match v {
1813 Left((value, raw)) => Self::Token::num(value, raw, self),
1814 Right((value, raw)) => Self::Token::bigint(value, raw, self),
1815 });
1816 }
1817 };
1818
1819 bigint.map(|v| match v {
1820 Left((value, raw)) => Self::Token::num(value, raw, self),
1821 Right((value, raw)) => Self::Token::bigint(value, raw, self),
1822 })
1823 }
1824
1825 #[inline(never)]
1829 fn read_token_logical<const C: u8>(&mut self) -> LexResult<Self::Token> {
1830 debug_assert!(C == b'|' || C == b'&');
1831 let is_bit_and = C == b'&';
1832 let had_line_break_before_last = self.had_line_break_before_last();
1833 let start = self.cur_pos();
1834
1835 unsafe {
1836 self.input_mut().bump();
1838 }
1839 let token = if is_bit_and {
1840 Self::Token::BIT_AND
1841 } else {
1842 Self::Token::BIT_OR
1843 };
1844
1845 if self.input_mut().eat_byte(b'=') {
1847 return Ok(if is_bit_and {
1848 Self::Token::BIT_AND_EQ
1849 } else {
1850 debug_assert!(token.is_bit_or());
1851 Self::Token::BIT_OR_EQ
1852 });
1853 }
1854
1855 if self.input().cur() == Some(C as char) {
1857 unsafe {
1858 self.input_mut().bump();
1860 }
1861
1862 if self.input().cur() == Some('=') {
1863 unsafe {
1864 self.input_mut().bump();
1866 }
1867
1868 return Ok(if is_bit_and {
1869 Self::Token::LOGICAL_AND_EQ
1870 } else {
1871 debug_assert!(token.is_bit_or());
1872 Self::Token::LOGICAL_OR_EQ
1873 });
1874 }
1875
1876 if had_line_break_before_last && !is_bit_and && self.is_str("||||| ") {
1879 let span = fixed_len_span(start, 7);
1880 self.emit_error_span(span, SyntaxError::TS1185);
1881 self.skip_line_comment(5);
1882 self.skip_space::<true>();
1883 return self.error_span(span, SyntaxError::TS1185);
1884 }
1885
1886 return Ok(if is_bit_and {
1887 Self::Token::LOGICAL_AND
1888 } else {
1889 debug_assert!(token.is_bit_or());
1890 Self::Token::LOGICAL_OR
1891 });
1892 }
1893
1894 Ok(token)
1895 }
1896
1897 #[inline(never)]
1901 fn read_token_mul_mod(&mut self, is_mul: bool) -> LexResult<Self::Token> {
1902 debug_assert!(self.cur().is_some_and(|c| c == '*' || c == '%'));
1903 self.bump();
1904 let token = if is_mul {
1905 if self.input_mut().eat_byte(b'*') {
1906 Self::Token::EXP
1908 } else {
1909 Self::Token::MUL
1910 }
1911 } else {
1912 Self::Token::MOD
1913 };
1914
1915 Ok(if self.input_mut().eat_byte(b'=') {
1916 if token.is_star() {
1917 Self::Token::MUL_EQ
1918 } else if token.is_mod() {
1919 Self::Token::MOD_EQ
1920 } else {
1921 debug_assert!(token.is_exp());
1922 Self::Token::EXP_EQ
1923 }
1924 } else {
1925 token
1926 })
1927 }
1928
1929 #[inline(never)]
1930 fn read_slash(&mut self) -> LexResult<Self::Token> {
1931 debug_assert_eq!(self.cur(), Some('/'));
1932 self.bump(); Ok(if self.eat(b'=') {
1934 Self::Token::DIV_EQ
1935 } else {
1936 Self::Token::DIV
1937 })
1938 }
1939
1940 fn read_ident_unknown(&mut self) -> LexResult<Self::Token> {
1943 debug_assert!(self.cur().is_some());
1944
1945 let (s, has_escape) = self.read_word_as_str_with()?;
1946 let atom = self.atom(s);
1947 let word = Self::Token::unknown_ident(atom, self);
1948
1949 if has_escape {
1950 self.update_token_flags(|flags| *flags |= TokenFlags::UNICODE);
1951 }
1952
1953 Ok(word)
1954 }
1955
1956 fn read_str_lit(&mut self) -> LexResult<Self::Token> {
1959 debug_assert!(self.cur() == Some('\'') || self.cur() == Some('"'));
1960 let start = self.cur_pos();
1961 let quote = self.cur().unwrap() as u8;
1962
1963 self.bump(); let mut slice_start = self.input().cur_pos();
1966
1967 let mut buf: Option<String> = None;
1968
1969 loop {
1970 let table = if quote == b'"' {
1971 &DOUBLE_QUOTE_STRING_END_TABLE
1972 } else {
1973 &SINGLE_QUOTE_STRING_END_TABLE
1974 };
1975
1976 let fast_path_result = byte_search! {
1977 lexer: self,
1978 table: table,
1979 handle_eof: {
1980 let value_end = self.cur_pos();
1981 let s = unsafe {
1982 self.input_slice(slice_start, value_end)
1985 };
1986
1987 self.emit_error(start, SyntaxError::UnterminatedStrLit);
1988
1989 let end = self.cur_pos();
1990 let raw = unsafe { self.input_slice(start, end) };
1991 return Ok(Self::Token::str(self.atom(s), self.atom(raw), self));
1992 },
1993 };
1994
1995 match fast_path_result {
1996 b'"' | b'\'' if fast_path_result == quote => {
1997 let value_end = self.cur_pos();
1998
1999 let value = if let Some(buf) = buf.as_mut() {
2000 debug_assert!(unsafe { self.input_slice(start, value_end).contains('\\') });
2002 let s = unsafe {
2003 self.input_slice(slice_start, value_end)
2006 };
2007 buf.push_str(s);
2008 self.atom(&*buf)
2009 } else {
2010 let s = unsafe { self.input_slice(slice_start, value_end) };
2011 self.atom(s)
2012 };
2013
2014 unsafe {
2015 self.input_mut().bump();
2017 }
2018
2019 let end = self.cur_pos();
2020 let raw = unsafe {
2021 self.input_slice(start, end)
2024 };
2025 let raw = self.atom(raw);
2026 return Ok(Self::Token::str(value, raw, self));
2027 }
2028 b'\\' => {
2029 let end = self.cur_pos();
2030 let s = unsafe {
2031 self.input_slice(slice_start, end)
2034 };
2035
2036 if buf.is_none() {
2037 buf = Some(s.to_string());
2038 } else {
2039 buf.as_mut().unwrap().push_str(s);
2040 }
2041
2042 if let Some(chars) = self.read_escaped_char(false)? {
2043 for c in chars {
2044 buf.as_mut().unwrap().extend(c);
2045 }
2046 }
2047
2048 slice_start = self.cur_pos();
2049 continue;
2050 }
2051 b'\n' | b'\r' => {
2052 let end = self.cur_pos();
2053 let s = unsafe {
2054 self.input_slice(slice_start, end)
2057 };
2058
2059 self.emit_error(start, SyntaxError::UnterminatedStrLit);
2060
2061 let end = self.cur_pos();
2062
2063 let raw = unsafe {
2064 self.input_slice(start, end)
2067 };
2068 return Ok(Self::Token::str(self.atom(s), self.atom(raw), self));
2069 }
2070 _ => self.bump(),
2071 }
2072 }
2073 }
2074
2075 fn read_keyword_with(
2076 &mut self,
2077 convert: &dyn Fn(&str) -> Option<Self::Token>,
2078 ) -> LexResult<Self::Token> {
2079 debug_assert!(self.cur().is_some());
2080
2081 let start = self.cur_pos();
2082 let (s, has_escape) = self.read_keyword_as_str_with()?;
2083 if let Some(word) = convert(s.as_ref()) {
2084 if has_escape && word.is_reserved(self.ctx()) {
2089 self.error(
2090 start,
2091 SyntaxError::EscapeInReservedWord { word: Atom::new(s) },
2092 )
2093 } else {
2094 Ok(word)
2095 }
2096 } else {
2097 let atom = self.atom(s);
2098 Ok(Self::Token::unknown_ident(atom, self))
2099 }
2100 }
2101
2102 fn read_keyword_as_str_with(&mut self) -> LexResult<(Cow<'a, str>, bool)> {
2106 let slice_start = self.cur_pos();
2107
2108 self.bump();
2112
2113 let next_byte = byte_search! {
2115 lexer: self,
2116 table: NOT_ASCII_ID_CONTINUE_TABLE,
2117 handle_eof: {
2118 let end = self.cur_pos();
2120 let s = unsafe {
2121 self.input_slice(slice_start, end)
2124 };
2125
2126 return Ok((Cow::Borrowed(s), false));
2127 },
2128 };
2129
2130 if !next_byte.is_ascii() || next_byte == b'\\' {
2132 self.read_word_as_str_with_slow_path(slice_start)
2135 } else {
2136 let end = self.cur_pos();
2138 let s = unsafe {
2139 self.input_slice(slice_start, end)
2142 };
2143
2144 Ok((Cow::Borrowed(s), false))
2145 }
2146 }
2147}
2148
2149pub fn pos_span(p: BytePos) -> Span {
2150 Span::new_with_checked(p, p)
2151}
2152
2153pub fn fixed_len_span(p: BytePos, len: u32) -> Span {
2154 Span::new_with_checked(p, p + BytePos(len))
2155}