swc_ecma_lexer/common/lexer/
mod.rs

1use std::borrow::Cow;
2
3use char::CharExt;
4use either::Either::{self, Left, Right};
5use num_bigint::BigInt as BigIntValue;
6use smartstring::{LazyCompact, SmartString};
7use state::State;
8use swc_atoms::{
9    wtf8::{CodePoint, Wtf8, Wtf8Buf},
10    Atom,
11};
12use swc_common::{
13    comments::{Comment, CommentKind},
14    input::{Input, StringInput},
15    BytePos, Span,
16};
17use swc_ecma_ast::{EsVersion, Ident};
18
19use self::jsx::xhtml;
20use super::{context::Context, input::Tokens};
21use crate::{
22    common::lexer::{
23        comments_buffer::{BufferedComment, BufferedCommentKind, CommentsBufferTrait},
24        number::{parse_integer, LazyInteger},
25    },
26    error::SyntaxError,
27    lexer::TokenFlags,
28};
29
30pub mod char;
31pub mod comments_buffer;
32mod jsx;
33pub mod number;
34mod search;
35pub mod state;
36pub mod token;
37pub mod whitespace;
38
39use token::TokenFactory;
40
41// Byte-search utilities
42use self::search::SafeByteMatchTable;
43use crate::{byte_search, safe_byte_match_table};
44
// ===== Byte match tables for fast scanning =====
// Each table marks the bytes at which a `byte_search!` scan should stop and
// hand control back to the caller.

// Irregular line breaks - '\u{2028}' (LS) and '\u{2029}' (PS)
// Both are encoded in UTF-8 as three bytes starting with 0xE2; the constants
// below hold the first byte and the two bytes that follow it.
const LS_OR_PS_FIRST: u8 = 0xe2;
const LS_BYTES_2_AND_3: [u8; 2] = [0x80, 0xa8];
const PS_BYTES_2_AND_3: [u8; 2] = [0x80, 0xa9];

// Stops at `\n`, `\r` and at 0xE2 (possible first byte of LS/PS; callers must
// inspect the following two bytes to confirm).
static LINE_BREAK_TABLE: SafeByteMatchTable =
    safe_byte_match_table!(|b| matches!(b, b'\n' | b'\r' | LS_OR_PS_FIRST));

// Same as `LINE_BREAK_TABLE`, plus `*` so that a candidate `*/` can be checked.
static BLOCK_COMMENT_SCAN_TABLE: SafeByteMatchTable =
    safe_byte_match_table!(|b| { matches!(b, b'*' | b'\n' | b'\r' | LS_OR_PS_FIRST) });

// Stops at the closing `"`, at a backslash, and at `\n` / `\r`.
static DOUBLE_QUOTE_STRING_END_TABLE: SafeByteMatchTable =
    safe_byte_match_table!(|b| matches!(b, b'"' | b'\n' | b'\\' | b'\r'));
// Stops at the closing `'`, at a backslash, and at `\n` / `\r`.
static SINGLE_QUOTE_STRING_END_TABLE: SafeByteMatchTable =
    safe_byte_match_table!(|b| matches!(b, b'\'' | b'\n' | b'\\' | b'\r'));

// Stops at the first byte that is NOT an ASCII letter, ASCII digit, `_` or `$`.
static NOT_ASCII_ID_CONTINUE_TABLE: SafeByteMatchTable =
    safe_byte_match_table!(|b| !(b.is_ascii_alphanumeric() || b == b'_' || b == b'$'));

// Stops at `$`, a backtick, a backslash, and `\r`.
static TEMPLATE_LITERAL_TABLE: SafeByteMatchTable =
    safe_byte_match_table!(|b| matches!(b, b'$' | b'`' | b'\\' | b'\r'));
67
/// Combines a UTF-16 surrogate pair into the astral code point it encodes.
///
/// `high` is expected to be a lead surrogate (`0xD800..=0xDBFF`) and `low` a
/// trail surrogate (`0xDC00..=0xDFFF`); no validation is performed here.
///
/// `https://tc39.es/ecma262/#sec-utf16decodesurrogatepair`
#[inline]
const fn pair_to_code_point(high: u32, low: u32) -> u32 {
    // The lead surrogate contributes the upper 10 bits of the offset.
    let lead_offset = (high - 0xd800) * 0x400;
    // The trail surrogate contributes the lower 10 bits; astral code points
    // start at 0x10000.
    lead_offset + low - 0xdc00 + 0x10000
}
74
/// A Unicode escape sequence.
///
/// `\u Hex4Digits`, `\u Hex4Digits \u Hex4Digits`, or `\u{ HexDigits }`.
#[derive(Debug)]
pub enum UnicodeEscape {
    /// `\u Hex4Digits` or `\u{ HexDigits }`, which forms a valid Unicode code
    /// point.
    ///
    /// Char cannot be in range 0xD800..=0xDFFF.
    CodePoint(char),
    /// `\u Hex4Digits \u Hex4Digits`, which forms a valid Unicode astral code
    /// point.
    ///
    /// Char is in the range 0x10000..=0x10FFFF.
    SurrogatePair(char),
    /// `\u Hex4Digits` or `\u{ HexDigits }`, which forms an invalid Unicode
    /// code point.
    ///
    /// Code unit is in the range 0xD800..=0xDFFF.
    LoneSurrogate(u32),
}
90
91impl From<UnicodeEscape> for CodePoint {
92    fn from(value: UnicodeEscape) -> Self {
93        match value {
94            UnicodeEscape::CodePoint(c) | UnicodeEscape::SurrogatePair(c) => {
95                CodePoint::from_char(c)
96            }
97            UnicodeEscape::LoneSurrogate(u) => unsafe { CodePoint::from_u32_unchecked(u) },
98        }
99    }
100}
101
/// Result type returned by the fallible lexer routines in this module.
///
/// NOTE(review): this aliases a type from `swc_ecma_parser`, while the methods
/// below build their errors with `crate::error::Error::new(..)` — confirm that
/// both refer to the same error type and that depending on the parser crate
/// from here is intended.
pub type LexResult<T> = swc_ecma_parser::lexer::LexResult<T>;
103
/// Returns `s` with every `_` (numeric separator) removed.
///
/// `has_underscore` must tell whether `s` actually contains a `_`; this is
/// checked with `debug_assert!` in both directions. When it is `false` the
/// input is returned borrowed and no allocation happens.
fn remove_underscore(s: &str, has_underscore: bool) -> Cow<'_, str> {
    if !has_underscore {
        debug_assert!(!s.contains('_'));
        return Cow::Borrowed(s);
    }

    debug_assert!(s.contains('_'));
    Cow::Owned(s.replace('_', ""))
}
113
114pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
115    type State: self::state::State;
116    type Token: token::TokenFactory<'a, TokenAndSpan, Self, Lexer = Self>;
117    type CommentsBuffer: CommentsBufferTrait;
118
119    fn input(&self) -> &StringInput<'a>;
120    fn input_mut(&mut self) -> &mut StringInput<'a>;
121    fn state(&self) -> &Self::State;
122    fn state_mut(&mut self) -> &mut Self::State;
123    fn comments(&self) -> Option<&'a dyn swc_common::comments::Comments>;
124    fn comments_buffer(&self) -> Option<&Self::CommentsBuffer>;
125    fn comments_buffer_mut(&mut self) -> Option<&mut Self::CommentsBuffer>;
126    /// # Safety
127    ///
128    /// We know that the start and the end are valid
129    unsafe fn input_slice(&mut self, start: BytePos, end: BytePos) -> &'a str;
130    fn input_uncons_while(&mut self, f: impl FnMut(char) -> bool) -> &'a str;
131    fn atom<'b>(&self, s: impl Into<Cow<'b, str>>) -> swc_atoms::Atom;
132    fn wtf8_atom<'b>(&self, s: impl Into<Cow<'b, Wtf8>>) -> swc_atoms::Wtf8Atom;
133    fn push_error(&mut self, error: crate::error::Error);
134
    /// Returns the `had_line_break` flag of the current lexer state.
    #[inline(always)]
    // The method name intentionally differs from the state accessor it
    // forwards to, which is what the lint would otherwise complain about.
    #[allow(clippy::misnamed_getters)]
    fn had_line_break_before_last(&self) -> bool {
        self.state().had_line_break()
    }
140
141    #[inline(always)]
142    fn span(&self, start: BytePos) -> Span {
143        let end = self.last_pos();
144        if cfg!(debug_assertions) && start > end {
145            unreachable!(
146                "assertion failed: (span.start <= span.end).
147 start = {}, end = {}",
148                start.0, end.0
149            )
150        }
151        Span { lo: start, hi: end }
152    }
153
    /// Advances the input by one character.
    ///
    /// This wraps the `unsafe` `bump` of the underlying input without checking
    /// anything itself, so callers are expected to invoke it only when there
    /// is a current character to consume.
    #[inline(always)]
    fn bump(&mut self) {
        unsafe {
            // Safety: Actually this is not safe but this is an internal method.
            // NOTE(review): soundness depends on every caller having verified
            // that the input is not at EOF.
            self.input_mut().bump()
        }
    }
161
    /// Returns whether the byte at the current position equals `c`
    /// (forwards to the input's `is_byte`). Does not consume anything.
    #[inline(always)]
    fn is(&self, c: u8) -> bool {
        self.input().is_byte(c)
    }
166
    /// Returns the result of the input's `is_str(s)` check — presumably
    /// whether the remaining input starts with `s`. Does not consume anything.
    #[inline(always)]
    fn is_str(&self, s: &str) -> bool {
        self.input().is_str(s)
    }
171
    /// Forwards to the input's `eat_byte(c)` and returns its result —
    /// presumably consuming the current byte when it equals `c` and reporting
    /// whether it did so.
    #[inline(always)]
    fn eat(&mut self, c: u8) -> bool {
        self.input_mut().eat_byte(c)
    }
176
    /// Returns the character at the current position, or `None` if the input
    /// has none. Does not consume anything.
    #[inline(always)]
    fn cur(&self) -> Option<char> {
        self.input().cur()
    }
181
    /// Returns the character following the current one, if any, without
    /// consuming anything.
    #[inline(always)]
    fn peek(&self) -> Option<char> {
        self.input().peek()
    }
186
    /// Forwards to the input's `peek_ahead` — presumably the character one
    /// further than [`Self::peek`]. Does not consume anything.
    #[inline(always)]
    fn peek_ahead(&self) -> Option<char> {
        self.input().peek_ahead()
    }
191
    /// Returns the byte position of the input's current location.
    #[inline(always)]
    fn cur_pos(&self) -> BytePos {
        self.input().cur_pos()
    }
196
    /// Returns the input's `last_pos`, which [`Self::span`] uses as the upper
    /// bound of the spans it creates.
    #[inline(always)]
    fn last_pos(&self) -> BytePos {
        self.input().last_pos()
    }
201
202    /// Shorthand for `let span = self.span(start); self.error_span(span)`
203    #[cold]
204    #[inline(never)]
205    fn error<T>(&self, start: BytePos, kind: SyntaxError) -> LexResult<T> {
206        let span = self.span(start);
207        self.error_span(span, kind)
208    }
209
    /// Always returns `Err` carrying a new error built from `span` and `kind`.
    ///
    /// The error is only returned, not recorded via `push_error`.
    #[cold]
    #[inline(never)]
    fn error_span<T>(&self, span: Span, kind: SyntaxError) -> LexResult<T> {
        Err(crate::error::Error::new(span, kind))
    }
215
216    #[cold]
217    #[inline(never)]
218    fn emit_error(&mut self, start: BytePos, kind: SyntaxError) {
219        let span = self.span(start);
220        self.emit_error_span(span, kind)
221    }
222
223    #[cold]
224    #[inline(never)]
225    fn emit_error_span(&mut self, span: Span, kind: SyntaxError) {
226        if self.ctx().contains(Context::IgnoreError) {
227            return;
228        }
229        tracing::warn!("Lexer error at {:?}", span);
230        let err = crate::error::Error::new(span, kind);
231        self.push_error(err);
232    }
233
234    #[cold]
235    #[inline(never)]
236    fn emit_strict_mode_error(&mut self, start: BytePos, kind: SyntaxError) {
237        let span = self.span(start);
238        if self.ctx().contains(Context::Strict) {
239            self.emit_error_span(span, kind);
240        } else {
241            let err = crate::error::Error::new(span, kind);
242            self.add_module_mode_error(err);
243        }
244    }
245
246    #[cold]
247    #[inline(never)]
248    fn emit_module_mode_error(&mut self, start: BytePos, kind: SyntaxError) {
249        let span = self.span(start);
250        let err = crate::error::Error::new(span, kind);
251        self.add_module_mode_error(err);
252    }
253
    /// Skips a single-line comment starting at the current position.
    ///
    /// `start_skip` is the number of bytes making up the comment opener (the
    /// caller in this file passes `2` for `//`). The comment runs up to, but
    /// not including, the next `\n`, `\r`, LS or PS, or to the end of input.
    ///
    /// When a comments buffer is present the comment text (without the opener)
    /// is stored, either as pending for the next token or as a trailing
    /// comment attached at the state's `prev_hi()`.
    #[inline(never)]
    fn skip_line_comment(&mut self, start_skip: usize) {
        // Position of the comment opener; the recorded comment span starts here.
        let start = self.cur_pos();
        self.input_mut().bump_bytes(start_skip);
        // Position right after the opener; the stored comment text starts here.
        let slice_start = self.cur_pos();

        // foo // comment for foo
        // bar
        //
        // foo
        // // comment for bar
        // bar
        //
        // A comment belongs to the next token when a line break was already
        // seen, or when the state says a trailing line comment is not possible.
        let is_for_next =
            self.state().had_line_break() || !self.state().can_have_trailing_line_comment();

        // Fast search for line-terminator
        byte_search! {
            lexer: self,
            table: LINE_BREAK_TABLE,
            continue_if: (matched_byte, pos_offset) {
                if matched_byte != LS_OR_PS_FIRST {
                    // '\r' or '\n' - definitely a line terminator
                    false
                } else {
                    // 0xE2 - could be LS/PS or some other Unicode character
                    // Check the next 2 bytes to see if it's really LS/PS
                    let current_slice = self.input().as_str();
                    let byte_pos = pos_offset;
                    if byte_pos + 2 < current_slice.len() {
                        let bytes = current_slice.as_bytes();
                        let next2 = [bytes[byte_pos + 1], bytes[byte_pos + 2]];
                        if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
                            // It's a real line terminator
                            false
                        } else {
                            // Some other Unicode character starting with 0xE2
                            true
                        }
                    } else {
                        // Not enough bytes for full LS/PS sequence
                        true
                    }
                }
            },
            handle_eof: {
                // Reached EOF: the entire remainder of the input is the comment
                let end = self.input().end_pos();

                if self.comments_buffer().is_some() {
                    // Safety: `slice_start` and `end` were both obtained from
                    // `self.input`.
                    let s = unsafe { self.input_slice(slice_start, end) };
                    let cmt = swc_common::comments::Comment {
                        kind: swc_common::comments::CommentKind::Line,
                        span: Span::new_with_checked(start, end),
                        text: self.atom(s),
                    };

                    if is_for_next {
                        self.comments_buffer_mut().unwrap().push_pending(cmt);
                    } else {
                        let pos = self.state().prev_hi();
                        self.comments_buffer_mut().unwrap().push_comment(BufferedComment {
                            kind: BufferedCommentKind::Trailing,
                            pos,
                            comment: cmt,
                        });
                    }
                }

                return;
            }
        };

        // Current position is at the line terminator
        let end = self.cur_pos();

        // Create and process slice only if comments need to be stored
        if self.comments_buffer().is_some() {
            let s = unsafe {
                // Safety: We know that the start and the end are valid
                self.input_slice(slice_start, end)
            };
            let cmt = swc_common::comments::Comment {
                kind: swc_common::comments::CommentKind::Line,
                span: Span::new_with_checked(start, end),
                text: self.atom(s),
            };

            if is_for_next {
                self.comments_buffer_mut().unwrap().push_pending(cmt);
            } else {
                let pos = self.state().prev_hi();
                self.comments_buffer_mut()
                    .unwrap()
                    .push_comment(BufferedComment {
                        kind: BufferedCommentKind::Trailing,
                        pos,
                        comment: cmt,
                    });
            }
        }

        // The line terminator itself is left unconsumed for the caller.
        unsafe {
            // Safety: We got end from self.input
            self.input_mut().reset_to(end);
        }
    }
362
363    /// Expects current char to be '/' and next char to be '*'.
364    fn skip_block_comment(&mut self) {
365        let start = self.cur_pos();
366
367        debug_assert_eq!(self.cur(), Some('/'));
368        debug_assert_eq!(self.peek(), Some('*'));
369
370        // Consume initial "/*"
371        self.input_mut().bump_bytes(2);
372
373        // jsdoc
374        let slice_start = self.cur_pos();
375
376        let had_line_break_before_last = self.had_line_break_before_last();
377        let mut should_mark_had_line_break = false;
378
379        loop {
380            let matched_byte = byte_search! {
381                lexer: self,
382                table: BLOCK_COMMENT_SCAN_TABLE,
383                continue_if: (matched_byte, pos_offset) {
384                    if matched_byte == LS_OR_PS_FIRST {
385                        // 0xE2 - could be LS/PS or some other Unicode character
386                        let current_slice = self.input().as_str();
387                        let byte_pos = pos_offset;
388                        if byte_pos + 2 < current_slice.len() {
389                            let bytes = current_slice.as_bytes();
390                            let next2 = [bytes[byte_pos + 1], bytes[byte_pos + 2]];
391                            if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
392                                // It's a real line terminator - don't continue
393                                false
394                            } else {
395                                // Some other Unicode character starting with 0xE2
396                                true
397                            }
398                        } else {
399                            // Not enough bytes for full LS/PS sequence
400                            true
401                        }
402                    } else {
403                        // '*', '\r', or '\n' - don't continue
404                        false
405                    }
406                },
407                handle_eof: {
408                    if should_mark_had_line_break {
409                        self.state_mut().mark_had_line_break();
410                    }
411                    let end_pos = self.input().end_pos();
412                    let span = Span::new_with_checked(end_pos, end_pos);
413                    self.emit_error_span(span, SyntaxError::UnterminatedBlockComment);
414                    return;
415                }
416            };
417
418            match matched_byte {
419                b'*' => {
420                    if self.peek() == Some('/') {
421                        // Consume "*/"
422                        self.input_mut().bump_bytes(2);
423
424                        if should_mark_had_line_break {
425                            self.state_mut().mark_had_line_break();
426                        }
427
428                        let end = self.cur_pos();
429
430                        // Decide trailing / leading
431                        let mut is_for_next =
432                            had_line_break_before_last || !self.state().can_have_trailing_comment();
433
434                        // If next char is ';' without newline, treat as trailing
435                        if !had_line_break_before_last && self.input().is_byte(b';') {
436                            is_for_next = false;
437                        }
438
439                        if self.comments_buffer().is_some() {
440                            let src = unsafe {
441                                // Safety: We got slice_start and end from self.input so those are
442                                // valid.
443                                self.input_mut().slice(slice_start, end)
444                            };
445                            let s = &src[..src.len() - 2];
446                            let cmt = Comment {
447                                kind: CommentKind::Block,
448                                span: Span::new_with_checked(start, end),
449                                text: self.atom(s),
450                            };
451
452                            if is_for_next {
453                                self.comments_buffer_mut().unwrap().push_pending(cmt);
454                            } else {
455                                let pos = self.state().prev_hi();
456                                self.comments_buffer_mut()
457                                    .unwrap()
458                                    .push_comment(BufferedComment {
459                                        kind: BufferedCommentKind::Trailing,
460                                        pos,
461                                        comment: cmt,
462                                    });
463                            }
464                        }
465
466                        return;
467                    } else {
468                        // Just a lone '*', consume it and continue.
469                        self.bump();
470                    }
471                }
472                b'\n' => {
473                    should_mark_had_line_break = true;
474                    self.bump();
475                }
476                b'\r' => {
477                    should_mark_had_line_break = true;
478                    self.bump();
479                    if self.peek() == Some('\n') {
480                        self.bump();
481                    }
482                }
483                _ => {
484                    // Unicode line terminator (LS/PS) or other character
485                    if let Some('\u{2028}' | '\u{2029}') = self.cur() {
486                        should_mark_had_line_break = true;
487                    }
488                    self.bump();
489                }
490            }
491        }
492    }
493
494    /// Skip comments or whitespaces.
495    ///
496    /// See https://tc39.github.io/ecma262/#sec-white-space
497    #[inline(never)]
498    fn skip_space<const LEX_COMMENTS: bool>(&mut self) {
499        loop {
500            let (offset, newline) = {
501                let mut skip = self::whitespace::SkipWhitespace {
502                    input: self.input().as_str(),
503                    newline: false,
504                    offset: 0,
505                };
506
507                skip.scan();
508
509                (skip.offset, skip.newline)
510            };
511
512            self.input_mut().bump_bytes(offset as usize);
513            if newline {
514                self.state_mut().mark_had_line_break();
515            }
516
517            if LEX_COMMENTS && self.input().is_byte(b'/') {
518                if let Some(c) = self.peek() {
519                    if c == '/' {
520                        self.skip_line_comment(2);
521                        continue;
522                    } else if c == '*' {
523                        self.skip_block_comment();
524                        continue;
525                    }
526                }
527            }
528
529            break;
530        }
531    }
532
533    /// Ensure that ident cannot directly follow numbers.
534    fn ensure_not_ident(&mut self) -> LexResult<()> {
535        match self.cur() {
536            Some(c) if c.is_ident_start() => {
537                let span = pos_span(self.cur_pos());
538                self.error_span(span, SyntaxError::IdentAfterNum)?
539            }
540            _ => Ok(()),
541        }
542    }
543
544    fn make_legacy_octal(&mut self, start: BytePos, val: f64) -> LexResult<f64> {
545        self.ensure_not_ident()?;
546        if self.syntax().typescript() && self.target() >= EsVersion::Es5 {
547            self.emit_error(start, SyntaxError::TS1085);
548        }
549        self.emit_strict_mode_error(start, SyntaxError::LegacyOctal);
550        Ok(val)
551    }
552
553    /// `op`- |total, radix, value| -> (total * radix + value, continue)
554    fn read_digits<F, Ret, const RADIX: u8>(
555        &mut self,
556        mut op: F,
557        allow_num_separator: bool,
558        has_underscore: &mut bool,
559    ) -> LexResult<Ret>
560    where
561        F: FnMut(Ret, u8, u32) -> LexResult<(Ret, bool)>,
562        Ret: Copy + Default,
563    {
564        debug_assert!(
565            RADIX == 2 || RADIX == 8 || RADIX == 10 || RADIX == 16,
566            "radix for read_int should be one of 2, 8, 10, 16, but got {RADIX}"
567        );
568
569        if cfg!(feature = "debug") {
570            tracing::trace!("read_digits(radix = {}), cur = {:?}", RADIX, self.cur());
571        }
572
573        let start = self.cur_pos();
574        let mut total: Ret = Default::default();
575        let mut prev = None;
576
577        while let Some(c) = self.cur() {
578            if c == '_' {
579                *has_underscore = true;
580                if allow_num_separator {
581                    let is_allowed = |c: Option<char>| {
582                        let Some(c) = c else {
583                            return false;
584                        };
585                        c.is_digit(RADIX as _)
586                    };
587                    let is_forbidden = |c: Option<char>| {
588                        let Some(c) = c else {
589                            return false;
590                        };
591
592                        if RADIX == 16 {
593                            matches!(c, '.' | 'X' | '_' | 'x')
594                        } else {
595                            matches!(c, '.' | 'B' | 'E' | 'O' | '_' | 'b' | 'e' | 'o')
596                        }
597                    };
598
599                    let next = self.input().peek();
600
601                    if !is_allowed(next) || is_forbidden(prev) || is_forbidden(next) {
602                        self.emit_error(
603                            start,
604                            SyntaxError::NumericSeparatorIsAllowedOnlyBetweenTwoDigits,
605                        );
606                    }
607
608                    // Ignore this _ character
609                    unsafe {
610                        // Safety: cur() returns Some(c) where c is a valid char
611                        self.input_mut().bump();
612                    }
613
614                    continue;
615                }
616            }
617
618            // e.g. (val for a) = 10  where radix = 16
619            let val = if let Some(val) = c.to_digit(RADIX as _) {
620                val
621            } else {
622                return Ok(total);
623            };
624
625            self.bump();
626
627            let (t, cont) = op(total, RADIX, val)?;
628
629            total = t;
630
631            if !cont {
632                return Ok(total);
633            }
634
635            prev = Some(c);
636        }
637
638        Ok(total)
639    }
640
641    /// This can read long integers like
642    /// "13612536612375123612312312312312312312312".
643    ///
644    /// - Returned `bool` is `true` is there was `8` or `9`.
645    fn read_number_no_dot_as_str<const RADIX: u8>(&mut self) -> LexResult<LazyInteger> {
646        debug_assert!(
647            RADIX == 2 || RADIX == 8 || RADIX == 10 || RADIX == 16,
648            "radix for read_number_no_dot should be one of 2, 8, 10, 16, but got {RADIX}"
649        );
650        let start = self.cur_pos();
651
652        let mut not_octal = false;
653        let mut read_any = false;
654        let mut has_underscore = false;
655
656        self.read_digits::<_, (), RADIX>(
657            |_, _, v| {
658                read_any = true;
659
660                if v == 8 || v == 9 {
661                    not_octal = true;
662                }
663
664                Ok(((), true))
665            },
666            true,
667            &mut has_underscore,
668        )?;
669
670        if !read_any {
671            self.error(start, SyntaxError::ExpectedDigit { radix: RADIX })?;
672        }
673
674        Ok(LazyInteger {
675            start,
676            end: self.cur_pos(),
677            not_octal,
678            has_underscore,
679        })
680    }
681
682    /// Reads an integer, octal integer, or floating-point number
683    fn read_number<const START_WITH_DOT: bool, const START_WITH_ZERO: bool>(
684        &mut self,
685    ) -> LexResult<Either<(f64, Atom), (Box<BigIntValue>, Atom)>> {
686        debug_assert!(!(START_WITH_DOT && START_WITH_ZERO));
687        debug_assert!(self.cur().is_some());
688
689        let start = self.cur_pos();
690        let mut has_underscore = false;
691
692        let lazy_integer = if START_WITH_DOT {
693            // first char is '.'
694            debug_assert!(
695                self.cur().is_some_and(|c| c == '.'),
696                "read_number<START_WITH_DOT = true> expects current char to be '.'"
697            );
698            LazyInteger {
699                start,
700                end: start,
701                not_octal: true,
702                has_underscore: false,
703            }
704        } else {
705            debug_assert!(!START_WITH_DOT);
706            debug_assert!(!START_WITH_ZERO || self.cur().unwrap() == '0');
707
708            // Use read_number_no_dot to support long numbers.
709            let lazy_integer = self.read_number_no_dot_as_str::<10>()?;
710            let s = unsafe {
711                // Safety: We got both start and end position from `self.input`
712                self.input_slice(lazy_integer.start, lazy_integer.end)
713            };
714
715            // legacy octal number is not allowed in bigint.
716            if (!START_WITH_ZERO || lazy_integer.end - lazy_integer.start == BytePos(1))
717                && self.eat(b'n')
718            {
719                let end = self.cur_pos();
720                let raw = unsafe {
721                    // Safety: We got both start and end position from `self.input`
722                    self.input_slice(start, end)
723                };
724                let bigint_value = num_bigint::BigInt::parse_bytes(s.as_bytes(), 10).unwrap();
725                return Ok(Either::Right((Box::new(bigint_value), self.atom(raw))));
726            }
727
728            if START_WITH_ZERO {
729                // TODO: I guess it would be okay if I don't use -ffast-math
730                // (or something like that), but needs review.
731                if s.as_bytes().iter().all(|&c| c == b'0') {
732                    // If only one zero is used, it's decimal.
733                    // And if multiple zero is used, it's octal.
734                    //
735                    // e.g. `0` is decimal (so it can be part of float)
736                    //
737                    // e.g. `000` is octal
738                    if start.0 != self.last_pos().0 - 1 {
739                        let end = self.cur_pos();
740                        let raw = unsafe {
741                            // Safety: We got both start and end position from `self.input`
742                            self.input_slice(start, end)
743                        };
744                        let raw = self.atom(raw);
745                        return self
746                            .make_legacy_octal(start, 0f64)
747                            .map(|value| Either::Left((value, raw)));
748                    }
749                } else if lazy_integer.not_octal {
750                    // if it contains '8' or '9', it's decimal.
751                    self.emit_strict_mode_error(start, SyntaxError::LegacyDecimal);
752                } else {
753                    // It's Legacy octal, and we should reinterpret value.
754                    let s = remove_underscore(s, lazy_integer.has_underscore);
755                    let val = parse_integer::<8>(&s);
756                    let end = self.cur_pos();
757                    let raw = unsafe {
758                        // Safety: We got both start and end position from `self.input`
759                        self.input_slice(start, end)
760                    };
761                    let raw = self.atom(raw);
762                    return self
763                        .make_legacy_octal(start, val)
764                        .map(|value| Either::Left((value, raw)));
765                }
766            }
767
768            lazy_integer
769        };
770
771        has_underscore |= lazy_integer.has_underscore;
772        // At this point, number cannot be an octal literal.
773
774        let has_dot = self.cur() == Some('.');
775        //  `0.a`, `08.a`, `102.a` are invalid.
776        //
777        // `.1.a`, `.1e-4.a` are valid,
778        if has_dot {
779            self.bump();
780
781            // equal: if START_WITH_DOT { debug_assert!(xxxx) }
782            debug_assert!(!START_WITH_DOT || self.cur().is_some_and(|cur| cur.is_ascii_digit()));
783
784            // Read numbers after dot
785            self.read_digits::<_, (), 10>(|_, _, _| Ok(((), true)), true, &mut has_underscore)?;
786        }
787
788        let has_e = self.cur().is_some_and(|c| c == 'e' || c == 'E');
789        // Handle 'e' and 'E'
790        //
791        // .5e1 = 5
792        // 1e2 = 100
793        // 1e+2 = 100
794        // 1e-2 = 0.01
795        if has_e {
796            self.bump(); // `e`/`E`
797
798            let next = match self.cur() {
799                Some(next) => next,
800                None => {
801                    let pos = self.cur_pos();
802                    self.error(pos, SyntaxError::NumLitTerminatedWithExp)?
803                }
804            };
805
806            if next == '+' || next == '-' {
807                self.bump(); // remove '+', '-'
808            }
809
810            let lazy_integer = self.read_number_no_dot_as_str::<10>()?;
811            has_underscore |= lazy_integer.has_underscore;
812        }
813
814        let val = if has_dot || has_e {
815            let end = self.cur_pos();
816            let raw = unsafe {
817                // Safety: We got both start and end position from `self.input`
818                self.input_slice(start, end)
819            };
820
821            let raw = remove_underscore(raw, has_underscore);
822            raw.parse().expect("failed to parse float literal")
823        } else {
824            let s = unsafe { self.input_slice(lazy_integer.start, lazy_integer.end) };
825            let s = remove_underscore(s, has_underscore);
826            parse_integer::<10>(&s)
827        };
828
829        self.ensure_not_ident()?;
830
831        let end = self.cur_pos();
832        let raw_str = unsafe {
833            // Safety: We got both start and end position from `self.input`
834            self.input_slice(start, end)
835        };
836        Ok(Either::Left((val, raw_str.into())))
837    }
838
839    fn read_int_u32<const RADIX: u8>(&mut self, len: u8) -> LexResult<Option<u32>> {
840        let start = self.state().start();
841
842        let mut count = 0;
843        let v = self.read_digits::<_, Option<u32>, RADIX>(
844            |opt: Option<u32>, radix, val| {
845                count += 1;
846
847                let total = opt
848                    .unwrap_or_default()
849                    .checked_mul(radix as u32)
850                    .and_then(|v| v.checked_add(val))
851                    .ok_or_else(|| {
852                        let span = Span::new_with_checked(start, start);
853                        crate::error::Error::new(span, SyntaxError::InvalidUnicodeEscape)
854                    })?;
855
856                Ok((Some(total), count != len))
857            },
858            true,
859            &mut false,
860        )?;
861        if len != 0 && count != len {
862            Ok(None)
863        } else {
864            Ok(v)
865        }
866    }
867
868    /// Returns `Left(value)` or `Right(BigInt)`
869    fn read_radix_number<const RADIX: u8>(
870        &mut self,
871    ) -> LexResult<Either<(f64, Atom), (Box<BigIntValue>, Atom)>> {
872        debug_assert!(
873            RADIX == 2 || RADIX == 8 || RADIX == 16,
874            "radix should be one of 2, 8, 16, but got {RADIX}"
875        );
876        let start = self.cur_pos();
877
878        debug_assert_eq!(self.cur(), Some('0'));
879        self.bump();
880
881        debug_assert!(self
882            .cur()
883            .is_some_and(|c| matches!(c, 'b' | 'B' | 'o' | 'O' | 'x' | 'X')));
884        self.bump();
885
886        let lazy_integer = self.read_number_no_dot_as_str::<RADIX>()?;
887        let has_underscore = lazy_integer.has_underscore;
888
889        let s = unsafe {
890            // Safety: We got both start and end position from `self.input`
891            self.input_slice(lazy_integer.start, lazy_integer.end)
892        };
893        if self.eat(b'n') {
894            let end = self.cur_pos();
895            let raw = unsafe {
896                // Safety: We got both start and end position from `self.input`
897                self.input_slice(start, end)
898            };
899
900            let bigint_value = num_bigint::BigInt::parse_bytes(s.as_bytes(), RADIX as _).unwrap();
901            return Ok(Either::Right((Box::new(bigint_value), self.atom(raw))));
902        }
903        let s = remove_underscore(s, has_underscore);
904        let val = parse_integer::<RADIX>(&s);
905
906        self.ensure_not_ident()?;
907
908        let end = self.cur_pos();
909        let raw = unsafe {
910            // Safety: We got both start and end position from `self.input`
911            self.input_slice(start, end)
912        };
913
914        Ok(Either::Left((val, self.atom(raw))))
915    }
916
917    /// Consume pending comments.
918    ///
919    /// This is called when the input is exhausted.
920    #[cold]
921    #[inline(never)]
922    fn consume_pending_comments(&mut self) {
923        if let Some(comments) = self.comments() {
924            let last = self.state().prev_hi();
925            let start_pos = self.start_pos();
926            let comments_buffer = self.comments_buffer_mut().unwrap();
927
928            // if the file had no tokens and no shebang, then treat any
929            // comments in the leading comments buffer as leading.
930            // Otherwise treat them as trailing.
931            let kind = if last == start_pos {
932                BufferedCommentKind::Leading
933            } else {
934                BufferedCommentKind::Trailing
935            };
936            // move the pending to the leading or trailing
937            comments_buffer.pending_to_comment(kind, last);
938
939            // now fill the user's passed in comments
940            for comment in comments_buffer.take_comments() {
941                match comment.kind {
942                    BufferedCommentKind::Leading => {
943                        comments.add_leading(comment.pos, comment.comment);
944                    }
945                    BufferedCommentKind::Trailing => {
946                        comments.add_trailing(comment.pos, comment.comment);
947                    }
948                }
949            }
950        }
951    }
952
953    /// Read a JSX identifier (valid tag or attribute name).
954    ///
955    /// Optimized version since JSX identifiers can"t contain
956    /// escape characters and so can be read as single slice.
957    /// Also assumes that first character was already checked
958    /// by isIdentifierStart in readToken.
959    fn read_jsx_word(&mut self) -> LexResult<Self::Token> {
960        debug_assert!(self.syntax().jsx());
961        debug_assert!(self.input().cur().is_some_and(|c| c.is_ident_start()));
962
963        let mut first = true;
964        let slice = self.input_uncons_while(|c| {
965            if first {
966                first = false;
967                c.is_ident_start()
968            } else {
969                c.is_ident_part() || c == '-'
970            }
971        });
972
973        Ok(Self::Token::jsx_name(slice, self))
974    }
975
976    fn read_jsx_entity(&mut self) -> LexResult<(char, String)> {
977        debug_assert!(self.syntax().jsx());
978
979        fn from_code(s: &str, radix: u32) -> LexResult<char> {
980            // TODO(kdy1): unwrap -> Err
981            let c = char::from_u32(
982                u32::from_str_radix(s, radix).expect("failed to parse string as number"),
983            )
984            .expect("failed to parse number as char");
985
986            Ok(c)
987        }
988
989        fn is_hex(s: &str) -> bool {
990            s.chars().all(|c| c.is_ascii_hexdigit())
991        }
992
993        fn is_dec(s: &str) -> bool {
994            s.chars().all(|c| c.is_ascii_digit())
995        }
996
997        let mut s = SmartString::<LazyCompact>::default();
998
999        debug_assert!(self.input().cur().is_some_and(|c| c == '&'));
1000        self.bump();
1001
1002        let start_pos = self.input().cur_pos();
1003
1004        for _ in 0..10 {
1005            let c = match self.input().cur() {
1006                Some(c) => c,
1007                None => break,
1008            };
1009            self.bump();
1010
1011            if c == ';' {
1012                if let Some(stripped) = s.strip_prefix('#') {
1013                    if stripped.starts_with('x') {
1014                        if is_hex(&s[2..]) {
1015                            let value = from_code(&s[2..], 16)?;
1016
1017                            return Ok((value, format!("&{s};")));
1018                        }
1019                    } else if is_dec(stripped) {
1020                        let value = from_code(stripped, 10)?;
1021
1022                        return Ok((value, format!("&{s};")));
1023                    }
1024                } else if let Some(entity) = xhtml(&s) {
1025                    return Ok((entity, format!("&{s};")));
1026                }
1027
1028                break;
1029            }
1030
1031            s.push(c)
1032        }
1033
1034        unsafe {
1035            // Safety: start_pos is a valid position because we got it from self.input
1036            self.input_mut().reset_to(start_pos);
1037        }
1038
1039        Ok(('&', "&".to_string()))
1040    }
1041
1042    fn read_jsx_new_line(&mut self, normalize_crlf: bool) -> LexResult<Either<&'static str, char>> {
1043        debug_assert!(self.syntax().jsx());
1044        let ch = self.input().cur().unwrap();
1045        self.bump();
1046
1047        let out = if ch == '\r' && self.input().cur() == Some('\n') {
1048            self.bump(); // `\n`
1049            Either::Left(if normalize_crlf { "\n" } else { "\r\n" })
1050        } else {
1051            Either::Right(ch)
1052        };
1053        Ok(out)
1054    }
1055
1056    fn read_jsx_str(&mut self, quote: char) -> LexResult<Self::Token> {
1057        debug_assert!(self.syntax().jsx());
1058        let start = self.input().cur_pos();
1059        unsafe {
1060            // Safety: cur() was Some(quote)
1061            self.input_mut().bump(); // `quote`
1062        }
1063        let mut out = String::new();
1064        let mut chunk_start = self.input().cur_pos();
1065        loop {
1066            let ch = match self.input().cur() {
1067                Some(c) => c,
1068                None => {
1069                    self.emit_error(start, SyntaxError::UnterminatedStrLit);
1070                    break;
1071                }
1072            };
1073            let cur_pos = self.input().cur_pos();
1074            if ch == '\\' {
1075                let value = unsafe {
1076                    // Safety: We already checked for the range
1077                    self.input_slice(chunk_start, cur_pos)
1078                };
1079
1080                out.push_str(value);
1081                out.push('\\');
1082
1083                self.bump();
1084
1085                chunk_start = self.input().cur_pos();
1086
1087                continue;
1088            }
1089
1090            if ch == quote {
1091                break;
1092            }
1093
1094            if ch == '&' {
1095                let value = unsafe {
1096                    // Safety: We already checked for the range
1097                    self.input_slice(chunk_start, cur_pos)
1098                };
1099
1100                out.push_str(value);
1101
1102                let jsx_entity = self.read_jsx_entity()?;
1103
1104                out.push(jsx_entity.0);
1105
1106                chunk_start = self.input().cur_pos();
1107            } else if ch.is_line_terminator() {
1108                let value = unsafe {
1109                    // Safety: We already checked for the range
1110                    self.input_slice(chunk_start, cur_pos)
1111                };
1112
1113                out.push_str(value);
1114
1115                match self.read_jsx_new_line(false)? {
1116                    Either::Left(s) => {
1117                        out.push_str(s);
1118                    }
1119                    Either::Right(c) => {
1120                        out.push(c);
1121                    }
1122                }
1123
1124                chunk_start = cur_pos + BytePos(ch.len_utf8() as _);
1125            } else {
1126                unsafe {
1127                    // Safety: cur() was Some(ch)
1128                    self.input_mut().bump();
1129                }
1130            }
1131        }
1132        let cur_pos = self.input().cur_pos();
1133        let s = unsafe {
1134            // Safety: We already checked for the range
1135            self.input_slice(chunk_start, cur_pos)
1136        };
1137        let value = if out.is_empty() {
1138            // Fast path: We don't need to allocate
1139            self.atom(s)
1140        } else {
1141            out.push_str(s);
1142            self.atom(out)
1143        };
1144
1145        // it might be at the end of the file when
1146        // the string literal is unterminated
1147        if self.input().peek_ahead().is_some() {
1148            self.bump();
1149        }
1150
1151        let end = self.input().cur_pos();
1152        let raw = unsafe {
1153            // Safety: Both of `start` and `end` are generated from `cur_pos()`
1154            self.input_slice(start, end)
1155        };
1156        let raw = self.atom(raw);
1157        Ok(Self::Token::str(value.into(), raw, self))
1158    }
1159
1160    // Modified based on <https://github.com/oxc-project/oxc/blob/f0e1510b44efdb1b0d9a09f950181b0e4c435abe/crates/oxc_parser/src/lexer/unicode.rs#L237>
1161    /// Unicode code unit (`\uXXXX`).
1162    ///
1163    /// The opening `\u` must already have been consumed before calling this
1164    /// method.
1165    ///
1166    /// See background info on surrogate pairs:
1167    ///   * `https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae`
1168    ///   * `https://mathiasbynens.be/notes/javascript-identifiers-es6`
1169    fn read_unicode_code_unit(&mut self) -> LexResult<Option<UnicodeEscape>> {
1170        const MIN_HIGH: u32 = 0xd800;
1171        const MAX_HIGH: u32 = 0xdbff;
1172        const MIN_LOW: u32 = 0xdc00;
1173        const MAX_LOW: u32 = 0xdfff;
1174
1175        let Some(high) = self.read_int_u32::<16>(4)? else {
1176            return Ok(None);
1177        };
1178        if let Some(ch) = char::from_u32(high) {
1179            return Ok(Some(UnicodeEscape::CodePoint(ch)));
1180        }
1181
1182        // The first code unit of a surrogate pair is always in the range from 0xD800 to
1183        // 0xDBFF, and is called a high surrogate or a lead surrogate.
1184        // Note: `high` must be >= `MIN_HIGH`, otherwise `char::from_u32` would have
1185        // returned `Some`, and already exited.
1186        debug_assert!(high >= MIN_HIGH);
1187        let is_pair = high <= MAX_HIGH
1188            && self.input().cur() == Some('\\')
1189            && self.input().peek() == Some('u');
1190        if !is_pair {
1191            return Ok(Some(UnicodeEscape::LoneSurrogate(high)));
1192        }
1193
1194        let before_second = self.input().cur_pos();
1195
1196        // Bump `\u`
1197        self.input_mut().bump_bytes(2);
1198
1199        let Some(low) = self.read_int_u32::<16>(4)? else {
1200            return Ok(None);
1201        };
1202
1203        // The second code unit of a surrogate pair is always in the range from 0xDC00
1204        // to 0xDFFF, and is called a low surrogate or a trail surrogate.
1205        // If this isn't a valid pair, rewind to before the 2nd, and return the first
1206        // only. The 2nd could be the first part of a valid pair.
1207        if !(MIN_LOW..=MAX_LOW).contains(&low) {
1208            unsafe {
1209                // Safety: state is valid position because we got it from cur_pos()
1210                self.input_mut().reset_to(before_second);
1211            }
1212            return Ok(Some(UnicodeEscape::LoneSurrogate(high)));
1213        }
1214
1215        let code_point = pair_to_code_point(high, low);
1216        // SAFETY: `high` and `low` have been checked to be in ranges which always yield
1217        // a `code_point` which is a valid `char`
1218        let ch = unsafe { char::from_u32_unchecked(code_point) };
1219        Ok(Some(UnicodeEscape::SurrogatePair(ch)))
1220    }
1221
1222    fn read_unicode_escape(&mut self) -> LexResult<UnicodeEscape> {
1223        debug_assert_eq!(self.cur(), Some('u'));
1224
1225        let mut is_curly = false;
1226
1227        self.bump(); // 'u'
1228
1229        if self.eat(b'{') {
1230            is_curly = true;
1231        }
1232
1233        let state = self.input().cur_pos();
1234        let c = match self.read_int_u32::<16>(if is_curly { 0 } else { 4 }) {
1235            Ok(Some(val)) => {
1236                if 0x0010_ffff >= val {
1237                    char::from_u32(val)
1238                } else {
1239                    let start = self.cur_pos();
1240
1241                    self.error(
1242                        start,
1243                        SyntaxError::BadCharacterEscapeSequence {
1244                            expected: if is_curly {
1245                                "1-6 hex characters in the range 0 to 10FFFF."
1246                            } else {
1247                                "4 hex characters"
1248                            },
1249                        },
1250                    )?
1251                }
1252            }
1253            _ => {
1254                let start = self.cur_pos();
1255
1256                self.error(
1257                    start,
1258                    SyntaxError::BadCharacterEscapeSequence {
1259                        expected: if is_curly {
1260                            "1-6 hex characters"
1261                        } else {
1262                            "4 hex characters"
1263                        },
1264                    },
1265                )?
1266            }
1267        };
1268
1269        match c {
1270            Some(c) => {
1271                if is_curly && !self.eat(b'}') {
1272                    self.error(state, SyntaxError::InvalidUnicodeEscape)?
1273                }
1274
1275                Ok(UnicodeEscape::CodePoint(c))
1276            }
1277            _ => {
1278                unsafe {
1279                    // Safety: state is valid position because we got it from cur_pos()
1280                    self.input_mut().reset_to(state);
1281                }
1282
1283                let Some(value) = self.read_unicode_code_unit()? else {
1284                    self.error(
1285                        state,
1286                        SyntaxError::BadCharacterEscapeSequence {
1287                            expected: if is_curly {
1288                                "1-6 hex characters"
1289                            } else {
1290                                "4 hex characters"
1291                            },
1292                        },
1293                    )?
1294                };
1295
1296                if is_curly && !self.eat(b'}') {
1297                    self.error(state, SyntaxError::InvalidUnicodeEscape)?
1298                }
1299
1300                Ok(value)
1301            }
1302        }
1303    }
1304
1305    #[cold]
1306    fn read_shebang(&mut self) -> LexResult<Option<Atom>> {
1307        if self.input().cur() != Some('#') || self.input().peek() != Some('!') {
1308            return Ok(None);
1309        }
1310        self.bump(); // `#`
1311        self.bump(); // `!`
1312        let s = self.input_uncons_while(|c| !c.is_line_terminator());
1313        Ok(Some(self.atom(s)))
1314    }
1315
1316    fn read_tmpl_token(&mut self, start_of_tpl: BytePos) -> LexResult<Self::Token> {
1317        let start = self.cur_pos();
1318
1319        let mut cooked = Ok(Wtf8Buf::new());
1320        let mut cooked_slice_start = start;
1321        let raw_slice_start = start;
1322
1323        macro_rules! consume_cooked {
1324            () => {{
1325                if let Ok(cooked) = &mut cooked {
1326                    let last_pos = self.cur_pos();
1327                    cooked.push_str(unsafe {
1328                        // Safety: Both of start and last_pos are valid position because we got them
1329                        // from `self.input`
1330                        self.input_slice(cooked_slice_start, last_pos)
1331                    });
1332                }
1333            }};
1334        }
1335
1336        // Handle edge case for immediate template end
1337        if start == self.cur_pos() && self.state().last_was_tpl_element() {
1338            if let Some(c) = self.cur() {
1339                if c == '$' && self.peek() == Some('{') {
1340                    self.bump(); // '$'
1341                    self.bump(); // '{'
1342                    return Ok(Self::Token::DOLLAR_LBRACE);
1343                } else if c == '`' {
1344                    self.bump(); // '`'
1345                    return Ok(Self::Token::BACKQUOTE);
1346                }
1347            }
1348        }
1349
1350        // Fast path: use byte_search to scan for template literal terminators
1351        loop {
1352            let matched_byte = byte_search! {
1353                lexer: self,
1354                table: TEMPLATE_LITERAL_TABLE,
1355                handle_eof: {
1356                    // EOF reached - unterminated template
1357                    self.error(start_of_tpl, SyntaxError::UnterminatedTpl)?
1358                }
1359            };
1360
1361            match matched_byte {
1362                b'$' => {
1363                    // Check if this is ${
1364                    if self.peek() == Some('{') {
1365                        // Found template substitution
1366                        let cooked = if cooked_slice_start == raw_slice_start {
1367                            let last_pos = self.cur_pos();
1368                            let s = unsafe {
1369                                // Safety: Both of start and last_pos are valid position because we
1370                                // got them from `self.input`
1371                                self.input_slice(cooked_slice_start, last_pos)
1372                            };
1373                            Ok(self.wtf8_atom(Wtf8::from_str(s)))
1374                        } else {
1375                            consume_cooked!();
1376                            cooked.map(|s| self.wtf8_atom(&*s))
1377                        };
1378
1379                        let end = self.input().cur_pos();
1380                        let raw = unsafe {
1381                            // Safety: Both of start and last_pos are valid position because we got
1382                            // them from `self.input`
1383                            self.input_slice(raw_slice_start, end)
1384                        };
1385                        let raw = self.atom(raw);
1386                        return Ok(Self::Token::template(cooked, raw, self));
1387                    } else {
1388                        // Just a regular $ character, continue scanning
1389                        self.bump();
1390                        continue;
1391                    }
1392                }
1393                b'`' => {
1394                    // Found template end
1395                    let cooked = if cooked_slice_start == raw_slice_start {
1396                        let last_pos = self.cur_pos();
1397                        let s = unsafe { self.input_slice(cooked_slice_start, last_pos) };
1398                        Ok(self.wtf8_atom(Wtf8::from_str(s)))
1399                    } else {
1400                        consume_cooked!();
1401                        cooked.map(|s| self.wtf8_atom(&*s))
1402                    };
1403
1404                    let end = self.input().cur_pos();
1405                    let raw = unsafe { self.input_slice(raw_slice_start, end) };
1406                    let raw = self.atom(raw);
1407                    return Ok(Self::Token::template(cooked, raw, self));
1408                }
1409                b'\r' => {
1410                    // Handle carriage return line terminator
1411                    self.state_mut().mark_had_line_break();
1412                    consume_cooked!();
1413
1414                    // Handle carriage return - consume \r and optionally \n, normalize to \n
1415                    self.bump(); // '\r'
1416                    if self.peek() == Some('\n') {
1417                        self.bump(); // '\n'
1418                    }
1419
1420                    if let Ok(ref mut cooked) = cooked {
1421                        cooked.push_char('\n');
1422                    }
1423                    cooked_slice_start = self.cur_pos();
1424                }
1425                b'\\' => {
1426                    // Handle escape sequence - fall back to slow path for this part
1427                    consume_cooked!();
1428
1429                    match self.read_escaped_char(true) {
1430                        Ok(Some(escaped)) => {
1431                            if let Ok(ref mut cooked) = cooked {
1432                                cooked.push(escaped);
1433                            }
1434                        }
1435                        Ok(None) => {}
1436                        Err(error) => {
1437                            cooked = Err(error);
1438                        }
1439                    }
1440
1441                    cooked_slice_start = self.cur_pos();
1442                }
1443                _ => unreachable!(),
1444            }
1445        }
1446    }
1447
1448    /// Read an escaped character for string literal.
1449    ///
1450    /// In template literal, we should preserve raw string.
1451    fn read_escaped_char(&mut self, in_template: bool) -> LexResult<Option<CodePoint>> {
1452        debug_assert_eq!(self.cur(), Some('\\'));
1453
1454        let start = self.cur_pos();
1455
1456        self.bump(); // '\'
1457
1458        let c = match self.cur() {
1459            Some(c) => c,
1460            None => self.error_span(pos_span(start), SyntaxError::InvalidStrEscape)?,
1461        };
1462
1463        let c = match c {
1464            '\\' => '\\',
1465            'n' => '\n',
1466            'r' => '\r',
1467            't' => '\t',
1468            'b' => '\u{0008}',
1469            'v' => '\u{000b}',
1470            'f' => '\u{000c}',
1471            '\r' => {
1472                self.bump(); // remove '\r'
1473
1474                self.eat(b'\n');
1475
1476                return Ok(None);
1477            }
1478            '\n' | '\u{2028}' | '\u{2029}' => {
1479                self.bump();
1480
1481                return Ok(None);
1482            }
1483
1484            // read hexadecimal escape sequences
1485            'x' => {
1486                self.bump(); // 'x'
1487
1488                match self.read_int_u32::<16>(2)? {
1489                    Some(val) => return Ok(CodePoint::from_u32(val)),
1490                    None => self.error(
1491                        start,
1492                        SyntaxError::BadCharacterEscapeSequence {
1493                            expected: "2 hex characters",
1494                        },
1495                    )?,
1496                }
1497            }
1498
1499            // read unicode escape sequences
1500            'u' => match self.read_unicode_escape() {
1501                Ok(value) => {
1502                    return Ok(Some(value.into()));
1503                }
1504                Err(err) => self.error(start, err.into_kind())?,
1505            },
1506
1507            // octal escape sequences
1508            '0'..='7' => {
1509                self.bump();
1510
1511                let first_c = if c == '0' {
1512                    match self.cur() {
1513                        Some(next) if next.is_digit(8) => c,
1514                        // \0 is not an octal literal nor decimal literal.
1515                        _ => return Ok(Some(CodePoint::from_char('\u{0000}'))),
1516                    }
1517                } else {
1518                    c
1519                };
1520
1521                // TODO: Show template instead of strict mode
1522                if in_template {
1523                    self.error(start, SyntaxError::LegacyOctal)?
1524                }
1525
1526                self.emit_strict_mode_error(start, SyntaxError::LegacyOctal);
1527
1528                let mut value: u8 = first_c.to_digit(8).unwrap() as u8;
1529
1530                macro_rules! one {
1531                    ($check:expr) => {{
1532                        let cur = self.cur();
1533
1534                        match cur.and_then(|c| c.to_digit(8)) {
1535                            Some(v) => {
1536                                value = if $check {
1537                                    let new_val = value
1538                                        .checked_mul(8)
1539                                        .and_then(|value| value.checked_add(v as u8));
1540                                    match new_val {
1541                                        Some(val) => val,
1542                                        None => return Ok(CodePoint::from_u32(value as u32)),
1543                                    }
1544                                } else {
1545                                    value * 8 + v as u8
1546                                };
1547
1548                                self.bump();
1549                            }
1550                            _ => return Ok(CodePoint::from_u32(value as u32)),
1551                        }
1552                    }};
1553                }
1554
1555                one!(false);
1556                one!(true);
1557
1558                return Ok(CodePoint::from_u32(value as u32));
1559            }
1560            _ => c,
1561        };
1562
1563        unsafe {
1564            // Safety: cur() is Some(c) if this method is called.
1565            self.input_mut().bump();
1566        }
1567
1568        Ok(CodePoint::from_u32(c as u32))
1569    }
1570
1571    /// Expects current char to be '/'
1572    fn read_regexp(&mut self, start: BytePos) -> LexResult<Self::Token> {
1573        unsafe {
1574            // Safety: start is valid position, and cur() is Some('/')
1575            self.input_mut().reset_to(start);
1576        }
1577
1578        debug_assert_eq!(self.cur(), Some('/'));
1579
1580        let start = self.cur_pos();
1581
1582        self.bump(); // bump '/'
1583
1584        let slice_start = self.cur_pos();
1585
1586        let (mut escaped, mut in_class) = (false, false);
1587
1588        while let Some(c) = self.cur() {
1589            // This is ported from babel.
1590            // Seems like regexp literal cannot contain linebreak.
1591            if c.is_line_terminator() {
1592                let span = self.span(start);
1593
1594                return Err(crate::error::Error::new(
1595                    span,
1596                    SyntaxError::UnterminatedRegExp,
1597                ));
1598            }
1599
1600            if escaped {
1601                escaped = false;
1602            } else {
1603                match c {
1604                    '[' => in_class = true,
1605                    ']' if in_class => in_class = false,
1606                    // Terminates content part of regex literal
1607                    '/' if !in_class => break,
1608                    _ => {}
1609                }
1610
1611                escaped = c == '\\';
1612            }
1613
1614            self.bump();
1615        }
1616
1617        let content = {
1618            let end = self.cur_pos();
1619            let s = unsafe { self.input_slice(slice_start, end) };
1620            self.atom(s)
1621        };
1622
1623        // input is terminated without following `/`
1624        if !self.is(b'/') {
1625            let span = self.span(start);
1626
1627            return Err(crate::error::Error::new(
1628                span,
1629                SyntaxError::UnterminatedRegExp,
1630            ));
1631        }
1632
1633        self.bump(); // '/'
1634
1635        // Spec says "It is a Syntax Error if IdentifierPart contains a Unicode escape
1636        // sequence." TODO: check for escape
1637
1638        // Need to use `read_word` because '\uXXXX' sequences are allowed
1639        // here (don't ask).
1640        // let flags_start = self.cur_pos();
1641        let flags = {
1642            match self.cur() {
1643                Some(c) if c.is_ident_start() => self
1644                    .read_word_as_str_with()
1645                    .map(|(s, _)| Some(self.atom(s))),
1646                _ => Ok(None),
1647            }
1648        }?
1649        .unwrap_or_default();
1650
1651        Ok(Self::Token::regexp(content, flags, self))
1652    }
1653
1654    /// This method is optimized for texts without escape sequences.
1655    fn read_word_as_str_with(&mut self) -> LexResult<(Cow<'a, str>, bool)> {
1656        debug_assert!(self.cur().is_some());
1657        let slice_start = self.cur_pos();
1658
1659        // Fast path: try to scan ASCII identifier using byte_search
1660        if let Some(c) = self.input().cur_as_ascii() {
1661            if Ident::is_valid_ascii_start(c) {
1662                // Advance past first byte
1663                self.bump();
1664
1665                // Use byte_search to quickly scan to end of ASCII identifier
1666                let next_byte = byte_search! {
1667                    lexer: self,
1668                    table: NOT_ASCII_ID_CONTINUE_TABLE,
1669                    handle_eof: {
1670                        // Reached EOF, entire remainder is identifier
1671                        let end = self.cur_pos();
1672                        let s = unsafe {
1673                            // Safety: slice_start and end are valid position because we got them from
1674                            // `self.input`
1675                            self.input_slice(slice_start, end)
1676                        };
1677
1678                        return Ok((Cow::Borrowed(s), false));
1679                    },
1680                };
1681
1682                // Check if we hit end of identifier or need to fall back to slow path
1683                if !next_byte.is_ascii() {
1684                    // Hit Unicode character, fall back to slow path from current position
1685                    return self.read_word_as_str_with_slow_path(slice_start);
1686                } else if next_byte == b'\\' {
1687                    // Hit escape sequence, fall back to slow path from current position
1688                    return self.read_word_as_str_with_slow_path(slice_start);
1689                } else {
1690                    // Hit end of identifier (non-continue ASCII char)
1691                    let end = self.cur_pos();
1692                    let s = unsafe {
1693                        // Safety: slice_start and end are valid position because we got them from
1694                        // `self.input`
1695                        self.input_slice(slice_start, end)
1696                    };
1697
1698                    return Ok((Cow::Borrowed(s), false));
1699                }
1700            }
1701        }
1702
1703        // Fall back to slow path for non-ASCII start or complex cases
1704        self.read_word_as_str_with_slow_path(slice_start)
1705    }
1706
1707    /// Slow path for identifier parsing that handles Unicode and escapes
1708    #[cold]
1709    fn read_word_as_str_with_slow_path(
1710        &mut self,
1711        mut slice_start: BytePos,
1712    ) -> LexResult<(Cow<'a, str>, bool)> {
1713        let mut first = true;
1714        let mut has_escape = false;
1715
1716        let mut buf = String::with_capacity(16);
1717        loop {
1718            if let Some(c) = self.input().cur_as_ascii() {
1719                if Ident::is_valid_ascii_continue(c) {
1720                    self.bump();
1721                    continue;
1722                } else if first && Ident::is_valid_ascii_start(c) {
1723                    self.bump();
1724                    first = false;
1725                    continue;
1726                }
1727
1728                // unicode escape
1729                if c == b'\\' {
1730                    first = false;
1731                    has_escape = true;
1732                    let start = self.cur_pos();
1733                    self.bump();
1734
1735                    if !self.is(b'u') {
1736                        self.error_span(pos_span(start), SyntaxError::ExpectedUnicodeEscape)?
1737                    }
1738
1739                    {
1740                        let end = self.input().cur_pos();
1741                        let s = unsafe {
1742                            // Safety: start and end are valid position because we got them from
1743                            // `self.input`
1744                            self.input_slice(slice_start, start)
1745                        };
1746                        buf.push_str(s);
1747                        unsafe {
1748                            // Safety: We got end from `self.input`
1749                            self.input_mut().reset_to(end);
1750                        }
1751                    }
1752
1753                    let value = self.read_unicode_escape()?;
1754
1755                    match value {
1756                        UnicodeEscape::CodePoint(ch) => {
1757                            let valid = if first {
1758                                ch.is_ident_start()
1759                            } else {
1760                                ch.is_ident_part()
1761                            };
1762                            if !valid {
1763                                self.emit_error(start, SyntaxError::InvalidIdentChar);
1764                            }
1765                            buf.push(ch);
1766                        }
1767                        UnicodeEscape::SurrogatePair(ch) => {
1768                            buf.push(ch);
1769                            self.emit_error(start, SyntaxError::InvalidIdentChar);
1770                        }
1771                        UnicodeEscape::LoneSurrogate(code_point) => {
1772                            buf.push_str(format!("\\u{code_point:04X}").as_str());
1773                            self.emit_error(start, SyntaxError::InvalidIdentChar);
1774                        }
1775                    };
1776
1777                    slice_start = self.cur_pos();
1778                    continue;
1779                }
1780
1781                // ASCII but not a valid identifier
1782                break;
1783            } else if let Some(c) = self.input().cur() {
1784                if Ident::is_valid_non_ascii_continue(c) {
1785                    self.bump();
1786                    continue;
1787                } else if first && Ident::is_valid_non_ascii_start(c) {
1788                    self.bump();
1789                    first = false;
1790                    continue;
1791                }
1792            }
1793
1794            break;
1795        }
1796
1797        let end = self.cur_pos();
1798        let s = unsafe {
1799            // Safety: slice_start and end are valid position because we got them from
1800            // `self.input`
1801            self.input_slice(slice_start, end)
1802        };
1803        let value = if !has_escape {
1804            // Fast path: raw slice is enough if there's no escape.
1805            Cow::Borrowed(s)
1806        } else {
1807            buf.push_str(s);
1808            Cow::Owned(buf)
1809        };
1810
1811        Ok((value, has_escape))
1812    }
1813
1814    /// `#`
1815    fn read_token_number_sign(&mut self) -> LexResult<Self::Token> {
1816        debug_assert!(self.cur().is_some_and(|c| c == '#'));
1817
1818        self.bump(); // '#'
1819
1820        // `#` can also be a part of shebangs, however they should have been
1821        // handled by `read_shebang()`
1822        debug_assert!(
1823            !self.input().is_at_start() || self.cur() != Some('!'),
1824            "#! should have already been handled by read_shebang()"
1825        );
1826        Ok(Self::Token::HASH)
1827    }
1828
1829    /// Read a token given `.`.
1830    ///
1831    /// This is extracted as a method to reduce size of `read_token`.
1832    #[inline(never)]
1833    fn read_token_dot(&mut self) -> LexResult<Self::Token> {
1834        debug_assert!(self.cur().is_some_and(|c| c == '.'));
1835        // Check for eof
1836        let next = match self.input().peek() {
1837            Some(next) => next,
1838            None => {
1839                self.bump(); // '.'
1840                return Ok(Self::Token::DOT);
1841            }
1842        };
1843        if next.is_ascii_digit() {
1844            return self.read_number::<true, false>().map(|v| match v {
1845                Left((value, raw)) => Self::Token::num(value, raw, self),
1846                Right(_) => unreachable!("read_number should not return bigint for leading dot"),
1847            });
1848        }
1849
1850        self.bump(); // 1st `.`
1851
1852        if next == '.' && self.input().peek() == Some('.') {
1853            self.bump(); // 2nd `.`
1854            self.bump(); // 3rd `.`
1855
1856            return Ok(Self::Token::DOTDOTDOT);
1857        }
1858
1859        Ok(Self::Token::DOT)
1860    }
1861
1862    /// Read a token given `?`.
1863    ///
1864    /// This is extracted as a method to reduce size of `read_token`.
1865    #[inline(never)]
1866    fn read_token_question_mark(&mut self) -> LexResult<Self::Token> {
1867        debug_assert!(self.cur().is_some_and(|c| c == '?'));
1868        self.bump();
1869        if self.input_mut().eat_byte(b'?') {
1870            if self.input_mut().eat_byte(b'=') {
1871                Ok(Self::Token::NULLISH_ASSIGN)
1872            } else {
1873                Ok(Self::Token::NULLISH_COALESCING)
1874            }
1875        } else {
1876            Ok(Self::Token::QUESTION)
1877        }
1878    }
1879
1880    /// Read a token given `:`.
1881    ///
1882    /// This is extracted as a method to reduce size of `read_token`.
1883    #[inline(never)]
1884    fn read_token_colon(&mut self) -> LexResult<Self::Token> {
1885        debug_assert!(self.cur().is_some_and(|c| c == ':'));
1886        self.bump(); // ':'
1887        Ok(Self::Token::COLON)
1888    }
1889
1890    /// Read a token given `0`.
1891    ///
1892    /// This is extracted as a method to reduce size of `read_token`.
1893    #[inline(never)]
1894    fn read_token_zero(&mut self) -> LexResult<Self::Token> {
1895        debug_assert_eq!(self.cur(), Some('0'));
1896        let next = self.input().peek();
1897
1898        let bigint = match next {
1899            Some('x') | Some('X') => self.read_radix_number::<16>(),
1900            Some('o') | Some('O') => self.read_radix_number::<8>(),
1901            Some('b') | Some('B') => self.read_radix_number::<2>(),
1902            _ => {
1903                return self.read_number::<false, true>().map(|v| match v {
1904                    Left((value, raw)) => Self::Token::num(value, raw, self),
1905                    Right((value, raw)) => Self::Token::bigint(value, raw, self),
1906                });
1907            }
1908        };
1909
1910        bigint.map(|v| match v {
1911            Left((value, raw)) => Self::Token::num(value, raw, self),
1912            Right((value, raw)) => Self::Token::bigint(value, raw, self),
1913        })
1914    }
1915
1916    /// Read a token given `|` or `&`.
1917    ///
1918    /// This is extracted as a method to reduce size of `read_token`.
1919    #[inline(never)]
1920    fn read_token_logical<const C: u8>(&mut self) -> LexResult<Self::Token> {
1921        debug_assert!(C == b'|' || C == b'&');
1922        let is_bit_and = C == b'&';
1923        let had_line_break_before_last = self.had_line_break_before_last();
1924        let start = self.cur_pos();
1925
1926        unsafe {
1927            // Safety: cur() is Some(c as char)
1928            self.input_mut().bump();
1929        }
1930        let token = if is_bit_and {
1931            Self::Token::BIT_AND
1932        } else {
1933            Self::Token::BIT_OR
1934        };
1935
1936        // '|=', '&='
1937        if self.input_mut().eat_byte(b'=') {
1938            return Ok(if is_bit_and {
1939                Self::Token::BIT_AND_EQ
1940            } else {
1941                debug_assert!(token.is_bit_or());
1942                Self::Token::BIT_OR_EQ
1943            });
1944        }
1945
1946        // '||', '&&'
1947        if self.input().cur() == Some(C as char) {
1948            unsafe {
1949                // Safety: cur() is Some(c)
1950                self.input_mut().bump();
1951            }
1952
1953            if self.input().cur() == Some('=') {
1954                unsafe {
1955                    // Safety: cur() is Some('=')
1956                    self.input_mut().bump();
1957                }
1958
1959                return Ok(if is_bit_and {
1960                    Self::Token::LOGICAL_AND_EQ
1961                } else {
1962                    debug_assert!(token.is_bit_or());
1963                    Self::Token::LOGICAL_OR_EQ
1964                });
1965            }
1966
1967            // |||||||
1968            //   ^
1969            if had_line_break_before_last && !is_bit_and && self.is_str("||||| ") {
1970                let span = fixed_len_span(start, 7);
1971                self.emit_error_span(span, SyntaxError::TS1185);
1972                self.skip_line_comment(5);
1973                self.skip_space::<true>();
1974                return self.error_span(span, SyntaxError::TS1185);
1975            }
1976
1977            return Ok(if is_bit_and {
1978                Self::Token::LOGICAL_AND
1979            } else {
1980                debug_assert!(token.is_bit_or());
1981                Self::Token::LOGICAL_OR
1982            });
1983        }
1984
1985        Ok(token)
1986    }
1987
1988    /// Read a token given `*` or `%`.
1989    ///
1990    /// This is extracted as a method to reduce size of `read_token`.
1991    #[inline(never)]
1992    fn read_token_mul_mod(&mut self, is_mul: bool) -> LexResult<Self::Token> {
1993        debug_assert!(self.cur().is_some_and(|c| c == '*' || c == '%'));
1994        self.bump();
1995        let token = if is_mul {
1996            if self.input_mut().eat_byte(b'*') {
1997                // `**`
1998                Self::Token::EXP
1999            } else {
2000                Self::Token::MUL
2001            }
2002        } else {
2003            Self::Token::MOD
2004        };
2005
2006        Ok(if self.input_mut().eat_byte(b'=') {
2007            if token.is_star() {
2008                Self::Token::MUL_EQ
2009            } else if token.is_mod() {
2010                Self::Token::MOD_EQ
2011            } else {
2012                debug_assert!(token.is_exp());
2013                Self::Token::EXP_EQ
2014            }
2015        } else {
2016            token
2017        })
2018    }
2019
2020    #[inline(never)]
2021    fn read_slash(&mut self) -> LexResult<Self::Token> {
2022        debug_assert_eq!(self.cur(), Some('/'));
2023        self.bump(); // '/'
2024        Ok(if self.eat(b'=') {
2025            Self::Token::DIV_EQ
2026        } else {
2027            Self::Token::DIV
2028        })
2029    }
2030
2031    /// This can be used if there's no keyword starting with the first
2032    /// character.
2033    fn read_ident_unknown(&mut self) -> LexResult<Self::Token> {
2034        debug_assert!(self.cur().is_some());
2035
2036        let (s, has_escape) = self.read_word_as_str_with()?;
2037        let atom = self.atom(s);
2038        let word = Self::Token::unknown_ident(atom, self);
2039
2040        if has_escape {
2041            self.update_token_flags(|flags| *flags |= TokenFlags::UNICODE);
2042        }
2043
2044        Ok(word)
2045    }
2046
2047    /// See https://tc39.github.io/ecma262/#sec-literals-string-literals
2048    // TODO: merge `read_str_lit` and `read_jsx_str`
2049    fn read_str_lit(&mut self) -> LexResult<Self::Token> {
2050        debug_assert!(self.cur() == Some('\'') || self.cur() == Some('"'));
2051        let start = self.cur_pos();
2052        let quote = self.cur().unwrap() as u8;
2053
2054        self.bump(); // '"' or '\''
2055
2056        let mut slice_start = self.input().cur_pos();
2057
2058        let mut buf: Option<Wtf8Buf> = None;
2059
2060        loop {
2061            let table = if quote == b'"' {
2062                &DOUBLE_QUOTE_STRING_END_TABLE
2063            } else {
2064                &SINGLE_QUOTE_STRING_END_TABLE
2065            };
2066
2067            let fast_path_result = byte_search! {
2068                lexer: self,
2069                table: table,
2070                handle_eof: {
2071                    let value_end = self.cur_pos();
2072                    let s = unsafe {
2073                            // Safety: slice_start and value_end are valid position because we
2074                            // got them from `self.input`
2075                        self.input_slice(slice_start, value_end)
2076                    };
2077
2078                    self.emit_error(start, SyntaxError::UnterminatedStrLit);
2079
2080                    let end = self.cur_pos();
2081                    let raw = unsafe { self.input_slice(start, end) };
2082                    return Ok(Self::Token::str(self.wtf8_atom(Wtf8::from_str(s)), self.atom(raw), self));
2083                },
2084            };
2085            // dbg!(char::from_u32(fast_path_result as u32));
2086
2087            match fast_path_result {
2088                b'"' | b'\'' if fast_path_result == quote => {
2089                    let value_end = self.cur_pos();
2090
2091                    let value = if let Some(buf) = buf.as_mut() {
2092                        // `buf` only exist when there has escape.
2093                        debug_assert!(unsafe { self.input_slice(start, value_end).contains('\\') });
2094                        let s = unsafe {
2095                            // Safety: slice_start and value_end are valid position because we
2096                            // got them from `self.input`
2097                            self.input_slice(slice_start, value_end)
2098                        };
2099                        buf.push_str(s);
2100                        self.wtf8_atom(&**buf)
2101                    } else {
2102                        let s = unsafe { self.input_slice(slice_start, value_end) };
2103                        self.wtf8_atom(Wtf8::from_str(s))
2104                    };
2105
2106                    unsafe {
2107                        // Safety: cur is quote
2108                        self.input_mut().bump();
2109                    }
2110
2111                    let end = self.cur_pos();
2112                    let raw = unsafe {
2113                        // Safety: start and end are valid position because we got them from
2114                        // `self.input`
2115                        self.input_slice(start, end)
2116                    };
2117                    let raw = self.atom(raw);
2118                    return Ok(Self::Token::str(value, raw, self));
2119                }
2120                b'\\' => {
2121                    let end = self.cur_pos();
2122                    let s = unsafe {
2123                        // Safety: start and end are valid position because we got them from
2124                        // `self.input`
2125                        self.input_slice(slice_start, end)
2126                    };
2127
2128                    if buf.is_none() {
2129                        buf = Some(Wtf8Buf::from_str(s));
2130                    } else {
2131                        buf.as_mut().unwrap().push_str(s);
2132                    }
2133
2134                    if let Some(escaped) = self.read_escaped_char(false)? {
2135                        buf.as_mut().unwrap().push(escaped);
2136                    }
2137
2138                    slice_start = self.cur_pos();
2139                    continue;
2140                }
2141                b'\n' | b'\r' => {
2142                    let end = self.cur_pos();
2143                    let s = unsafe {
2144                        // Safety: start and end are valid position because we got them from
2145                        // `self.input`
2146                        self.input_slice(slice_start, end)
2147                    };
2148
2149                    self.emit_error(start, SyntaxError::UnterminatedStrLit);
2150
2151                    let end = self.cur_pos();
2152
2153                    let raw = unsafe {
2154                        // Safety: start and end are valid position because we got them from
2155                        // `self.input`
2156                        self.input_slice(start, end)
2157                    };
2158                    return Ok(Self::Token::str(
2159                        self.wtf8_atom(Wtf8::from_str(s)),
2160                        self.atom(raw),
2161                        self,
2162                    ));
2163                }
2164                _ => self.bump(),
2165            }
2166        }
2167    }
2168
2169    fn read_keyword_with(
2170        &mut self,
2171        convert: &dyn Fn(&str) -> Option<Self::Token>,
2172    ) -> LexResult<Self::Token> {
2173        debug_assert!(self.cur().is_some());
2174
2175        let start = self.cur_pos();
2176        let (s, has_escape) = self.read_keyword_as_str_with()?;
2177        if let Some(word) = convert(s.as_ref()) {
2178            // Note: ctx is store in lexer because of this error.
2179            // 'await' and 'yield' may have semantic of reserved word, which means lexer
2180            // should know context or parser should handle this error. Our approach to this
2181            // problem is former one.
2182            if has_escape && word.is_reserved(self.ctx()) {
2183                self.error(
2184                    start,
2185                    SyntaxError::EscapeInReservedWord { word: Atom::new(s) },
2186                )
2187            } else {
2188                Ok(word)
2189            }
2190        } else {
2191            let atom = self.atom(s);
2192            Ok(Self::Token::unknown_ident(atom, self))
2193        }
2194    }
2195
2196    /// This is a performant version of [Lexer::read_word_as_str_with] for
2197    /// reading keywords. We should make sure the first byte is a valid
2198    /// ASCII.
2199    fn read_keyword_as_str_with(&mut self) -> LexResult<(Cow<'a, str>, bool)> {
2200        let slice_start = self.cur_pos();
2201
2202        // Fast path: try to scan ASCII identifier using byte_search
2203        // Performance optimization: check if first char disqualifies as keyword
2204        // Advance past first byte
2205        self.bump();
2206
2207        // Use byte_search to quickly scan to end of ASCII identifier
2208        let next_byte = byte_search! {
2209            lexer: self,
2210            table: NOT_ASCII_ID_CONTINUE_TABLE,
2211            handle_eof: {
2212                // Reached EOF, entire remainder is identifier
2213                let end = self.cur_pos();
2214                let s = unsafe {
2215                    // Safety: slice_start and end are valid position because we got them from
2216                    // `self.input`
2217                    self.input_slice(slice_start, end)
2218                };
2219
2220                return Ok((Cow::Borrowed(s), false));
2221            },
2222        };
2223
2224        // Check if we hit end of identifier or need to fall back to slow path
2225        if !next_byte.is_ascii() || next_byte == b'\\' {
2226            // Hit Unicode character or escape sequence, fall back to slow path from current
2227            // position
2228            self.read_word_as_str_with_slow_path(slice_start)
2229        } else {
2230            // Hit end of identifier (non-continue ASCII char)
2231            let end = self.cur_pos();
2232            let s = unsafe {
2233                // Safety: slice_start and end are valid position because we got them from
2234                // `self.input`
2235                self.input_slice(slice_start, end)
2236            };
2237
2238            Ok((Cow::Borrowed(s), false))
2239        }
2240    }
2241}
2242
2243pub fn pos_span(p: BytePos) -> Span {
2244    Span::new_with_checked(p, p)
2245}
2246
2247pub fn fixed_len_span(p: BytePos, len: u32) -> Span {
2248    Span::new_with_checked(p, p + BytePos(len))
2249}