swc_ecma_parser/lexer/state.rs

use std::mem::take;

use swc_common::BytePos;
use swc_ecma_ast::EsVersion;
use swc_ecma_lexer::{
    common::{
        lexer::{
            char::CharExt,
            comments_buffer::{BufferedComment, BufferedCommentKind},
            state::State as StateTrait,
            LexResult,
        },
        syntax::SyntaxFlags,
    },
    error::SyntaxError,
    TokenContexts,
};

use super::{Context, Input, Lexer, LexerTrait};
use crate::{
    error::Error,
    input::Tokens,
    lexer::token::{Token, TokenAndSpan, TokenValue},
};
/// State of the lexer.
///
/// Ported from babylon.
#[derive(Clone)]
pub struct State {
    /// Whether a line break exists between the previous token and the new
    /// token.
    pub had_line_break: bool,
    /// Whether a line break existed before the last token.
    pub had_line_break_before_last: bool,
    /// TODO: Remove this field.
    is_first: bool,
    pub next_regexp: Option<BytePos>,
    pub start: BytePos,
    pub prev_hi: BytePos,

    pub(super) token_value: Option<TokenValue>,
    token_type: Option<Token>,
}

impl swc_ecma_lexer::common::input::Tokens<TokenAndSpan> for Lexer<'_> {
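    // Switching into module context makes previously buffered
    // script-vs-module errors unconditionally fatal, so they are flushed
    // into the main error list here.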
    #[inline]
    fn set_ctx(&mut self, ctx: Context) {
        if ctx.contains(Context::Module) && !self.module_errors.borrow().is_empty() {
            let mut module_errors = self.module_errors.borrow_mut();
            self.errors.borrow_mut().append(&mut *module_errors);
        }
        self.ctx = ctx
    }

    #[inline]
    fn ctx(&self) -> Context {
        self.ctx
    }

    #[inline]
    fn ctx_mut(&mut self) -> &mut Context {
        &mut self.ctx
    }

    #[inline]
    fn syntax(&self) -> SyntaxFlags {
        self.syntax
    }

    #[inline]
    fn target(&self) -> EsVersion {
        self.target
    }

    #[inline]
    fn start_pos(&self) -> BytePos {
        self.start_pos
    }

    #[inline]
    fn set_expr_allowed(&mut self, _: bool) {}

    #[inline]
    fn set_next_regexp(&mut self, start: Option<BytePos>) {
        self.state.next_regexp = start;
    }

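    // This lexer does not track Babel-style token contexts; the methods
    // below exist only to satisfy the trait and must never be called.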
    #[inline]
    fn token_context(&self) -> &TokenContexts {
        unreachable!();
    }

    #[inline]
    fn token_context_mut(&mut self) -> &mut TokenContexts {
        unreachable!();
    }

    #[inline]
    fn set_token_context(&mut self, _: TokenContexts) {
        unreachable!();
    }

    fn add_error(&self, error: Error) {
        self.errors.borrow_mut().push(error);
    }

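    // In module context the error is definitely real and reported
    // immediately; in script context it is buffered until we know whether
    // the file is actually a module.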
    fn add_module_mode_error(&self, error: Error) {
        if self.ctx.contains(Context::Module) {
            self.add_error(error);
            return;
        }
        self.module_errors.borrow_mut().push(error);
    }

    #[inline]
    fn take_errors(&mut self) -> Vec<Error> {
        take(&mut self.errors.borrow_mut())
    }

    #[inline]
    fn take_script_module_errors(&mut self) -> Vec<Error> {
        take(&mut self.module_errors.borrow_mut())
    }

    #[inline]
    fn end_pos(&self) -> BytePos {
        self.input.end_pos()
    }

    #[inline]
    fn update_token_flags(&mut self, f: impl FnOnce(&mut swc_ecma_lexer::lexer::TokenFlags)) {
        f(&mut self.token_flags)
    }

    #[inline]
    fn token_flags(&self) -> swc_ecma_lexer::lexer::TokenFlags {
        self.token_flags
    }
}

impl crate::input::Tokens for Lexer<'_> {
    fn clone_token_value(&self) -> Option<TokenValue> {
        self.state.token_value.clone()
    }

    fn get_token_value(&self) -> Option<&TokenValue> {
        self.state.token_value.as_ref()
    }

    fn set_token_value(&mut self, token_value: Option<TokenValue>) {
        self.state.token_value = token_value;
    }

    fn take_token_value(&mut self) -> Option<TokenValue> {
        self.state.token_value.take()
    }

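    // The rescan helpers rewind the input to `reset` and lex the region
    // again under JSX rules; the parser uses these after backtracking.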
    fn rescan_jsx_token(
        &mut self,
        allow_multiline_jsx_text: bool,
        reset: BytePos,
    ) -> Option<TokenAndSpan> {
        unsafe {
            self.input.reset_to(reset);
        }
        Tokens::scan_jsx_token(self, allow_multiline_jsx_text)
    }

    fn rescan_jsx_open_el_terminal_token(&mut self, reset: BytePos) -> Option<TokenAndSpan> {
        unsafe {
            self.input.reset_to(reset);
        }
        Tokens::scan_jsx_open_el_terminal_token(self)
    }

    fn scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) -> Option<TokenAndSpan> {
        let start = self.cur_pos();
        let res = match self.scan_jsx_token(allow_multiline_jsx_text) {
            Ok(res) => Ok(res),
            Err(error) => {
                self.state.set_token_value(TokenValue::Error(error));
                Err(Token::Error)
            }
        };
        let token = match res.map_err(Some) {
            Ok(t) => t,
            Err(e) => e,
        };
        let span = self.span(start);
        if let Some(token) = token {
            if let Some(comments) = self.comments_buffer.as_mut() {
                for comment in comments.take_pending_leading() {
                    comments.push(BufferedComment {
                        kind: BufferedCommentKind::Leading,
                        pos: start,
                        comment,
                    });
                }
            }

            self.state.set_token_type(token);
            self.state.prev_hi = self.last_pos();
            self.state.had_line_break_before_last = self.had_line_break_before_last();
        }
        token.map(|token| {
            // Attach span to token.
            TokenAndSpan {
                token,
                had_line_break: self.had_line_break_before_last(),
                span,
            }
        })
    }

    fn scan_jsx_open_el_terminal_token(&mut self) -> Option<TokenAndSpan> {
        self.skip_space::<true>();
        let start = self.input.cur_pos();
        let res = match self.scan_jsx_attrs_terminal_token() {
            Ok(res) => Ok(res),
            Err(error) => {
                self.state.set_token_value(TokenValue::Error(error));
                Err(Token::Error)
            }
        };
        let token = match res.map_err(Some) {
            Ok(t) => t,
            Err(e) => e,
        };
        let span = self.span(start);
        if let Some(token) = token {
            if let Some(comments) = self.comments_buffer.as_mut() {
                for comment in comments.take_pending_leading() {
                    comments.push(BufferedComment {
                        kind: BufferedCommentKind::Leading,
                        pos: start,
                        comment,
                    });
                }
            }

            self.state.set_token_type(token);
            self.state.prev_hi = self.last_pos();
            self.state.had_line_break_before_last = self.had_line_break_before_last();
        }
        token.map(|token| {
            // Attach span to token.
            TokenAndSpan {
                token,
                had_line_break: self.had_line_break_before_last(),
                span,
            }
        })
    }

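    // JSX names may contain `-` (e.g. `data-foo`), so the already-lexed word
    // token is extended with dashes and any further identifier parts.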
    fn scan_jsx_identifier(&mut self, start: BytePos) -> TokenAndSpan {
        let token = self.state.token_type.unwrap();
        debug_assert!(token.is_word());
        let mut v = String::with_capacity(16);
        while let Some(ch) = self.input().cur() {
            if ch == '-' {
                v.push(ch);
                self.bump();
            } else {
                let old_pos = self.cur_pos();
                v.push_str(&self.scan_identifier_parts());
                if self.cur_pos() == old_pos {
                    break;
                }
            }
        }
        let v = if !v.is_empty() {
            let v = if token.is_known_ident() {
                format!("{}{}", token.to_string(None), v)
            } else if let Some(TokenValue::Word(value)) = self.state.token_value.take() {
                format!("{value}{v}")
            } else {
                format!("{}{}", token.to_string(None), v)
            };
            self.atom(v)
        } else if token.is_known_ident() || token.is_keyword() {
            self.atom(token.to_string(None))
        } else if let Some(TokenValue::Word(value)) = self.state.token_value.take() {
            value
        } else {
            self.atom(token.to_string(None))
        };
        self.state.set_token_value(TokenValue::Word(v));
        TokenAndSpan {
            token: Token::JSXName,
            had_line_break: self.had_line_break_before_last(),
            span: self.span(start),
        }
    }

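    // A JSX attribute value is either a quoted string (lexed with JSX string
    // rules) or an ordinary token such as `{`, which falls through to `next`.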
    fn scan_jsx_attribute_value(&mut self) -> Option<TokenAndSpan> {
        let Some(cur) = self.cur() else {
            return self.next();
        };
        let start = self.cur_pos();

        match cur {
            '\'' | '"' => {
                let token = self.read_jsx_str(cur).ok()?;
                debug_assert!(self
                    .get_token_value()
                    .is_some_and(|t| matches!(t, TokenValue::Str { .. })));
                debug_assert!(token == Token::Str);
                Some(TokenAndSpan {
                    token,
                    had_line_break: self.had_line_break_before_last(),
                    span: self.span(start),
                })
            }
            _ => self.next(),
        }
    }

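    // Re-lexes a template token from `start`, either from the opening
    // backtick or from the brace that ends a `${...}` substitution.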
    fn rescan_template_token(
        &mut self,
        start: BytePos,
        start_with_back_tick: bool,
    ) -> Option<TokenAndSpan> {
        unsafe { self.input.reset_to(start) };
        let res = self
            .scan_template_token(start, start_with_back_tick)
            .map(Some);
        let token = match res
            .map_err(|e| {
                self.state.set_token_value(TokenValue::Error(e));
                Token::Error
            })
            .map_err(Some)
        {
            Ok(t) => t,
            Err(e) => e,
        };
        let span = if start_with_back_tick {
            self.span(start)
        } else {
            // `+ BytePos(1)` is used to skip `{`
            self.span(start + BytePos(1))
        };
        if let Some(token) = token {
            if let Some(comments) = self.comments_buffer.as_mut() {
                for comment in comments.take_pending_leading() {
                    comments.push(BufferedComment {
                        kind: BufferedCommentKind::Leading,
                        pos: start,
                        comment,
                    });
                }
            }

            self.state.set_token_type(token);
            self.state.prev_hi = self.last_pos();
            self.state.had_line_break_before_last = self.had_line_break_before_last();
        }
        token.map(|token| {
            // Attach span to token.
            TokenAndSpan {
                token,
                had_line_break: self.had_line_break_before_last(),
                span,
            }
        })
    }
}

impl Lexer<'_> {
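    // Produces the next raw token, handling the pending-regexp and shebang
    // special cases before normal dispatch. `start` is updated to the
    // position after leading whitespace so the caller can build the span.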
    fn next_token(&mut self, start: &mut BytePos) -> Result<Option<Token>, Error> {
        if let Some(start) = self.state.next_regexp {
            return Ok(Some(self.read_regexp(start)?));
        }

        if self.state.is_first {
            if let Some(shebang) = self.read_shebang()? {
                self.state.set_token_value(TokenValue::Word(shebang));
                return Ok(Some(Token::Shebang));
            }
        }

        self.state.had_line_break = self.state.is_first;
        self.state.is_first = false;

        self.skip_space::<true>();
        *start = self.input.cur_pos();

        if self.input.last_pos() == self.input.end_pos() {
            // End of input.
            self.consume_pending_comments();
            return Ok(None);
        }

        // println!(
        //     "\tContext: ({:?}) {:?}",
        //     self.input.cur().unwrap(),
        //     self.state.context.0
        // );

        self.state.start = *start;

        self.read_token()
    }

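    // Scans a single JSX child: `<`/`</`, `{`, or a `JSXText` chunk. The
    // text loop mirrors TypeScript's scanner diagnostics, recovering from a
    // bare `>` or `}` inside JSX text with a suggestion.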
    fn scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) -> Result<Option<Token>, Error> {
        debug_assert!(self.syntax.jsx());

        if self.input_mut().as_str().is_empty() {
            return Ok(None);
        };

        if self.input.eat_byte(b'<') {
            return Ok(Some(if self.input.eat_byte(b'/') {
                Token::LessSlash
            } else {
                Token::Lt
            }));
        } else if self.input.eat_byte(b'{') {
            return Ok(Some(Token::LBrace));
        }

        let start = self.input.cur_pos();
        let mut first_non_whitespace = 0;
        let mut chunk_start = start;
        let mut value = String::new();

        while let Some(ch) = self.input_mut().cur() {
            if ch == '{' {
                break;
            } else if ch == '<' {
                // TODO: check git conflict mark
                break;
            }

            if ch == '>' {
                self.emit_error(
                    self.input().cur_pos(),
                    SyntaxError::UnexpectedTokenWithSuggestions {
                        candidate_list: vec!["`{'>'}`", "`&gt;`"],
                    },
                );
            } else if ch == '}' {
                self.emit_error(
                    self.input().cur_pos(),
                    SyntaxError::UnexpectedTokenWithSuggestions {
                        candidate_list: vec!["`{'}'}`", "`&rbrace;`"],
                    },
                );
            }

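            // `first_non_whitespace` is 0 until anything is seen, -1 once a
            // leading line terminator is seen, and otherwise the position of
            // the latest non-whitespace character; a positive value followed
            // by a line terminator ends single-line JSX text.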
            if first_non_whitespace == 0 && ch.is_line_terminator() {
                first_non_whitespace = -1;
            } else if !allow_multiline_jsx_text
                && ch.is_line_terminator()
                && first_non_whitespace > 0
            {
                break;
            } else if !ch.is_whitespace() {
                first_non_whitespace = self.cur_pos().0 as i32;
            }

            if ch == '&' {
                let cur_pos = self.input().cur_pos();

                let s = unsafe {
                    // Safety: We already checked for the range
                    self.input_slice(chunk_start, cur_pos)
                };
                value.push_str(s);

                if let Ok(jsx_entity) = self.read_jsx_entity() {
                    value.push(jsx_entity.0);

                    chunk_start = self.input.cur_pos();
                }
            } else {
                self.bump();
            }
        }

        let end = self.input().cur_pos();
        let raw = unsafe {
            // Safety: Both of `start` and `end` are generated from `cur_pos()`
            self.input_slice(start, end)
        };
        let value = if value.is_empty() {
            self.atom(raw)
        } else {
            let s = unsafe {
                // Safety: We already checked for the range
                self.input_slice(chunk_start, end)
            };
            value.push_str(s);
            self.atom(value)
        };

        let raw: swc_atoms::Atom = self.atom(raw);

        self.state.set_token_value(TokenValue::Str { raw, value });

        self.state.start = start;

        Ok(Some(Token::JSXText))
    }

    fn scan_jsx_attrs_terminal_token(&mut self) -> LexResult<Option<Token>> {
        if self.input_mut().as_str().is_empty() {
            Ok(None)
        } else if self.input.eat_byte(b'>') {
            Ok(Some(Token::Gt))
        } else if self.input.eat_byte(b'/') {
            Ok(Some(Token::Slash))
        } else {
            self.read_token()
        }
    }

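    // Collects identifier-part characters, decoding `\u` escapes; invalid
    // escapes are reported but scanning recovers where possible.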
    fn scan_identifier_parts(&mut self) -> String {
        let mut v = String::with_capacity(16);
        while let Some(ch) = self.input().cur() {
            if ch.is_ident_part() {
                v.push(ch);
                self.input_mut().bump_bytes(ch.len_utf8());
            } else if ch == '\\' {
                self.bump(); // bump '\'
                if !self.is(b'u') {
                    self.emit_error(self.cur_pos(), SyntaxError::InvalidUnicodeEscape);
                    continue;
                }
                self.bump(); // bump 'u'
                let Ok(chars) = self.read_unicode_escape() else {
                    self.emit_error(self.cur_pos(), SyntaxError::InvalidUnicodeEscape);
                    break;
                };
                for c in chars {
                    v.extend(c);
                }
                self.token_flags |= swc_ecma_lexer::lexer::TokenFlags::UNICODE;
            } else {
                break;
            }
        }
        v
    }
}

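// The parser drives the lexer through this `Iterator` impl; lexing errors
// surface as a `Token::Error` whose payload is stashed in the state.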
impl Iterator for Lexer<'_> {
    type Item = TokenAndSpan;

    fn next(&mut self) -> Option<Self::Item> {
        let mut start = self.cur_pos();

        let res = match self.next_token(&mut start) {
            Ok(res) => Ok(res),
            Err(error) => {
                self.state.set_token_value(TokenValue::Error(error));
                Err(Token::Error)
            }
        };
        let token = match res.map_err(Some) {
            Ok(t) => t,
            Err(e) => e,
        };

        let span = self.span(start);
        if let Some(token) = token {
            if let Some(comments) = self.comments_buffer.as_mut() {
                for comment in comments.take_pending_leading() {
                    comments.push(BufferedComment {
                        kind: BufferedCommentKind::Leading,
                        pos: start,
                        comment,
                    });
                }
            }

            self.state.set_token_type(token);
            self.state.prev_hi = self.last_pos();
            self.state.had_line_break_before_last = self.had_line_break_before_last();
        }

        token.map(|token| {
            // Attach span to token.
            TokenAndSpan {
                token,
                had_line_break: self.had_line_break_before_last(),
                span,
            }
        })
    }
}

impl State {
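    // `prev_hi` starts at the input's start position so that "end of the
    // previous token" is well-defined before any token has been read.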
    pub fn new(start_pos: BytePos) -> Self {
        State {
            had_line_break: false,
            had_line_break_before_last: false,
            is_first: true,
            next_regexp: None,
            start: BytePos(0),
            prev_hi: start_pos,
            token_value: None,
            token_type: None,
        }
    }

    pub(crate) fn set_token_value(&mut self, token_value: TokenValue) {
        self.token_value = Some(token_value);
    }
}

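// Trait impl required by the shared lexer infrastructure. The methods tied
// to expression-context tracking are unreachable because this lexer does
// not use token contexts.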
impl swc_ecma_lexer::common::lexer::state::State for State {
    type TokenKind = Token;
    type TokenType = Token;

    #[inline(always)]
    fn is_expr_allowed(&self) -> bool {
        unreachable!("is_expr_allowed should not be called in Parser/State")
    }

    #[inline(always)]
    fn set_is_expr_allowed(&mut self, _: bool) {
        // noop
    }

    #[inline(always)]
    fn set_next_regexp(&mut self, start: Option<BytePos>) {
        self.next_regexp = start;
    }

    #[inline(always)]
    fn had_line_break(&self) -> bool {
        self.had_line_break
    }

    #[inline(always)]
    fn mark_had_line_break(&mut self) {
        self.had_line_break = true;
    }

    #[inline(always)]
    fn had_line_break_before_last(&self) -> bool {
        self.had_line_break_before_last
    }

    #[inline(always)]
    fn token_contexts(&self) -> &swc_ecma_lexer::TokenContexts {
        unreachable!();
    }

    #[inline(always)]
    fn mut_token_contexts(&mut self) -> &mut swc_ecma_lexer::TokenContexts {
        unreachable!();
    }

    #[inline(always)]
    fn set_token_type(&mut self, token_type: Self::TokenType) {
        self.token_type = Some(token_type);
    }

    #[inline(always)]
    fn token_type(&self) -> Option<Self::TokenType> {
        self.token_type
    }

    #[inline(always)]
    fn syntax(&self) -> SyntaxFlags {
        unreachable!("syntax is not stored in State, but in Lexer")
    }

    #[inline(always)]
    fn prev_hi(&self) -> BytePos {
        self.prev_hi
    }

    #[inline(always)]
    fn start(&self) -> BytePos {
        self.start
    }
}