swc_ecma_parser/parser/
mod.rs

1#![allow(clippy::let_unit_value)]
2#![deny(non_snake_case)]
3
4use rustc_hash::FxHashMap;
5use swc_atoms::Atom;
6use swc_common::{comments::Comments, input::StringInput, BytePos, Span, Spanned};
7use swc_ecma_ast::*;
8
9use crate::{
10    error::SyntaxError,
11    input::Buffer,
12    lexer::{Token, TokenAndSpan},
13    parser::{
14        input::Tokens,
15        state::{State, WithState},
16        util::ExprExt,
17    },
18    syntax::SyntaxFlags,
19    Context, Syntax,
20};
21#[cfg(test)]
22extern crate test;
23#[cfg(test)]
24use test::Bencher;
25
26use crate::error::Error;
27
28#[macro_use]
29mod macros;
30mod class_and_fn;
31mod expr;
32mod ident;
33pub mod input;
34mod jsx;
35mod module_item;
36mod object;
37mod pat;
38mod state;
39mod stmt;
40#[cfg(test)]
41mod tests;
42#[cfg(feature = "typescript")]
43mod typescript;
44mod util;
45#[cfg(feature = "verify")]
46mod verifier;
47
48pub type PResult<T> = Result<T, crate::error::Error>;
49
50pub struct ParserCheckpoint<I: Tokens> {
51    lexer: I::Checkpoint,
52    buffer_prev_span: Span,
53    buffer_cur: TokenAndSpan,
54    buffer_next: Option<crate::lexer::NextTokenAndSpan>,
55}
56
57/// EcmaScript parser.
58#[derive(Clone)]
59pub struct Parser<I: self::input::Tokens> {
60    state: State,
61    input: self::input::Buffer<I>,
62    found_module_item: bool,
63}
64
65impl<I: Tokens> Parser<I> {
66    #[inline(always)]
67    pub fn input(&self) -> &Buffer<I> {
68        &self.input
69    }
70
71    #[inline(always)]
72    pub fn input_mut(&mut self) -> &mut Buffer<I> {
73        &mut self.input
74    }
75
76    #[inline(always)]
77    fn state(&self) -> &State {
78        &self.state
79    }
80
81    #[inline(always)]
82    fn state_mut(&mut self) -> &mut State {
83        &mut self.state
84    }
85
86    fn checkpoint_save(&self) -> ParserCheckpoint<I> {
87        ParserCheckpoint {
88            lexer: self.input.iter.checkpoint_save(),
89            buffer_cur: self.input.cur,
90            buffer_next: self.input.next.clone(),
91            buffer_prev_span: self.input.prev_span,
92        }
93    }
94
95    fn checkpoint_load(&mut self, checkpoint: ParserCheckpoint<I>) {
96        self.input.iter.checkpoint_load(checkpoint.lexer);
97        self.input.cur = checkpoint.buffer_cur;
98        self.input.next = checkpoint.buffer_next;
99        self.input.prev_span = checkpoint.buffer_prev_span;
100    }
101
102    #[inline(always)]
103    fn mark_found_module_item(&mut self) {
104        self.found_module_item = true;
105    }
106}
107
108impl<'a> Parser<crate::lexer::Lexer<'a>> {
109    pub fn new(syntax: Syntax, input: StringInput<'a>, comments: Option<&'a dyn Comments>) -> Self {
110        let lexer = crate::lexer::Lexer::new(syntax, Default::default(), input, comments);
111        Self::new_from(lexer)
112    }
113}
114
115impl<I: Tokens> Parser<I> {
116    pub fn new_from(mut input: I) -> Self {
117        let in_declare = input.syntax().dts();
118        let mut ctx = input.ctx() | Context::TopLevel;
119        ctx.set(Context::InDeclare, in_declare);
120        input.set_ctx(ctx);
121
122        let mut p = Parser {
123            state: Default::default(),
124            input: crate::parser::input::Buffer::new(input),
125            found_module_item: false,
126        };
127        p.input.bump(); // consume EOF
128        p
129    }
130
131    pub fn take_errors(&mut self) -> Vec<Error> {
132        self.input.iter.take_errors()
133    }
134
135    pub fn take_script_module_errors(&mut self) -> Vec<Error> {
136        self.input.iter.take_script_module_errors()
137    }
138
139    pub fn parse_script(&mut self) -> PResult<Script> {
140        trace_cur!(self, parse_script);
141
142        let ctx = (self.ctx() & !Context::Module) | Context::TopLevel;
143        self.set_ctx(ctx);
144
145        let start = self.cur_pos();
146
147        let shebang = self.parse_shebang()?;
148
149        let ret = self.parse_stmt_block_body(true, None).map(|body| Script {
150            span: self.span(start),
151            body,
152            shebang,
153        })?;
154
155        debug_assert!(self.input().cur() == Token::Eof);
156        self.input_mut().bump();
157
158        Ok(ret)
159    }
160
161    pub fn parse_commonjs(&mut self) -> PResult<Script> {
162        trace_cur!(self, parse_commonjs);
163
164        // CommonJS module is acctually in a function scope
165        let ctx = (self.ctx() & !Context::Module)
166            | Context::InFunction
167            | Context::InsideNonArrowFunctionScope;
168        self.set_ctx(ctx);
169
170        let start = self.cur_pos();
171        let shebang = self.parse_shebang()?;
172
173        let ret = self.parse_stmt_block_body(true, None).map(|body| Script {
174            span: self.span(start),
175            body,
176            shebang,
177        })?;
178
179        debug_assert!(self.input().cur() == Token::Eof);
180        self.input_mut().bump();
181
182        Ok(ret)
183    }
184
185    pub fn parse_typescript_module(&mut self) -> PResult<Module> {
186        trace_cur!(self, parse_typescript_module);
187
188        debug_assert!(self.syntax().typescript());
189
190        //TODO: parse() -> PResult<Program>
191        let ctx = (self.ctx() | Context::Module | Context::TopLevel) & !Context::Strict;
192        // Module code is always in strict mode
193        self.set_ctx(ctx);
194
195        let start = self.cur_pos();
196        let shebang = self.parse_shebang()?;
197
198        let ret = self
199            .parse_module_item_block_body(true, None)
200            .map(|body| Module {
201                span: self.span(start),
202                body,
203                shebang,
204            })?;
205
206        debug_assert!(self.input().cur() == Token::Eof);
207        self.input_mut().bump();
208
209        Ok(ret)
210    }
211
212    /// Returns [Module] if it's a module and returns [Script] if it's not a
213    /// module.
214    ///
215    /// Note: This is not perfect yet. It means, some strict mode violations may
216    /// not be reported even if the method returns [Module].
217    pub fn parse_program(&mut self) -> PResult<Program> {
218        let start = self.cur_pos();
219        let shebang = self.parse_shebang()?;
220
221        let body: Vec<ModuleItem> = self
222            .do_inside_of_context(Context::CanBeModule.union(Context::TopLevel), |p| {
223                p.parse_module_item_block_body(true, None)
224            })?;
225        let has_module_item = self.found_module_item
226            || body
227                .iter()
228                .any(|item| matches!(item, ModuleItem::ModuleDecl(..)));
229        if has_module_item && !self.ctx().contains(Context::Module) {
230            let ctx = self.ctx()
231                | Context::Module
232                | Context::CanBeModule
233                | Context::TopLevel
234                | Context::Strict;
235            // Emit buffered strict mode / module code violations
236            self.input.set_ctx(ctx);
237        }
238
239        let ret = if has_module_item {
240            Program::Module(Module {
241                span: self.span(start),
242                body,
243                shebang,
244            })
245        } else {
246            let body = body
247                .into_iter()
248                .map(|item| match item {
249                    ModuleItem::ModuleDecl(_) => unreachable!("Module is handled above"),
250                    ModuleItem::Stmt(stmt) => stmt,
251                    #[cfg(swc_ast_unknown)]
252                    _ => unreachable!(),
253                })
254                .collect();
255            Program::Script(Script {
256                span: self.span(start),
257                body,
258                shebang,
259            })
260        };
261
262        debug_assert!(self.input().cur() == Token::Eof);
263        self.input_mut().bump();
264
265        Ok(ret)
266    }
267
268    pub fn parse_module(&mut self) -> PResult<Module> {
269        let ctx = self.ctx()
270            | Context::Module
271            | Context::CanBeModule
272            | Context::TopLevel
273            | Context::Strict;
274        // Module code is always in strict mode
275        self.set_ctx(ctx);
276
277        let start = self.cur_pos();
278        let shebang = self.parse_shebang()?;
279
280        let ret = self
281            .parse_module_item_block_body(true, None)
282            .map(|body| Module {
283                span: self.span(start),
284                body,
285                shebang,
286            })?;
287
288        debug_assert!(self.input().cur() == Token::Eof);
289        self.input_mut().bump();
290
291        Ok(ret)
292    }
293
294    pub fn parse_shebang(&mut self) -> PResult<Option<Atom>> {
295        let cur = self.input().cur();
296        Ok(if cur == Token::Shebang {
297            let ret = self.input_mut().expect_shebang_token_and_bump();
298            Some(ret)
299        } else {
300            None
301        })
302    }
303}
304
305impl<I: Tokens> Parser<I> {
306    #[inline(always)]
307    pub fn with_state<'w>(&'w mut self, state: State) -> WithState<'w, I> {
308        let orig_state = std::mem::replace(self.state_mut(), state);
309        WithState {
310            orig_state,
311            inner: self,
312        }
313    }
314
315    #[inline(always)]
316    pub fn ctx(&self) -> Context {
317        self.input().get_ctx()
318    }
319
320    #[inline(always)]
321    pub fn set_ctx(&mut self, ctx: Context) {
322        self.input_mut().set_ctx(ctx);
323    }
324
325    #[inline]
326    pub fn do_inside_of_context<T>(
327        &mut self,
328        context: Context,
329        f: impl FnOnce(&mut Self) -> T,
330    ) -> T {
331        let ctx = self.ctx();
332        let inserted = ctx.complement().intersection(context);
333        if inserted.is_empty() {
334            f(self)
335        } else {
336            self.input_mut().update_ctx(|ctx| ctx.insert(inserted));
337            let result = f(self);
338            self.input_mut().update_ctx(|ctx| ctx.remove(inserted));
339            result
340        }
341    }
342
343    pub fn do_outside_of_context<T>(
344        &mut self,
345        context: Context,
346        f: impl FnOnce(&mut Self) -> T,
347    ) -> T {
348        let ctx = self.ctx();
349        let removed = ctx.intersection(context);
350        if !removed.is_empty() {
351            self.input_mut().update_ctx(|ctx| ctx.remove(removed));
352            let result = f(self);
353            self.input_mut().update_ctx(|ctx| ctx.insert(removed));
354            result
355        } else {
356            f(self)
357        }
358    }
359
360    #[inline(always)]
361    pub fn strict_mode<T>(&mut self, f: impl FnOnce(&mut Self) -> T) -> T {
362        self.do_inside_of_context(Context::Strict, f)
363    }
364
365    /// Original context is restored when returned guard is dropped.
366    #[inline(always)]
367    pub fn in_type<T>(&mut self, f: impl FnOnce(&mut Self) -> T) -> T {
368        self.do_inside_of_context(Context::InType, f)
369    }
370
371    #[inline(always)]
372    pub fn allow_in_expr<T>(&mut self, f: impl FnOnce(&mut Self) -> T) -> T {
373        self.do_inside_of_context(Context::IncludeInExpr, f)
374    }
375
376    #[inline(always)]
377    pub fn disallow_in_expr<T>(&mut self, f: impl FnOnce(&mut Self) -> T) -> T {
378        self.do_outside_of_context(Context::IncludeInExpr, f)
379    }
380
381    #[inline(always)]
382    pub fn syntax(&self) -> SyntaxFlags {
383        self.input().syntax()
384    }
385
386    #[cold]
387    pub fn emit_err(&mut self, span: Span, error: SyntaxError) {
388        if self.ctx().contains(Context::IgnoreError) || !self.syntax().early_errors() {
389            return;
390        }
391        self.emit_error(crate::error::Error::new(span, error))
392    }
393
394    #[cold]
395    pub fn emit_error(&mut self, error: crate::error::Error) {
396        if self.ctx().contains(Context::IgnoreError) || !self.syntax().early_errors() {
397            return;
398        }
399        let cur = self.input().cur();
400        if cur == Token::Error {
401            let err = self.input_mut().expect_error_token_and_bump();
402            self.input_mut().iter_mut().add_error(err);
403        }
404        self.input_mut().iter_mut().add_error(error);
405    }
406
407    #[cold]
408    pub fn emit_strict_mode_err(&mut self, span: Span, error: SyntaxError) {
409        if self.ctx().contains(Context::IgnoreError) {
410            return;
411        }
412        let error = crate::error::Error::new(span, error);
413        if self.ctx().contains(Context::Strict) {
414            self.input_mut().iter_mut().add_error(error);
415        } else {
416            self.input_mut().iter_mut().add_module_mode_error(error);
417        }
418    }
419
420    pub fn verify_expr(&mut self, expr: Box<Expr>) -> PResult<Box<Expr>> {
421        #[cfg(feature = "verify")]
422        {
423            use swc_ecma_visit::Visit;
424            let mut v = self::verifier::Verifier { errors: Vec::new() };
425            v.visit_expr(&expr);
426            for (span, error) in v.errors {
427                self.emit_err(span, error);
428            }
429        }
430        Ok(expr)
431    }
432
433    #[inline(always)]
434    pub fn cur_pos(&self) -> BytePos {
435        self.input().cur_pos()
436    }
437
438    #[inline(always)]
439    pub fn last_pos(&self) -> BytePos {
440        self.input().prev_span().hi
441    }
442
443    #[inline]
444    pub fn is_general_semi(&mut self) -> bool {
445        let cur = self.input().cur();
446        matches!(cur, Token::Semi | Token::RBrace | Token::Eof)
447            || self.input().had_line_break_before_cur()
448    }
449
450    pub fn eat_general_semi(&mut self) -> bool {
451        if cfg!(feature = "debug") {
452            tracing::trace!("eat(';'): cur={:?}", self.input().cur());
453        }
454        let cur = self.input().cur();
455        if cur == Token::Semi {
456            self.bump();
457            true
458        } else {
459            cur == Token::RBrace || self.input().had_line_break_before_cur() || cur == Token::Eof
460        }
461    }
462
463    #[inline]
464    pub fn expect_general_semi(&mut self) -> PResult<()> {
465        if !self.eat_general_semi() {
466            let span = self.input().cur_span();
467            let cur = self.input_mut().dump_cur();
468            syntax_error!(self, span, SyntaxError::Expected(";".to_string(), cur))
469        }
470        Ok(())
471    }
472
473    #[inline]
474    pub fn expect(&mut self, t: Token) -> PResult<()> {
475        if !self.input_mut().eat(t) {
476            let span = self.input().cur_span();
477            let cur = self.input_mut().dump_cur();
478            syntax_error!(self, span, SyntaxError::Expected(format!("{t:?}"), cur))
479        } else {
480            Ok(())
481        }
482    }
483
484    #[inline(always)]
485    pub fn expect_without_advance(&mut self, t: Token) -> PResult<()> {
486        if !self.input_mut().is(t) {
487            let span = self.input().cur_span();
488            let cur = self.input_mut().dump_cur();
489            syntax_error!(self, span, SyntaxError::Expected(format!("{t:?}"), cur))
490        } else {
491            Ok(())
492        }
493    }
494
495    #[inline(always)]
496    pub fn bump(&mut self) {
497        debug_assert!(
498            self.input().cur() != Token::Eof,
499            "parser should not call bump() without knowing current token"
500        );
501        self.input_mut().bump()
502    }
503
504    #[inline]
505    pub fn span(&self, start: BytePos) -> Span {
506        let end = self.last_pos();
507        debug_assert!(
508            start <= end,
509            "assertion failed: (span.start <= span.end). start = {start:?}, end = {end:?}",
510        );
511        Span::new_with_checked(start, end)
512    }
513
514    #[inline(always)]
515    pub fn assert_and_bump(&mut self, token: Token) {
516        debug_assert!(
517            self.input().is(token),
518            "assertion failed: expected {token:?}, got {:?}",
519            self.input().cur()
520        );
521        self.bump();
522    }
523
524    pub fn check_assign_target(&mut self, expr: &Expr, deny_call: bool) {
525        if !expr.is_valid_simple_assignment_target(self.ctx().contains(Context::Strict)) {
526            self.emit_err(expr.span(), SyntaxError::TS2406);
527        }
528
529        // We follow behavior of tsc
530        if self.input().syntax().typescript() && self.syntax().early_errors() {
531            let is_eval_or_arguments = match expr {
532                Expr::Ident(i) => i.is_reserved_in_strict_bind(),
533                _ => false,
534            };
535
536            if is_eval_or_arguments {
537                self.emit_strict_mode_err(expr.span(), SyntaxError::TS1100);
538            }
539
540            fn should_deny(e: &Expr, deny_call: bool) -> bool {
541                match e {
542                    Expr::Lit(..) => false,
543                    Expr::Call(..) => deny_call,
544                    Expr::Bin(..) => false,
545                    Expr::Paren(ref p) => should_deny(&p.expr, deny_call),
546
547                    _ => true,
548                }
549            }
550
551            // It is an early Reference Error if LeftHandSideExpression is neither
552            // an ObjectLiteral nor an ArrayLiteral and
553            // IsValidSimpleAssignmentTarget of LeftHandSideExpression is false.
554            if !is_eval_or_arguments
555                && !expr.is_valid_simple_assignment_target(self.ctx().contains(Context::Strict))
556                && should_deny(expr, deny_call)
557            {
558                self.emit_err(expr.span(), SyntaxError::TS2406);
559            }
560        }
561    }
562
563    /// spec: 'PropertyName'
564    pub fn parse_prop_name(&mut self) -> PResult<PropName> {
565        trace_cur!(self, parse_prop_name);
566        self.do_inside_of_context(Context::InPropertyName, |p| {
567            let start = p.input().cur_pos();
568            let cur = p.input().cur();
569            let v = if cur == Token::Str {
570                PropName::Str(p.parse_str_lit())
571            } else if cur == Token::Num {
572                let (value, raw) = p.input_mut().expect_number_token_and_bump();
573                PropName::Num(Number {
574                    span: p.span(start),
575                    value,
576                    raw: Some(raw),
577                })
578            } else if cur == Token::BigInt {
579                let (value, raw) = p.input_mut().expect_bigint_token_and_bump();
580                PropName::BigInt(BigInt {
581                    span: p.span(start),
582                    value,
583                    raw: Some(raw),
584                })
585            } else if cur.is_word() {
586                let w = p.input_mut().expect_word_token_and_bump();
587                PropName::Ident(IdentName::new(w, p.span(start)))
588            } else if cur == Token::LBracket {
589                p.bump();
590                let inner_start = p.input().cur_pos();
591                let mut expr = p.allow_in_expr(Self::parse_assignment_expr)?;
592                if p.syntax().typescript() && p.input().is(Token::Comma) {
593                    let mut exprs = vec![expr];
594                    while p.input_mut().eat(Token::Comma) {
595                        //
596                        exprs.push(p.allow_in_expr(Self::parse_assignment_expr)?);
597                    }
598                    p.emit_err(p.span(inner_start), SyntaxError::TS1171);
599                    expr = Box::new(
600                        SeqExpr {
601                            span: p.span(inner_start),
602                            exprs,
603                        }
604                        .into(),
605                    );
606                }
607                expect!(p, Token::RBracket);
608                PropName::Computed(ComputedPropName {
609                    span: p.span(start),
610                    expr,
611                })
612            } else {
613                unexpected!(
614                    p,
615                    "identifier, string literal, numeric literal or [ for the computed key"
616                )
617            };
618            Ok(v)
619        })
620    }
621
622    #[inline]
623    pub fn is_ident_ref(&mut self) -> bool {
624        let cur = self.input().cur();
625        cur.is_word() && !cur.is_reserved(self.ctx())
626    }
627
628    #[inline]
629    pub fn peek_is_ident_ref(&mut self) -> bool {
630        let ctx = self.ctx();
631        peek!(self).is_some_and(|peek| peek.is_word() && !peek.is_reserved(ctx))
632    }
633
634    #[inline(always)]
635    pub fn eat_ident_ref(&mut self) -> bool {
636        if self.is_ident_ref() {
637            self.bump();
638            true
639        } else {
640            false
641        }
642    }
643
644    #[cold]
645    #[inline(never)]
646    pub fn eof_error(&mut self) -> Error {
647        debug_assert!(
648            self.input().cur() == Token::Eof,
649            "Parser should not call throw_eof_error() without knowing current token"
650        );
651        let pos = self.input().end_pos();
652        let last = Span { lo: pos, hi: pos };
653        Error::new(last, SyntaxError::Eof)
654    }
655}
656
/// Test helper: parses `s` with `syntax` by running `f` on a fresh parser.
///
/// Every error taken from the parser and any error returned by `f` is emitted
/// through the test session's handler.
///
/// # Panics
///
/// Panics if `f` fails or if the parser collected at least one error.
#[cfg(test)]
pub fn test_parser<F, Ret>(s: &'static str, syntax: Syntax, f: F) -> Ret
where
    F: FnOnce(&mut Parser<crate::lexer::Lexer>) -> Result<Ret, Error>,
{
    let outcome = crate::with_test_sess(s, |handler, input| {
        let lexer = crate::lexer::Lexer::new(syntax, EsVersion::Es2019, input, None);
        let mut p = Parser::new_from(lexer);
        let ret = f(&mut p);

        // Collected errors are emitted before the result of `f` is inspected.
        let collected = p.take_errors();
        let had_collected_errors = !collected.is_empty();
        for err in collected {
            err.into_diagnostic(handler).emit();
        }

        let res = ret.map_err(|err| err.into_diagnostic(handler).emit())?;

        if had_collected_errors {
            Err(())
        } else {
            Ok(res)
        }
    });

    outcome.unwrap_or_else(|output| panic!("test_parser(): failed to parse \n{s}\n{output}"))
}
683
/// Test helper like `test_parser`, but the lexer is given the comment store
/// `c`.
///
/// Errors taken from the parser are emitted but, unlike in `test_parser`, do
/// not fail the test on their own.
///
/// # Panics
///
/// Panics if `f` returns an error.
#[cfg(test)]
pub fn test_parser_comment<F, Ret>(c: &dyn Comments, s: &'static str, syntax: Syntax, f: F) -> Ret
where
    F: FnOnce(&mut Parser<crate::lexer::Lexer>) -> Result<Ret, Error>,
{
    let outcome = crate::with_test_sess(s, |handler, input| {
        let lexer = crate::lexer::Lexer::new(syntax, EsVersion::Es2019, input, Some(&c));
        let mut p = Parser::new_from(lexer);
        let ret = f(&mut p);

        let collected = p.take_errors();
        for err in collected {
            err.into_diagnostic(handler).emit();
        }

        ret.map_err(|err| err.into_diagnostic(handler).emit())
    });

    outcome.unwrap_or_else(|output| panic!("test_parser(): failed to parse \n{s}\n{output}"))
}
702
/// Benchmark helper: repeatedly builds a lexer and parser over `s` and runs
/// `f` on the parser.
///
/// `b.bytes` is set to the length of `s`. Errors returned by `f` are emitted
/// through the test session's handler and otherwise ignored, as is the result
/// of the session itself.
#[cfg(test)]
pub fn bench_parser<F>(b: &mut Bencher, s: &'static str, syntax: Syntax, mut f: F)
where
    F: for<'a> FnMut(&'a mut Parser<crate::lexer::Lexer<'a>>) -> PResult<()>,
{
    b.bytes = s.len() as u64;

    let _ = crate::with_test_sess(s, |handler, input| {
        b.iter(|| {
            let lexer = crate::lexer::Lexer::new(syntax, Default::default(), input.clone(), None);
            // The parser stays a temporary so that its borrow matches the
            // `for<'a>` bound on `F`.
            if let Err(err) = f(&mut Parser::new_from(lexer)) {
                err.into_diagnostic(handler).emit();
            }
        });

        Ok(())
    });
}