swc_ecma_parser/parser/input.rs

use swc_atoms::{Atom, Wtf8Atom};
use swc_common::{BytePos, Span};
use swc_ecma_ast::EsVersion;

use crate::{
    error::Error,
    lexer::{LexResult, NextTokenAndSpan, Token, TokenAndSpan, TokenFlags, TokenValue},
    syntax::SyntaxFlags,
    Context,
};

/// Cloning should be cheap if you are parsing TypeScript, because TypeScript
/// syntax requires backtracking.
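///
/// # Example
///
/// A minimal sketch (assumed usage, not taken from this crate's tests) of how
/// a caller can use the checkpoint API to backtrack after a failed
/// speculative parse; `tokens` stands for any `Tokens` implementation:
///
/// ```ignore
/// let cp = tokens.checkpoint_save();
/// // ... speculatively consume tokens; if that turns out to be wrong:
/// tokens.checkpoint_load(cp); // rewind to the saved position
/// ```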
pub trait Tokens: Clone + Iterator<Item = TokenAndSpan> {
    type Checkpoint;

    fn set_ctx(&mut self, ctx: Context);
    fn ctx(&self) -> Context;
    fn ctx_mut(&mut self) -> &mut Context;
    fn syntax(&self) -> SyntaxFlags;
    fn target(&self) -> EsVersion;

    fn checkpoint_save(&self) -> Self::Checkpoint;
    fn checkpoint_load(&mut self, checkpoint: Self::Checkpoint);

    fn start_pos(&self) -> BytePos {
        BytePos(0)
    }

    fn set_expr_allowed(&mut self, allow: bool);
    fn set_next_regexp(&mut self, start: Option<BytePos>);

    /// Implementors should use Rc<RefCell<Vec<Error>>>.
    ///
    /// It is required because the parser has to backtrack while parsing
    /// TypeScript code.
    fn add_error(&mut self, error: Error);

    /// Add an error which is valid syntax in script mode.
    ///
    /// These errors should be dropped if the program turns out not to be a
    /// module.
    ///
    /// Implementors should check whether the [Context] marks the input as a
    /// module, and buffer these errors while it does not. They should also
    /// move the buffered errors into the main error buffer when `set_ctx`
    /// switches the parser into module mode.
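    ///
    /// A rough sketch of one possible implementation (the `module_errors`
    /// buffer and the `is_module` check are placeholders, not part of this
    /// crate):
    ///
    /// ```ignore
    /// fn add_module_mode_error(&mut self, error: Error) {
    ///     if is_module(&self.ctx()) {
    ///         // Already in module mode: report immediately.
    ///         self.add_error(error);
    ///     } else {
    ///         // Script mode: buffer the error in case the program is later
    ///         // identified as a module.
    ///         self.module_errors.borrow_mut().push(error);
    ///     }
    /// }
    /// ```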
    fn add_module_mode_error(&mut self, error: Error);

    fn end_pos(&self) -> BytePos;

    fn take_errors(&mut self) -> Vec<Error>;

    /// If the program was parsed as a script, this contains the module-mode
    /// errors to report should the program later be identified as a module.
    fn take_script_module_errors(&mut self) -> Vec<Error>;
    fn update_token_flags(&mut self, f: impl FnOnce(&mut TokenFlags));
    fn token_flags(&self) -> TokenFlags;

    fn clone_token_value(&self) -> Option<TokenValue>;
    fn take_token_value(&mut self) -> Option<TokenValue>;
    fn get_token_value(&self) -> Option<&TokenValue>;
    fn set_token_value(&mut self, token_value: Option<TokenValue>);

    fn scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) -> TokenAndSpan;
    fn scan_jsx_open_el_terminal_token(&mut self) -> TokenAndSpan;
    fn rescan_jsx_open_el_terminal_token(&mut self, reset: BytePos) -> TokenAndSpan;
    fn rescan_jsx_token(&mut self, allow_multiline_jsx_text: bool, reset: BytePos) -> TokenAndSpan;
    fn scan_jsx_identifier(&mut self, start: BytePos) -> TokenAndSpan;
    fn scan_jsx_attribute_value(&mut self) -> TokenAndSpan;
    fn rescan_template_token(&mut self, start: BytePos, start_with_back_tick: bool)
        -> TokenAndSpan;
}

/// This struct is responsible for managing the current token and the peeked
/// token.
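///
/// # Example
///
/// A minimal sketch of how the parser typically drives the buffer (assumed
/// usage, not taken from this crate's tests); `lexer` is any `Tokens`
/// implementation:
///
/// ```ignore
/// let mut buf = Buffer::new(lexer);
/// buf.bump();                 // load the first real token (cur starts as EOF)
/// let cur = buf.cur();        // current token
/// let ahead = buf.peek();     // one-token lookahead, lexed lazily
/// ```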
#[derive(Clone)]
pub struct Buffer<I> {
    pub iter: I,
    /// Span of the previous token.
    pub prev_span: Span,
    pub cur: TokenAndSpan,
    /// Peeked token
    pub next: Option<NextTokenAndSpan>,
}

impl<I: Tokens> Buffer<I> {
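    // The `expect_*_token_value` helpers below take (or borrow) the value the
    // lexer stored for the current token, and panic via `unreachable!` if the
    // stored value has a different shape. Callers are expected to have
    // verified the current token kind beforehand.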
    pub fn expect_word_token_value(&mut self) -> Atom {
        let Some(crate::lexer::TokenValue::Word(word)) = self.iter.take_token_value() else {
            unreachable!()
        };
        word
    }

    pub fn expect_word_token_value_ref(&self) -> &Atom {
        let Some(crate::lexer::TokenValue::Word(word)) = self.iter.get_token_value() else {
            unreachable!("token_value: {:?}", self.iter.get_token_value())
        };
        word
    }

    pub fn expect_number_token_value(&mut self) -> (f64, Atom) {
        let Some(crate::lexer::TokenValue::Num { value, raw }) = self.iter.take_token_value()
        else {
            unreachable!()
        };
        (value, raw)
    }

    pub fn expect_string_token_value(&mut self) -> (Wtf8Atom, Atom) {
        let Some(crate::lexer::TokenValue::Str { value, raw }) = self.iter.take_token_value()
        else {
            unreachable!()
        };
        (value, raw)
    }

    pub fn expect_bigint_token_value(&mut self) -> (Box<num_bigint::BigInt>, Atom) {
        let Some(crate::lexer::TokenValue::BigInt { value, raw }) = self.iter.take_token_value()
        else {
            unreachable!()
        };
        (value, raw)
    }

    pub fn expect_regex_token_value(&mut self) -> (Atom, Atom) {
        let Some(crate::lexer::TokenValue::Regex { value, flags }) = self.iter.take_token_value()
        else {
            unreachable!()
        };
        (value, flags)
    }

    pub fn expect_template_token_value(&mut self) -> (LexResult<Wtf8Atom>, Atom) {
        let Some(crate::lexer::TokenValue::Template { cooked, raw }) = self.iter.take_token_value()
        else {
            unreachable!()
        };
        (cooked, raw)
    }

    pub fn expect_error_token_value(&mut self) -> Error {
        let Some(crate::lexer::TokenValue::Error(error)) = self.iter.take_token_value() else {
            unreachable!()
        };
        error
    }

    pub fn get_token_value(&self) -> Option<&TokenValue> {
        self.iter.get_token_value()
    }

    pub fn scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) {
        let prev = self.cur;
        let t = self.iter.scan_jsx_token(allow_multiline_jsx_text);
        self.prev_span = prev.span;
        self.set_cur(t);
    }

    #[allow(unused)]
    fn scan_jsx_open_el_terminal_token(&mut self) {
        let prev = self.cur;
        let t = self.iter.scan_jsx_open_el_terminal_token();
        self.prev_span = prev.span;
        self.set_cur(t);
    }

    pub fn rescan_jsx_open_el_terminal_token(&mut self) {
        if !self.cur().should_rescan_into_gt_in_jsx() {
            return;
        }
        // rescan `>=`, `>>`, `>>=`, `>>>`, `>>>=` into `>`
        let start = self.cur.span.lo;
        let t = self.iter.rescan_jsx_open_el_terminal_token(start);
        self.set_cur(t);
    }

    pub fn rescan_jsx_token(&mut self, allow_multiline_jsx_text: bool) {
        let start = self.cur.span.lo;
        let t = self.iter.rescan_jsx_token(allow_multiline_jsx_text, start);
        self.set_cur(t);
    }

    pub fn scan_jsx_identifier(&mut self) {
        if !self.cur().is_word() {
            return;
        }
        let start = self.cur.span.lo;
        let cur = self.iter.scan_jsx_identifier(start);
        debug_assert!(cur.token == Token::JSXName);
        self.set_cur(cur);
    }

    pub fn scan_jsx_attribute_value(&mut self) {
        self.cur = self.iter.scan_jsx_attribute_value();
    }

    pub fn rescan_template_token(&mut self, start_with_back_tick: bool) {
        let start = self.cur_pos();
        self.cur = self.iter.rescan_template_token(start, start_with_back_tick);
    }
}

impl<I: Tokens> Buffer<I> {
    pub fn new(lexer: I) -> Self {
        let start_pos = lexer.start_pos();
        let prev_span = Span::new_with_checked(start_pos, start_pos);
        Buffer {
            iter: lexer,
            cur: TokenAndSpan::new(Token::Eof, prev_span, false),
            prev_span,
            next: None,
        }
    }

    #[inline(always)]
    pub fn set_cur(&mut self, token: TokenAndSpan) {
        self.cur = token
    }

    #[inline(always)]
    pub fn next(&self) -> Option<&NextTokenAndSpan> {
        self.next.as_ref()
    }

    #[inline(always)]
    pub fn set_next(&mut self, token: Option<NextTokenAndSpan>) {
        self.next = token;
    }

    #[inline(always)]
    pub fn next_mut(&mut self) -> &mut Option<NextTokenAndSpan> {
        &mut self.next
    }

    #[inline(always)]
    pub fn cur(&self) -> Token {
        self.cur.token
    }

    #[inline(always)]
    pub fn get_cur(&self) -> &TokenAndSpan {
        &self.cur
    }

    #[inline(always)]
    pub fn prev_span(&self) -> Span {
        self.prev_span
    }

    #[inline(always)]
    pub fn iter(&self) -> &I {
        &self.iter
    }

    #[inline(always)]
    pub fn iter_mut(&mut self) -> &mut I {
        &mut self.iter
    }

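    /// Returns the token after the current one without consuming it, lexing
    /// it lazily on the first call. The lexer's stored value for the current
    /// token is saved and restored around the lookahead, so `get_token_value`
    /// still refers to the current token afterwards.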
    pub fn peek(&mut self) -> Option<Token> {
        debug_assert!(
            self.cur.token != Token::Eof,
            "parser should not call peek() without knowing current token"
        );

        if self.next.is_none() {
            let old = self.iter.take_token_value();
            let next_token = self.iter.next();
            self.next = next_token.map(|t| NextTokenAndSpan {
                token_and_span: t,
                value: self.iter.take_token_value(),
            });
            self.iter.set_token_value(old);
        }

        self.next.as_ref().map(|ts| ts.token_and_span.token)
    }

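    /// Replaces the current token with `token`, reusing the previous token's
    /// span. The peek buffer must be empty and the current token must not be
    /// EOF (checked with debug assertions).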
    pub fn store(&mut self, token: Token) {
        debug_assert!(self.next().is_none());
        debug_assert!(self.cur() != Token::Eof);
        let span = self.prev_span();
        let token = TokenAndSpan::new(token, span, false);
        self.set_cur(token);
    }

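    /// Advances to the next token: a previously peeked token is used first
    /// (restoring its value into the lexer), otherwise the lexer is polled,
    /// and once the lexer is exhausted an EOF token is synthesized at the end
    /// of the current token's span.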
    pub fn bump(&mut self) {
        let next = if let Some(next) = self.next.take() {
            self.iter.set_token_value(next.value);
            next.token_and_span
        } else if let Some(next) = self.iter.next() {
            next
        } else {
            let eof_pos = self.cur.span.hi;
            let eof_span = Span::new_with_checked(eof_pos, eof_pos);
            TokenAndSpan::new(Token::Eof, eof_span, true)
        };
        self.prev_span = self.cur.span;
        self.set_cur(next);
    }

    pub fn expect_word_token_and_bump(&mut self) -> Atom {
        let cur = self.cur();
        let word = cur.take_word(self).unwrap();
        self.bump();
        word
    }

    pub fn expect_shebang_token_and_bump(&mut self) -> swc_atoms::Atom {
        let cur = self.cur();
        let ret = cur.take_shebang(self);
        self.bump();
        ret
    }

    pub fn expect_jsx_name_token_and_bump(&mut self) -> Atom {
        let cur = self.cur();
        let word = cur.take_jsx_name(self);
        self.bump();
        word
    }

    pub fn expect_jsx_text_token_and_bump(&mut self) -> (Atom, Atom) {
        let cur = self.cur();
        let ret = cur.take_jsx_text(self);
        self.bump();
        ret
    }

    pub fn expect_number_token_and_bump(&mut self) -> (f64, Atom) {
        let cur = self.cur();
        let ret = cur.take_num(self);
        self.bump();
        ret
    }

    pub fn expect_string_token_and_bump(&mut self) -> (Wtf8Atom, Atom) {
        let cur = self.cur();
        let ret = cur.take_str(self);
        self.bump();
        ret
    }

    pub fn expect_bigint_token_and_bump(&mut self) -> (Box<num_bigint::BigInt>, Atom) {
        let cur = self.cur();
        let ret = cur.take_bigint(self);
        self.bump();
        ret
    }

    pub fn expect_regex_token_and_bump(&mut self) -> (Atom, Atom) {
        let cur = self.cur();
        let ret = cur.take_regexp(self);
        self.bump();
        ret
    }

    pub fn expect_template_token_and_bump(&mut self) -> (LexResult<Wtf8Atom>, Atom) {
        let cur = self.cur();
        let ret = cur.take_template(self);
        self.bump();
        ret
    }

    pub fn expect_error_token_and_bump(&mut self) -> Error {
        let cur = self.cur();
        let ret = cur.take_error(self);
        self.bump();
        ret
    }

    #[cold]
    #[inline(never)]
    pub fn dump_cur(&self) -> String {
        let cur = self.cur();
        cur.to_string(self.get_token_value())
    }
}

impl<I: Tokens> Buffer<I> {
    pub fn had_line_break_before_cur(&self) -> bool {
        self.get_cur().had_line_break
    }

    /// This returns `true` on EOF.
    pub fn has_linebreak_between_cur_and_peeked(&mut self) -> bool {
        let _ = self.peek();
        self.next().map(|item| item.had_line_break()).unwrap_or({
            // return true on EOF.
            true
        })
    }

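    /// Narrows the current `<<` token to a `<` token that covers only its
    /// second character, so the parser can treat the leading `<` as already
    /// consumed (useful e.g. when a `<<` actually starts nested type
    /// arguments).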
    pub fn cut_lshift(&mut self) {
        debug_assert!(
            self.is(Token::LShift),
            "parser should only call cut_lshift when encountering LShift token"
        );
        let span = self.cur_span().with_lo(self.cur_span().lo + BytePos(1));
        let token = TokenAndSpan::new(Token::Lt, span, false);
        self.set_cur(token);
    }

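    /// Merges the current `<` or `>` token with the peeked token into a
    /// single operator token (`<<`, `<=`, `<<=`, `>>`, `>=`, `>>=`, `>>>`,
    /// `>>>=`) when the two tokens are directly adjacent in the source. If
    /// the tokens cannot be merged, the peeked token is put back and the
    /// buffer is left unchanged.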
    pub fn merge_lt_gt(&mut self) {
        debug_assert!(
            self.is(Token::Lt) || self.is(Token::Gt),
            "parser should only call merge_lt_gt when encountering a Lt or Gt token"
        );
        if self.peek().is_none() {
            return;
        }
        let span = self.cur_span();
        let next = self.next().unwrap();
        if span.hi != next.span().lo {
            return;
        }
        let next = self.next_mut().take().unwrap();
        let cur = self.get_cur();
        let cur_token = cur.token;
        let token = if cur_token == Token::Gt {
            let next_token = next.token();
            if next_token == Token::Gt {
                // >>
                Token::RShift
            } else if next_token == Token::Eq {
                // >=
                Token::GtEq
            } else if next_token == Token::RShift {
                // >>>
                Token::ZeroFillRShift
            } else if next_token == Token::GtEq {
                // >>=
                Token::RShiftEq
            } else if next_token == Token::RShiftEq {
                // >>>=
                Token::ZeroFillRShiftEq
            } else {
                self.set_next(Some(next));
                return;
            }
        } else if cur_token == Token::Lt {
            let next_token = next.token();
            if next_token == Token::Lt {
                // <<
                Token::LShift
            } else if next_token == Token::Eq {
                // <=
                Token::LtEq
            } else if next_token == Token::LtEq {
                // <<=
                Token::LShiftEq
            } else {
                self.set_next(Some(next));
                return;
            }
        } else {
            self.set_next(Some(next));
            return;
        };
        let span = span.with_hi(next.span().hi);
        let token = TokenAndSpan::new(token, span, cur.had_line_break);
        self.set_cur(token);
    }

    #[inline(always)]
    pub fn is(&self, expected: Token) -> bool {
        self.cur() == expected
    }

    #[inline(always)]
    pub fn eat(&mut self, expected: Token) -> bool {
        let v = self.is(expected);
        if v {
            self.bump();
        }
        v
    }

    /// Returns start of current token.
    #[inline]
    pub fn cur_pos(&self) -> BytePos {
        self.get_cur().span.lo
    }

    #[inline]
    pub fn cur_span(&self) -> Span {
        self.get_cur().span
    }

    /// Returns last byte position of previous token.
    #[inline]
    pub fn last_pos(&self) -> BytePos {
        self.prev_span().hi
    }

    #[inline]
    pub fn get_ctx(&self) -> Context {
        self.iter().ctx()
    }

    #[inline]
    pub fn update_ctx(&mut self, f: impl FnOnce(&mut Context)) {
        let ctx = self.iter_mut().ctx_mut();
        f(ctx)
    }

    #[inline]
    pub fn set_ctx(&mut self, ctx: Context) {
        self.iter_mut().set_ctx(ctx);
    }

    #[inline]
    pub fn syntax(&self) -> SyntaxFlags {
        self.iter().syntax()
    }

    #[inline]
    pub fn target(&self) -> EsVersion {
        self.iter().target()
    }

    #[inline]
    pub fn set_expr_allowed(&mut self, allow: bool) {
        self.iter_mut().set_expr_allowed(allow)
    }

    #[inline]
    pub fn set_next_regexp(&mut self, start: Option<BytePos>) {
        self.iter_mut().set_next_regexp(start);
    }

    #[inline]
    pub fn end_pos(&self) -> BytePos {
        self.iter().end_pos()
    }

    #[inline]
    pub fn token_flags(&self) -> crate::lexer::TokenFlags {
        self.iter().token_flags()
    }
}