1use std::{char, iter::FusedIterator, rc::Rc};
4
5use swc_atoms::AtomStoreCell;
6use swc_common::{
7 comments::Comments,
8 input::{Input, StringInput},
9 BytePos,
10};
11use swc_ecma_ast::EsVersion;
12use swc_ecma_lexer::{
13 common::{
14 lexer::{char::CharExt, fixed_len_span, pos_span, LexResult, Lexer as LexerTrait},
15 syntax::SyntaxFlags,
16 },
17 lexer::TokenFlags,
18};
19
20use self::table::{ByteHandler, BYTE_HANDLERS};
21use crate::{
22 error::{Error, SyntaxError},
23 input::Tokens,
24 lexer::comments_buffer::CommentsBuffer,
25 Context, Syntax,
26};
27
28#[cfg(feature = "unstable")]
29pub(crate) mod capturing;
30mod comments_buffer;
31mod state;
32mod table;
33pub(crate) mod token;
34
35pub(crate) use token::{NextTokenAndSpan, Token, TokenAndSpan, TokenValue};
36
/// Hand-written ECMAScript lexer driven by a byte-dispatch table
/// (`table::BYTE_HANDLERS`), producing tokens over a `StringInput`.
#[derive(Clone)]
pub struct Lexer<'a> {
    /// Optional sink for comments encountered while lexing.
    comments: Option<&'a dyn Comments>,
    /// Staging buffer for comments; only allocated when `comments` is `Some`.
    comments_buffer: Option<CommentsBuffer>,

    /// Lexing context controlled by the parser (e.g. `InType` changes how
    /// `<` / `>` are tokenized — see `read_token_lt_gt`).
    pub ctx: Context,
    input: StringInput<'a>,
    /// Input position at the time the lexer was constructed.
    start_pos: BytePos,

    /// Mutable scanner state (e.g. `had_line_break`); see the `state` module.
    state: self::state::State,
    /// Per-token flags; reset at the start of every `read_token` call.
    token_flags: TokenFlags,
    pub(crate) syntax: SyntaxFlags,
    pub(crate) target: EsVersion,

    /// Recoverable errors collected during lexing.
    errors: Vec<Error>,
    /// Errors reported only when the source is parsed as a module.
    module_errors: Vec<Error>,

    /// Shared string interner used to build `Atom`s for identifiers,
    /// template chunks, etc.
    atoms: Rc<AtomStoreCell>,
}
57
// Marker impl: promises callers that once the token iterator returns `None`
// it keeps returning `None`. NOTE(review): the `Iterator` impl itself is not
// visible in this chunk (presumably in the `state` module) — it must uphold
// this contract.
impl FusedIterator for Lexer<'_> {}
59
/// Plumbing required by the shared lexer infrastructure in `swc_ecma_lexer`:
/// trivial accessors wiring this lexer's fields into the common trait. All
/// are `#[inline(always)]` so the shared code compiles down to direct field
/// access.
impl<'a> swc_ecma_lexer::common::lexer::Lexer<'a, TokenAndSpan> for Lexer<'a> {
    type CommentsBuffer = CommentsBuffer;
    type State = self::state::State;
    type Token = self::Token;

    #[inline(always)]
    fn input(&self) -> &StringInput<'a> {
        &self.input
    }

    #[inline(always)]
    fn input_mut(&mut self) -> &mut StringInput<'a> {
        &mut self.input
    }

    /// Records a recoverable error without aborting lexing.
    #[inline(always)]
    fn push_error(&mut self, error: Error) {
        self.errors.push(error);
    }

    #[inline(always)]
    fn state(&self) -> &Self::State {
        &self.state
    }

    #[inline(always)]
    fn state_mut(&mut self) -> &mut Self::State {
        &mut self.state
    }

    #[inline(always)]
    fn comments(&self) -> Option<&'a dyn swc_common::comments::Comments> {
        self.comments
    }

    #[inline(always)]
    fn comments_buffer(&self) -> Option<&Self::CommentsBuffer> {
        self.comments_buffer.as_ref()
    }

    #[inline(always)]
    fn comments_buffer_mut(&mut self) -> Option<&mut Self::CommentsBuffer> {
        self.comments_buffer.as_mut()
    }

    /// Returns the source text between `start` and `end`.
    ///
    /// # Safety
    /// Delegates to `StringInput::slice`; `start`/`end` presumably must be
    /// valid, ordered byte positions within this input — see that method's
    /// contract.
    #[inline(always)]
    unsafe fn input_slice(&mut self, start: BytePos, end: BytePos) -> &'a str {
        self.input.slice(start, end)
    }

    /// Consumes chars while `f` holds and returns the consumed prefix.
    #[inline(always)]
    fn input_uncons_while(&mut self, f: impl FnMut(char) -> bool) -> &'a str {
        self.input_mut().uncons_while(f)
    }

    /// Interns `s` via the shared atom store.
    #[inline(always)]
    fn atom<'b>(&self, s: impl Into<std::borrow::Cow<'b, str>>) -> swc_atoms::Atom {
        self.atoms.atom(s)
    }
}
120
121impl<'a> Lexer<'a> {
122 pub fn new(
123 syntax: Syntax,
124 target: EsVersion,
125 input: StringInput<'a>,
126 comments: Option<&'a dyn Comments>,
127 ) -> Self {
128 let start_pos = input.last_pos();
129
130 Lexer {
131 comments,
132 comments_buffer: comments.is_some().then(CommentsBuffer::new),
133 ctx: Default::default(),
134 input,
135 start_pos,
136 state: self::state::State::new(start_pos),
137 syntax: syntax.into_flags(),
138 target,
139 errors: Default::default(),
140 module_errors: Default::default(),
141 atoms: Default::default(),
142 token_flags: TokenFlags::empty(),
143 }
144 }
145
146 fn read_token(&mut self) -> LexResult<Token> {
148 self.token_flags = TokenFlags::empty();
149 let byte = match self.input.as_str().as_bytes().first() {
150 Some(&v) => v,
151 None => return Ok(Token::Eof),
152 };
153
154 let handler = unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) };
155 handler(self)
156 }
157
158 fn read_token_plus_minus<const C: u8>(&mut self) -> LexResult<Token> {
159 let start = self.cur_pos();
160
161 unsafe {
162 self.input.bump();
164 }
165
166 Ok(if self.input.cur() == Some(C as char) {
168 unsafe {
169 self.input.bump();
171 }
172
173 if self.state.had_line_break && C == b'-' && self.eat(b'>') {
175 self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
176 self.skip_line_comment(0);
177 self.skip_space::<true>();
178 return self.read_token();
179 }
180
181 if C == b'+' {
182 Token::PlusPlus
183 } else {
184 Token::MinusMinus
185 }
186 } else if self.input.eat_byte(b'=') {
187 if C == b'+' {
188 Token::PlusEq
189 } else {
190 Token::MinusEq
191 }
192 } else if C == b'+' {
193 Token::Plus
194 } else {
195 Token::Minus
196 })
197 }
198
199 fn read_token_bang_or_eq<const C: u8>(&mut self) -> LexResult<Token> {
200 let start = self.cur_pos();
201 let had_line_break_before_last = self.had_line_break_before_last();
202
203 unsafe {
204 self.input.bump();
206 }
207
208 Ok(if self.input.eat_byte(b'=') {
209 if self.input.eat_byte(b'=') {
212 if C == b'!' {
213 Token::NotEqEq
214 } else {
215 if had_line_break_before_last && self.is_str("====") {
218 self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
219 self.skip_line_comment(4);
220 self.skip_space::<true>();
221 return self.read_token();
222 }
223
224 Token::EqEqEq
225 }
226 } else if C == b'!' {
227 Token::NotEq
228 } else {
229 Token::EqEq
230 }
231 } else if C == b'=' && self.input.eat_byte(b'>') {
232 Token::Arrow
235 } else if C == b'!' {
236 Token::Bang
237 } else {
238 Token::Eq
239 })
240 }
241}
242
243impl Lexer<'_> {
    /// Scans tokens whose first byte is `C` (`<` or `>`): relational and
    /// shift operators plus their compound assignments, with two
    /// recoveries — the legacy `<!--` HTML open comment and
    /// `<<<<<<<` / `>>>>>>>` merge-conflict markers (TS1185).
    #[inline(never)]
    fn read_token_lt_gt<const C: u8>(&mut self) -> LexResult<Token> {
        let had_line_break_before_last = self.had_line_break_before_last();
        let start = self.cur_pos();
        self.bump();

        // In a TypeScript type position, `<` and `>` are always single-char
        // tokens (generic argument brackets), never shift operators.
        if self.syntax.typescript()
            && self.ctx.contains(Context::InType)
            && !self.ctx.contains(Context::ShouldNotLexLtOrGtAsType)
        {
            if C == b'<' {
                return Ok(Token::Lt);
            } else if C == b'>' {
                return Ok(Token::Gt);
            }
        }

        // `<!--` starts a legacy single-line comment (HTML-like comments);
        // it is an error in module code. Recover by skipping the line.
        if C == b'<' && self.is(b'!') && self.peek() == Some('-') && self.peek_ahead() == Some('-')
        {
            self.skip_line_comment(3);
            self.skip_space::<true>();
            self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);

            return self.read_token();
        }

        let mut op = if C == b'<' { Token::Lt } else { Token::Gt };

        // Fold a repeated `C` into `<<` / `>>`, and a third `>` into `>>>`.
        if self.cur() == Some(C as char) {
            self.bump();
            op = if C == b'<' {
                Token::LShift
            } else {
                Token::RShift
            };

            if C == b'>' && self.cur() == Some(C as char) {
                self.bump();
                op = Token::ZeroFillRShift;
            }
        }

        // A trailing `=` turns any operator above into its assignment form.
        let token = if self.eat(b'=') {
            match op {
                Token::Lt => Token::LtEq,
                Token::Gt => Token::GtEq,
                Token::LShift => Token::LShiftEq,
                Token::RShift => Token::RShiftEq,
                Token::ZeroFillRShift => Token::ZeroFillRShiftEq,
                _ => unreachable!(),
            }
        } else {
            op
        };

        // Conflict-marker recovery: after consuming `<<` (resp. `>>>`), a
        // lookahead of `<<<<< ` (resp. `>>>> `) completes a 7-char
        // `<<<<<<<` / `>>>>>>>` marker at the start of a line. Report
        // TS1185, skip the rest of the line, and continue lexing.
        if had_line_break_before_last
            && match op {
                Token::LShift if self.is_str("<<<<< ") => true,
                Token::ZeroFillRShift if self.is_str(">>>> ") => true,
                _ => false,
            }
        {
            self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
            self.skip_line_comment(5);
            self.skip_space::<true>();
            return self.read_token();
        }

        Ok(token)
    }
323
324 fn read_token_back_quote(&mut self) -> LexResult<Token> {
325 let start = self.cur_pos();
326 self.scan_template_token(start, true)
327 }
328
    /// Scans one template-literal chunk.
    ///
    /// The current char must be `` ` `` when `started_with_backtick`, or
    /// `}` when resuming after a `${...}` substitution. On success both the
    /// raw source text and the cooked (escape-processed) text are stored
    /// via `set_token_value`. `cooked` is tracked as a `Result` so that an
    /// invalid escape poisons only the cooked value while the raw text is
    /// still produced in full.
    ///
    /// Returns `NoSubstitutionTemplateLiteral` / `TemplateHead` when the
    /// chunk opened with a backtick, `TemplateTail` / `TemplateMiddle`
    /// otherwise; errors with `UnterminatedTpl` at end of input.
    fn scan_template_token(
        &mut self,
        start: BytePos,
        started_with_backtick: bool,
    ) -> LexResult<Token> {
        debug_assert!(self.cur() == Some(if started_with_backtick { '`' } else { '}' }));
        let mut cooked = Ok(String::with_capacity(8));
        // Skip the opening delimiter; both the raw and cooked slices begin
        // immediately after it.
        self.bump();
        let mut cooked_slice_start = self.cur_pos();
        let raw_slice_start = cooked_slice_start;
        // Interns the raw source from the chunk start up to (but not
        // including) the current position.
        let raw_atom = |this: &mut Self| {
            let last_pos = this.cur_pos();
            let s = unsafe { this.input.slice(raw_slice_start, last_pos) };
            this.atoms.atom(s)
        };
        // Flushes the plain (escape-free) text since `cooked_slice_start`
        // into the cooked buffer — a no-op once cooking has failed.
        macro_rules! consume_cooked {
            () => {{
                if let Ok(cooked) = &mut cooked {
                    let last_pos = self.cur_pos();
                    cooked.push_str(unsafe {
                        self.input.slice(cooked_slice_start, last_pos)
                    });
                }
            }};
        }

        while let Some(c) = self.cur() {
            if c == '`' {
                // Closing backtick: this chunk (and the template) ends.
                consume_cooked!();
                let cooked = cooked.map(|cooked| self.atoms.atom(cooked));
                let raw = raw_atom(self);
                self.bump();
                return Ok(if started_with_backtick {
                    self.set_token_value(Some(TokenValue::Template { raw, cooked }));
                    Token::NoSubstitutionTemplateLiteral
                } else {
                    self.set_token_value(Some(TokenValue::Template { raw, cooked }));
                    Token::TemplateTail
                });
            } else if c == '$' && self.input.peek() == Some('{') {
                // `${` opens a substitution: emit what was scanned so far.
                consume_cooked!();
                let cooked = cooked.map(|cooked| self.atoms.atom(cooked));
                let raw = raw_atom(self);
                self.input.bump_bytes(2);
                return Ok(if started_with_backtick {
                    self.set_token_value(Some(TokenValue::Template { raw, cooked }));
                    Token::TemplateHead
                } else {
                    self.set_token_value(Some(TokenValue::Template { raw, cooked }));
                    Token::TemplateMiddle
                });
            } else if c == '\\' {
                // Escape sequence: flush plain text, then cook the escape.
                consume_cooked!();

                match self.read_escaped_char(true) {
                    Ok(Some(chars)) => {
                        if let Ok(ref mut cooked) = cooked {
                            for c in chars {
                                cooked.extend(c);
                            }
                        }
                    }
                    // Escape that contributes nothing to the cooked text
                    // (presumably a line continuation — see
                    // `read_escaped_char`; TODO confirm).
                    Ok(None) => {}
                    // Invalid escape: poison the cooked value but keep
                    // scanning so the raw text stays complete.
                    Err(error) => {
                        cooked = Err(error);
                    }
                }

                cooked_slice_start = self.cur_pos();
            } else if c.is_line_terminator() {
                // Line terminators are normalized in the cooked text:
                // `\r` and `\r\n` both cook to `\n`; LS/PS pass through.
                consume_cooked!();

                let c = if c == '\r' && self.peek() == Some('\n') {
                    // Consume the `\r` here; the `\n` is consumed below.
                    self.bump();
                    '\n'
                } else {
                    match c {
                        '\n' => '\n',
                        '\r' => '\n',
                        '\u{2028}' => '\u{2028}',
                        '\u{2029}' => '\u{2029}',
                        _ => unreachable!(),
                    }
                };

                self.bump();

                if let Ok(ref mut cooked) = cooked {
                    cooked.push(c);
                }
                cooked_slice_start = self.cur_pos();
            } else {
                // Ordinary char: stays inside the pending plain-text slice.
                self.bump();
            }
        }

        // Ran out of input before a closing backtick or `${`.
        self.error(start, SyntaxError::UnterminatedTpl)?
    }
429}